91 |
|
|
92 |
32-bit signed integer |
32-bit signed integer |
93 |
|
|
94 |
|
=item counter |
95 |
|
|
96 |
|
32-bit unsigned integer |
97 |
|
|
98 |
=item date |
=item date |
99 |
|
|
100 |
64-bit unsigned integer, representing a PERL date/time value |
64-bit unsigned integer, representing a PERL date/time value |
322 |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
323 |
# of the specified type. "dataGen" is a PERL string that will be evaluated if no test data generation |
# of the specified type. "dataGen" is a PERL string that will be evaluated if no test data generation |
324 |
# string is specified in the field definition. "avgLen" is the average byte length for estimating |
# string is specified in the field definition. "avgLen" is the average byte length for estimating |
325 |
# record sizes. |
# record sizes. "sort" is the key modifier for the sort command. |
326 |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, avgLen => 1, dataGen => "StringGen('A')" }, |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, avgLen => 1, sort => "", dataGen => "StringGen('A')" }, |
327 |
int => { sqlType => 'INTEGER', maxLen => 20, avgLen => 4, dataGen => "IntGen(0, 99999999)" }, |
int => { sqlType => 'INTEGER', maxLen => 20, avgLen => 4, sort => "n", dataGen => "IntGen(0, 99999999)" }, |
328 |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, avgLen => 100, dataGen => "StringGen(IntGen(10,250))" }, |
counter => { sqlType => 'INTEGER UNSIGNED', maxLen => 20, avgLen => 4, sort => "n", dataGen => "IntGen(0, 99999999)" }, |
329 |
text => { sqlType => 'TEXT', maxLen => 1000000000, avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" }, |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, avgLen => 100, sort => "", dataGen => "StringGen(IntGen(10,250))" }, |
330 |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
text => { sqlType => 'TEXT', maxLen => 1000000000, avgLen => 500, sort => "", dataGen => "StringGen(IntGen(80,1000))" }, |
331 |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, dataGen => "FloatGen(0.0, 100.0)" }, |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, sort => "n", dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
332 |
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, dataGen => "IntGen(0, 1)" }, |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, sort => "g", dataGen => "FloatGen(0.0, 100.0)" }, |
333 |
|
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, sort => "n", dataGen => "IntGen(0, 1)" }, |
334 |
'hash-string' => |
'hash-string' => |
335 |
{ sqlType => 'VARCHAR(22)', maxLen => 22, avgLen => 22, dataGen => "SringGen(22)" }, |
{ sqlType => 'VARCHAR(22)', maxLen => 22, avgLen => 22, sort => "", dataGen => "SringGen(22)" }, |
336 |
'id-string' => |
'id-string' => |
337 |
{ sqlType => 'VARCHAR(25)', maxLen => 25, avgLen => 25, dataGen => "SringGen(22)" }, |
{ sqlType => 'VARCHAR(25)', maxLen => 25, avgLen => 25, sort => "", dataGen => "SringGen(22)" }, |
338 |
'key-string' => |
'key-string' => |
339 |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, dataGen => "StringGen(IntGen(10,40))" }, |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, sort => "", dataGen => "StringGen(IntGen(10,40))" }, |
340 |
'name-string' => |
'name-string' => |
341 |
{ sqlType => 'VARCHAR(80)', maxLen => 80, avgLen => 40, dataGen => "StringGen(IntGen(10,80))" }, |
{ sqlType => 'VARCHAR(80)', maxLen => 80, avgLen => 40, sort => "", dataGen => "StringGen(IntGen(10,80))" }, |
342 |
'medium-string' => |
'medium-string' => |
343 |
{ sqlType => 'VARCHAR(160)', maxLen => 160, avgLen => 40, dataGen => "StringGen(IntGen(10,160))" }, |
{ sqlType => 'VARCHAR(160)', maxLen => 160, avgLen => 40, sort => "", dataGen => "StringGen(IntGen(10,160))" }, |
344 |
); |
); |
345 |
|
|
346 |
# Table translating arities into natural language. |
# Table translating arities into natural language. |
1628 |
The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and |
The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and |
1629 |
property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>. |
property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>. |
1630 |
|
|
1631 |
C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >> |
C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >> |
1632 |
|
|
1633 |
=over 4 |
=over 4 |
1634 |
|
|
1838 |
} |
} |
1839 |
} |
} |
1840 |
# Analyze the table to improve performance. |
# Analyze the table to improve performance. |
1841 |
|
Trace("Analyzing and compacting $relationName.") if T(3); |
1842 |
$dbh->vacuum_it($relationName); |
$dbh->vacuum_it($relationName); |
1843 |
|
# Flush the database cache. |
1844 |
|
$dbh->flush_tables(); |
1845 |
|
Trace("$relationName load completed.") if T(3); |
1846 |
# Return the statistics. |
# Return the statistics. |
1847 |
return $retVal; |
return $retVal; |
1848 |
} |
} |
3242 |
|
|
3243 |
=head3 SortNeeded |
=head3 SortNeeded |
3244 |
|
|
3245 |
C<< my $flag = $erdb->SortNeeded($relationName); >> |
C<< my $parms = $erdb->SortNeeded($relationName); >> |
3246 |
|
|
3247 |
|
Return the pipe command for the sort that should be applied to the specified |
3248 |
|
relation when creating the load file. |
3249 |
|
|
3250 |
|
For example, if the load file should be sorted ascending by the first |
3251 |
|
field with duplicates removed, this method would return |
3252 |
|
|
3253 |
Return TRUE if the specified relation should be sorted during loading to remove duplicate keys, |
sort -k 1 -u -t "\t" |
3254 |
else FALSE. |
|
3255 |
|
If the first field is numeric and duplicates are okay, the method would |
3256 |
|
return |
3257 |
|
|
3258 |
|
sort -k 1n -t "\t" |
3259 |
|
|
3260 |
=over 4 |
=over 4 |
3261 |
|
|
3263 |
|
|
3264 |
Name of the relation to be examined. |
Name of the relation to be examined. |
3265 |
|
|
3266 |
=item RETURN |
=item RETURN |
3267 |
|
|
3268 |
Returns TRUE if the relation needs a sort, else FALSE. |
Returns the sort command to use for sorting the relation, suitable for piping. |
3269 |
|
|
3270 |
=back |
=back |
3271 |
|
|
3274 |
sub SortNeeded { |
sub SortNeeded { |
3275 |
# Get the parameters. |
# Get the parameters. |
3276 |
my ($self, $relationName) = @_; |
my ($self, $relationName) = @_; |
3277 |
# Declare the return variable. |
# Declare a descriptor to hold the names of the key fields. |
3278 |
my $retVal = 0; |
my @keyNames = (); |
3279 |
# Find out if the relation is a primary entity relation. |
# Declare a flag for indicating uniqueness. |
3280 |
|
my $unique; |
3281 |
|
# Get the relation structure. |
3282 |
|
my $relationData = $self->_FindRelation($relationName); |
3283 |
|
# Find out if the relation is a primary entity relation, |
3284 |
|
# a relationship relation, or a secondary entity relation. |
3285 |
my $entityTable = $self->{_metaData}->{Entities}; |
my $entityTable = $self->{_metaData}->{Entities}; |
3286 |
|
my $relationshipTable = $self->{_metaData}->{Relationships}; |
3287 |
if (exists $entityTable->{$relationName}) { |
if (exists $entityTable->{$relationName}) { |
3288 |
my $keyType = $entityTable->{$relationName}->{keyType}; |
# Here we have a primary entity relation, so we have a unique sort on the |
3289 |
Trace("Relation $relationName found in entity table with key type $keyType.") if T(3); |
# ID field. |
3290 |
# If the key is not a hash string, we must do the sort. |
$unique = "-u "; |
3291 |
if ($keyType ne 'hash-string') { |
push @keyNames, "id"; |
3292 |
$retVal = 1; |
} elsif (exists $relationshipTable->{$relationName}) { |
3293 |
|
# Here we have a relationship. We sort using the FROM index. |
3294 |
|
$unique = ""; |
3295 |
|
my $relationshipData = $relationshipTable->{$relationName}; |
3296 |
|
my $index = $relationData->{Indexes}->{"idx${relationName}From"}; |
3297 |
|
push @keyNames, @{$index->{IndexFields}}; |
3298 |
|
} else { |
3299 |
|
# Here we have a secondary entity relation, so we have a non-unique sort on |
3300 |
|
# the ID field. |
3301 |
|
$unique = ""; |
3302 |
|
push @keyNames, "id"; |
3303 |
|
} |
3304 |
|
# Now we parse the key names into sort parameters. First, we prime the return |
3305 |
|
# string. |
3306 |
|
my $retVal = "sort -t \\t $unique"; |
3307 |
|
# Get the relation's field list. |
3308 |
|
my @fields = @{$relationData->{Fields}}; |
3309 |
|
# Loop through the keys. |
3310 |
|
for my $keyData (@keyNames) { |
3311 |
|
# Get the key and the ordering. |
3312 |
|
my ($keyName, $ordering); |
3313 |
|
if ($keyData =~ /^([^ ]+) DESC/) { |
3314 |
|
($keyName, $ordering) = ($1, "descending"); |
3315 |
|
} else { |
3316 |
|
($keyName, $ordering) = ($keyData, "ascending"); |
3317 |
|
} |
3318 |
|
# Find the key's position and type. |
3319 |
|
my $fieldSpec; |
3320 |
|
for (my $i = 0; $i <= $#fields && ! $fieldSpec; $i++) { |
3321 |
|
my $thisField = $fields[$i]; |
3322 |
|
if ($thisField->{name} eq $keyName) { |
3323 |
|
# Get the sort modifier for this field type. The modifier |
3324 |
|
# decides whether we're using a character, numeric, or |
3325 |
|
# floating-point sort. |
3326 |
|
my $modifier = $TypeTable{$thisField->{type}}->{sort}; |
3327 |
|
# If the index is descending for this field, denote we want |
3328 |
|
# to reverse the sort order on this field. |
3329 |
|
if ($ordering eq 'descending') { |
3330 |
|
$modifier .= "r"; |
3331 |
|
} |
3332 |
|
# Store the position and modifier into the field spec, which |
3333 |
|
# will stop the inner loop. Note that the field number is |
3334 |
|
# 1-based in the sort command, so we have to increment the |
3335 |
|
# index. |
3336 |
|
$fieldSpec = ($i + 1) . $modifier; |
3337 |
|
} |
3338 |
} |
} |
3339 |
|
# Add this field to the sort command. |
3340 |
|
$retVal .= " -k $fieldSpec"; |
3341 |
} |
} |
3342 |
# Return the result. |
# Return the result. |
3343 |
return $retVal; |
return $retVal; |