[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.41, Wed Mar 29 20:51:37 2006 UTC revision 1.42, Wed Apr 19 03:34:15 2006 UTC
# Line 9  Line 9 
9      use DBObject;      use DBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12        use Digest::MD5 qw(md5_base64);
13      use FIG;      use FIG;
14    
15  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
# Line 125  Line 126 
126    
127  variable-length string, maximum 255 characters  variable-length string, maximum 255 characters
128    
129    =item hash-string
130    
131    variable-length string, maximum 22 characters
132    
133  =back  =back
134    
135    The hash-string data type has a special meaning. The actual key passed into the loader will
136    be a string, but it will be digested into a 22-character MD5 code to save space. Although the
137    MD5 algorithm is not perfect, it is extremely unlikely two strings will have the same
138    digest. Therefore, it is presumed the keys will be unique. When the database is actually
139    in use, the hashed keys will be presented rather than the original values. For this reason,
140    they should not be used for entities where the key is meaningful.
141    
142  =head3 Global Tags  =head3 Global Tags
143    
144  The entire database definition must be inside a B<Database> tag. The display name of  The entire database definition must be inside a B<Database> tag. The display name of
# Line 310  Line 322 
322                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
323                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
324                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, dataGen => "IntGen(0, 1)" },                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, dataGen => "IntGen(0, 1)" },
325                     'hash-string' =>
326                                 { sqlType => 'VARCHAR(22)',        maxLen => 22,           avgLen =>  22, dataGen => "SringGen(22)" },
327                   'key-string' =>                   'key-string' =>
328                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
329                   'name-string' =>                   'name-string' =>
# Line 687  Line 701 
701      return $retVal;      return $retVal;
702  }  }
703    
704    =head3 DigestFields
705    
706    C<< $erdb->DigestFields($relName, $fieldList); >>
707    
708    Digest the strings in the field list that correspond to data type C<hash-string> in the
709    specified relation.
710    
711    =over 4
712    
713    =item relName
714    
715    Name of the relation to which the fields belong.
716    
717    =item fieldList
718    
719    List of field contents to be loaded into the relation.
720    
721    =back
722    
723    =cut
724    #: Return Type ;
725    sub DigestFields {
726        # Get the parameters.
727        my ($self, $relName, $fieldList) = @_;
728        # Get the relation definition.
729        my $relData = $self->_FindRelation($relName);
730        # Get the list of field descriptors.
731        my $fieldTypes = $relData->{Fields};
732        my $fieldCount = scalar @{$fieldTypes};
733        # Loop through the two lists.
734        for (my $i = 0; $i < $fieldCount; $i++) {
735            # Get the type of the current field.
736            my $fieldType = $fieldTypes->[$i]->{type};
737            # If it's a hash string, digest it in place.
738            if ($fieldType eq 'hash-string') {
739                $fieldList->[$i] = md5_base64($fieldList->[$i]);
740            }
741        }
742    }
743    
744  =head3 CreateIndex  =head3 CreateIndex
745    
746  C<< $erdb->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
# Line 2685  Line 2739 
2739      return $metadata;      return $metadata;
2740  }  }
2741    
2742    =head3 SortNeeded
2743    
2744    C<< my $flag = $erdb->SortNeeded($relationName); >>
2745    
2746    Return TRUE if the specified relation should be sorted during loading to remove duplicate keys,
2747    else FALSE.
2748    
2749    =over 4
2750    
2751    =item relationName
2752    
2753    Name of the relation to be examined.
2754    
2755    =item RETURN
2756    
2757    Returns TRUE if the relation needs a sort, else FALSE.
2758    
2759    =back
2760    
2761    =cut
2762    #: Return Type $;
2763    sub SortNeeded {
2764        # Get the parameters.
2765        my ($self, $relationName) = @_;
2766        # Declare the return variable.
2767        my $retVal = 0;
2768        # Find out if the relation is a primary entity relation.
2769        my $entityTable = $self->{Entities};
2770        if (exists $entityTable->{$relationName}) {
2771            my $keyType = $entityTable->{$relationName}->{keyType};
2772            # If the key is not a hash string, we must do the sort.
2773            if ($keyType ne 'hash-string') {
2774                $retVal = 1;
2775            }
2776        }
2777        # Return the result.
2778        return $retVal;
2779    }
2780    
2781  =head3 CreateRelationshipIndex  =head3 CreateRelationshipIndex
2782    
2783  Create an index for a relationship's relation.  Create an index for a relationship's relation.

Legend:
Removed from v.1.41  
changed lines
  Added in v.1.42

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3