[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.87, Sun Feb 18 21:28:19 2007 UTC revision 1.91, Wed May 2 05:51:48 2007 UTC
# Line 6  Line 6 
6      use Data::Dumper;      use Data::Dumper;
7      use XML::Simple;      use XML::Simple;
8      use DBQuery;      use DBQuery;
9      use DBObject;      use ERDBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12      use Digest::MD5 qw(md5_base64);      use Digest::MD5 qw(md5_base64);
     use FIG;  
13      use CGI;      use CGI;
14    
15  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
# Line 373  Line 372 
372                   'medium-string' =>                   'medium-string' =>
373                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, sort => "",                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, sort => "",
374                                 indexMod =>   0, notes => "character string, 0 to 160 characters"},                                 indexMod =>   0, notes => "character string, 0 to 160 characters"},
375                     'long-string' =>
376                                 { sqlType => 'VARCHAR(500)',       maxLen => 500,          avglen => 255, sort => "",
377                                   indexMod =>   0, notes => "character string, 0 to 500 characters"},
378                  );                  );
379    
380  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 655  Line 657 
657      return Data::Dumper::Dumper($self->{_metaData});      return Data::Dumper::Dumper($self->{_metaData});
658  }  }
659    
660    =head3 CreatePPO
661    
662    C<< ERDB::CreatePPO($erdbXMLFile, $ppoXMLFile); >>
663    
664    Create a PPO XML file from an ERDB data definition XML file. At the
665    current time, the PPO XML file can be used to create a database with
666    similar functionality. Eventually, the PPO will be able to use the
667    created XML to access the live ERDB database.
668    
669    =over 4
670    
671    =item erdbXMLFile
672    
673    Name of the XML data definition file for the ERDB database. This
674    file must exist.
675    
676    =item ppoXMLFile
677    
678    Output file for the PPO XML definition. If this file exists, it
679    will be overwritten.
680    
681    =back
682    
683    =cut
684    
685    sub CreatePPO {
686        # Get the parameters.
687        my ($erdbXMLFile, $ppoXMLFile) = @_;
688        # First, we want to slurp in the ERDB XML file in its raw form.
689        my $xml = ReadMetaXML($erdbXMLFile);
690        # Create a variable to hold all of the objects in the PPO project.
691        my @objects = ();
692        # Get the relationship hash.
693        my $relationships = $xml->{Relationships};
694        # Loop through the entities.
695        my $entities = $xml->{Entities};
696        for my $entityName (keys %{$entities}) {
697            # Get the entity's data structures.
698            my $entityObject = $entities->{$entityName};
699            # We put the object's fields in here, according to their type.
700            my (@object_refs, @scalars, @indexes, @arrays);
701            # Create the ID field for the entity. We get the key type from the
702            # entity object and compute the corresponding SQL type.
703            my $type = $TypeTable{$entityObject->{keyType}}->{sqlType};
704            push @scalars, { label => 'id', type => $type };
705            # Loop through the entity fields.
706            for my $fieldName ( keys %{$entityObject->{Fields}} ) {
707                # Get the field object.
708                my $fieldObject = $entityObject->{Fields}->{$fieldName};
709                # Convert it to a scalar tag.
710                my $scalar = _CreatePPOField($fieldName, $fieldObject);
711                # If we have a relation, this field is stored in an array.
712                # otherwise, it is a scalar. The array tag has scalars
713                # stored as an XML array. In ERDB, there is only ever one,
714                # but PPO can have more.
715                my $relation = $fieldObject->{relation};
716                if ($relation) {
717                    push @arrays, { scalar => [$scalar] };
718                } else {
719                    push @scalars, $scalar;
720                }
721            }
722            # Loop through the relationships. If this entity is the to-entity
723            # on a relationship of 1M arity, then it is implemented as a PPO
724            # object reference.
725            for my $relationshipName (keys %{$relationships}) {
726                # Get the relationship data.
727                my $relationshipData = $relationships->{$relationshipName};
728                # If we have a from for this entity and an arity of 1M, we
729                # have an object reference.
730                if ($relationshipData->{to} eq $entityName &&
731                    $relationshipData->{arity} eq '1M') {
732                    # Build the object reference tag.
733                    push @object_refs, { label => $relationshipName,
734                                         type => $relationshipData->{from} };
735                }
736            }
737            # Create the indexes.
738            my $indexList = $entityObject->{Indexes};
739            push @indexes, map { _CreatePPOIndex($_) } @{$indexList};
740            # Build the object XML tree.
741            my $object = { label => $entityName,
742                           object_ref => \@object_refs,
743                           scalar => \@scalars,
744                           index => \@indexes,
745                           array => \@arrays
746                          };
747            # Push the object onto the objects list.
748            push @objects, $object;
749        }
750        # Loop through the relationships, searching for MMs. The 1Ms were
751        # already handled by the entity search above.
752        for my $relationshipName (keys %{$relationships}) {
753            # Get this relationship's object.
754            my $relationshipObject = $relationships->{$relationshipName};
755            # Only proceed if it's many-to-many.
756            if ($relationshipObject->{arity} eq 'MM') {
757                # Create the tag lists for the relationship object.
758                my (@object_refs, @scalars, @indexes);
759                # The relationship will be created as an object with object
760                # references for its links to the participating entities.
761                my %links = ( from_link => $relationshipObject->{from},
762                              to_link => $relationshipObject->{to} );
763                for my $link (keys %links) {
764                    # Create an object_ref tag for this piece of the
765                    # relationship (from or to).
766                    my $object_ref = { label => $link,
767                                       type => $links{$link} };
768                    push @object_refs, $object_ref;
769                }
770                # Loop through the intersection data fields, creating scalar tags.
771                # There are no fancy array tags in a relationship.
772                for my $fieldName (keys %{$relationshipObject->{Fields}}) {
773                    my $fieldObject = $relationshipObject->{Fields}->{$fieldName};
774                    push @scalars, _CreatePPOField($fieldName, $fieldObject);
775                }
776                # Finally, the indexes: currently we cannot support the to-index and
777                # from-index in PPO, so we just process the alternate indexes.
778                my $indexList = $relationshipObject->{Indexes};
779                push @indexes, map { _CreatePPOIndex($_) } @{$indexList};
780                # Wrap up all the stuff about this relationship.
781                my $object = { label => $relationshipName,
782                               scalar => \@scalars,
783                               object_ref => \@object_refs,
784                               index => \@indexes
785                             };
786                # Push it into the object list.
787                push @objects, $object;
788            }
789        }
790        # Compute a title.
791        my $title;
792        if ($erdbXMLFile =~ /(\/|^)([^\/]+)DBD\.xml/) {
793            # Here we have a standard file name we can use for a title.
794            $title = $2;
795        } else {
796            # Here the file name is non-standard, so we carve up the
797            # database title.
798            $title = $xml->{Title}->{content};
799            $title =~ s/\s\.,//g;
800        }
801        # Wrap up the XML as a project.
802        my $ppoXML = { project => { label => $title,
803                                    object => \@objects }};
804        # Write out the results.
805        my $ppoString = XML::Simple::XMLout($ppoXML,
806                                            AttrIndent => 1,
807                                            KeepRoot => 1);
808        Tracer::PutFile($ppoXMLFile, [ $ppoString ]);
809    }
810    
811  =head3 FindIndexForEntity  =head3 FindIndexForEntity
812    
813  C<< my $indexFound = ERDB::FindIndexForEntity($xml, $entityName, $attributeName); >>  C<< my $indexFound = ERDB::FindIndexForEntity($xml, $entityName, $attributeName); >>
# Line 1913  Line 2066 
2066      $dbh->SQL($command, undef, @parms);      $dbh->SQL($command, undef, @parms);
2067  }  }
2068    
2069    =head3 DeleteLike
2070    
2071    C<< my $deleteCount = $erdb->DeleteLike($relName, $filter, \@parms); >>
2072    
2073    Delete all the relationship rows that satisfy a particular filter condition. Unlike a normal
2074    filter, only fields from the relationship itself can be used.
2075    
2076    =over 4
2077    
2078    =item relName
2079    
2080    Name of the relationship whose records are to be deleted.
2081    
2082    =item filter
2083    
2084    A filter clause (L</Get>-style) for the delete query.
2085    
2086    =item parms
2087    
2088    Reference to a list of parameters for the filter clause.
2089    
2090    =item RETURN
2091    
2092    Returns a count of the number of rows deleted.
2093    
2094    =back
2095    
2096    =cut
2097    
2098    sub DeleteLike {
2099        # Get the parameters.
2100        my ($self, $objectName, $filter, $parms) = @_;
2101        # Declare the return variable.
2102        my $retVal;
2103        # Insure the parms argument is an array reference if the caller left it off.
2104        if (! defined($parms)) {
2105            $parms = [];
2106        }
2107        # Insure we have a relationship. The main reason for this is if we delete an entity
2108        # instance we have to yank out a bunch of other stuff with it.
2109        if ($self->IsEntity($objectName)) {
2110            Confess("Cannot use DeleteLike on $objectName, because it is not a relationship.");
2111        } else {
2112            # Create the SQL command suffix to get the desierd records.
2113            my ($suffix) = $self->_SetupSQL([$objectName], $filter);
2114            # Convert it to a DELETE command.
2115            my $command = "DELETE $suffix";
2116            # Execute the command.
2117            my $dbh = $self->{_dbh};
2118            my $result = $dbh->SQL($command, 0, @{$parms});
2119            # Check the results. Note we convert the "0D0" result to a real zero.
2120            # A failure causes an abnormal termination, so the caller isn't going to
2121            # worry about it.
2122            if (! defined $result) {
2123                Confess("Error deleting from $objectName: " . $dbh->errstr());
2124            } elsif ($result == 0) {
2125                $retVal = 0;
2126            } else {
2127                $retVal = $result;
2128            }
2129        }
2130        # Return the result count.
2131        return $retVal;
2132    }
2133    
2134  =head3 SortNeeded  =head3 SortNeeded
2135    
2136  C<< my $parms = $erdb->SortNeeded($relationName); >>  C<< my $parms = $erdb->SortNeeded($relationName); >>
# Line 2053  Line 2271 
2271    
2272  =item RETURN  =item RETURN
2273    
2274  Returns a list of B<DBObject>s that satisfy the query conditions.  Returns a list of B<ERDBObject>s that satisfy the query conditions.
2275    
2276  =back  =back
2277    
# Line 2551  Line 2769 
2769          # leave extra room. We postulate a minimum row count of 1000 to          # leave extra room. We postulate a minimum row count of 1000 to
2770          # prevent problems with incoming empty load files.          # prevent problems with incoming empty load files.
2771          my $rowSize = $self->EstimateRowSize($relationName);          my $rowSize = $self->EstimateRowSize($relationName);
2772          my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);          my $estimate = $fileSize * 1.5 / $rowSize;
2773            if ($estimate < 1000) {
2774                $estimate = 1000;
2775            }
2776          # Re-create the table without its index.          # Re-create the table without its index.
2777          $self->CreateTable($relationName, 0, $estimate);          $self->CreateTable($relationName, 0, $estimate);
2778          # If this is a pre-index DBMS, create the index here.          # If this is a pre-index DBMS, create the index here.
# Line 2773  Line 2994 
2994    
2995  =item RETURN  =item RETURN
2996    
2997  Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  Returns a B<ERDBObject> representing the desired entity instance, or an undefined value if no
2998  instance is found with the specified key.  instance is found with the specified key.
2999    
3000  =back  =back
# Line 3376  Line 3597 
3597      $self->{_dbh}->roll_tran();      $self->{_dbh}->roll_tran();
3598  }  }
3599    
3600    =head3 UpdateField
3601    
3602    C<< my $count = $erdb->UpdateField($objectNames, $fieldName, $oldValue, $newValue, $filter, $parms); >>
3603    
3604    Update all occurrences of a specific field value to a new value. The number of rows changed will be
3605    returned.
3606    
3607    =over 4
3608    
3609    =item fieldName
3610    
3611    Name of the field in standard I<objectName>C<(>I<fieldName>C<)> format.
3612    
3613    =item oldValue
3614    
3615    Value to be modified. All occurrences of this value in the named field will be replaced by the
3616    new value.
3617    
3618    =item newValue
3619    
3620    New value to be substituted for the old value when it's found.
3621    
3622    =item filter
3623    
3624    A standard ERDB filter clause (see L</Get>). The filter will be applied before any substitutions take place.
3625    
3626    =item parms
3627    
3628    Reference to a list of parameter values in the filter.
3629    
3630    =item RETURN
3631    
3632    Returns the number of rows modified.
3633    
3634    =back
3635    
3636    =cut
3637    
3638    sub UpdateField {
3639        # Get the parameters.
3640        my ($self, $fieldName, $oldValue, $newValue, $filter, $parms) = @_;
3641        # Get the object and field names from the field name parameter.
3642        $fieldName =~ /^([^(]+)\(([^)]+)\)/;
3643        my $objectName = $1;
3644        my $realFieldName = _FixName($2);
3645        # Add the old value to the filter. Note we allow the possibility that no
3646        # filter was specified.
3647        my $realFilter = "$fieldName = ?";
3648        if ($filter) {
3649            $realFilter .= " AND $filter";
3650        }
3651        # Format the query filter.
3652        my ($suffix, $mappedNameListRef, $mappedNameHashRef) =
3653            $self->_SetupSQL([$objectName], $realFilter);
3654        # Create the query. Since there is only one object name, the mapped-name data is not
3655        # necessary. Neither is the FROM clause.
3656        $suffix =~ s/^FROM.+WHERE\s+//;
3657        # Create the update statement.
3658        my $command = "UPDATE $objectName SET $realFieldName = ? WHERE $suffix";
3659        # Get the database handle.
3660        my $dbh = $self->{_dbh};
3661        # Add the old and new values to the parameter list. Note we allow the possibility that
3662        # there are no user-supplied parameters.
3663        my @params = ($newValue, $oldValue);
3664        if (defined $parms) {
3665            push @params, @{$parms};
3666        }
3667        # Execute the update.
3668        my $retVal = $dbh->SQL($command, 0, @params);
3669        # Make the funky zero a real zero.
3670        if ($retVal == 0) {
3671            $retVal = 0;
3672        }
3673        # Return the result.
3674        return $retVal;
3675    }
3676    
3677    
3678  =head2 Data Mining Methods  =head2 Data Mining Methods
3679    
# Line 3532  Line 3830 
3830    
3831  =head2 Virtual Methods  =head2 Virtual Methods
3832    
3833    =head3 _CreatePPOIndex
3834    
3835    C<< my $index = ERDB::_CreatePPOIndex($indexObject); >>
3836    
3837    Convert the XML for an ERDB index to the XML structure for a PPO
3838    index.
3839    
3840    =over 4
3841    
3842    ERDB XML structure for an index.
3843    
3844    =item RETURN
3845    
3846    PPO XML structure for the same index.
3847    
3848    =back
3849    
3850    =cut
3851    
3852    sub _CreatePPOIndex {
3853        # Get the parameters.
3854        my ($indexObject) = @_;
3855        # The incoming index contains a list of the index fields in the IndexFields
3856        # member. We loop through it to create the index tags.
3857        my @fields = map { { label => _FixName($_->{name}) } } @{$indexObject->{IndexFields}};
3858        # Wrap the fields in attribute tags.
3859        my $retVal = { attribute => \@fields };
3860        # Return the result.
3861        return $retVal;
3862    }
3863    
3864    =head3 _CreatePPOField
3865    
3866    C<< my $fieldXML = ERDB::_CreatePPOField($fieldName, $fieldObject); >>
3867    
3868    Convert the ERDB XML structure for a field to a PPO scalar XML structure.
3869    
3870    =over 4
3871    
3872    =item fieldName
3873    
3874    Name of the scalar field.
3875    
3876    =item fieldObject
3877    
3878    ERDB XML structure describing the field.
3879    
3880    =item RETURN
3881    
3882    Returns a PPO XML structure for the same field.
3883    
3884    =back
3885    
3886    =cut
3887    
3888    sub _CreatePPOField {
3889        # Get the parameters.
3890        my ($fieldName, $fieldObject) = @_;
3891        # Get the field type.
3892        my $type = $TypeTable{$fieldObject->{type}}->{sqlType};
3893        # Fix up the field name.
3894        $fieldName = _FixName($fieldName);
3895        # Build the scalar tag.
3896        my $retVal = { label => $fieldName, type => $type };
3897        # Return the result.
3898        return $retVal;
3899    }
3900    
3901  =head3 CleanKeywords  =head3 CleanKeywords
3902    
3903  C<< my $cleanedString = $erdb->CleanKeywords($searchExpression); >>  C<< my $cleanedString = $erdb->CleanKeywords($searchExpression); >>
# Line 3583  Line 3949 
3949    
3950  C<< my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef); >>  C<< my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef); >>
3951    
3952  Create the relation map for an SQL query. The relation map is used by B<DBObject>  Create the relation map for an SQL query. The relation map is used by B<ERDBObject>
3953  to determine how to interpret the results of the query.  to determine how to interpret the results of the query.
3954    
3955  =over 4  =over 4
# Line 3600  Line 3966 
3966  =item RETURN  =item RETURN
3967    
3968  Returns a list of 2-tuples. Each tuple consists of an object name as used in the  Returns a list of 2-tuples. Each tuple consists of an object name as used in the
3969  query followed by the actual name of that object. This enables the B<DBObject> to  query followed by the actual name of that object. This enables the B<ERDBObject> to
3970  determine the order of the tables in the query and which object name belongs to each  determine the order of the tables in the query and which object name belongs to each
3971  mapped object name. Most of the time these two values are the same; however, if a  mapped object name. Most of the time these two values are the same; however, if a
3972  relation occurs twice in the query, the relation name in the field list and WHERE  relation occurs twice in the query, the relation name in the field list and WHERE

Legend:
Removed from v.1.87  
changed lines
  Added in v.1.91

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3