[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.87, Sun Feb 18 21:28:19 2007 UTC revision 1.90, Fri Apr 27 22:19:49 2007 UTC
# Line 6  Line 6 
6      use Data::Dumper;      use Data::Dumper;
7      use XML::Simple;      use XML::Simple;
8      use DBQuery;      use DBQuery;
9      use DBObject;      use ERDBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12      use Digest::MD5 qw(md5_base64);      use Digest::MD5 qw(md5_base64);
     use FIG;  
13      use CGI;      use CGI;
14    
15  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
# Line 655  Line 654 
654      return Data::Dumper::Dumper($self->{_metaData});      return Data::Dumper::Dumper($self->{_metaData});
655  }  }
656    
657    =head3 CreatePPO
658    
659    C<< ERDB::CreatePPO($erdbXMLFile, $ppoXMLFile); >>
660    
661    Create a PPO XML file from an ERDB data definition XML file. At the
662    current time, the PPO XML file can be used to create a database with
663    similar functionality. Eventually, the PPO will be able to use the
664    created XML to access the live ERDB database.
665    
666    =over 4
667    
668    =item erdbXMLFile
669    
670    Name of the XML data definition file for the ERDB database. This
671    file must exist.
672    
673    =item ppoXMLFile
674    
675    Output file for the PPO XML definition. If this file exists, it
676    will be overwritten.
677    
678    =back
679    
680    =cut
681    
# Build a PPO project XML file from an ERDB data definition XML file.
#
# Parameters:
#   erdbXMLFile  name of the ERDB XML definition file (read via ReadMetaXML)
#   ppoXMLFile   name of the output file (written via Tracer::PutFile)
#
# Each ERDB entity becomes a PPO object. Each many-to-many relationship also
# becomes a PPO object with object references to its two entities. A
# one-to-many relationship becomes an object reference on its to-entity.
sub CreatePPO {
    # Get the parameters.
    my ($erdbXMLFile, $ppoXMLFile) = @_;
    # First, we want to slurp in the ERDB XML file in its raw form.
    my $xml = ReadMetaXML($erdbXMLFile);
    # Create a variable to hold all of the objects in the PPO project.
    my @objects = ();
    # Get the relationship hash.
    my $relationships = $xml->{Relationships};
    # Loop through the entities.
    my $entities = $xml->{Entities};
    for my $entityName (keys %{$entities}) {
        # Get the entity's data structures.
        my $entityObject = $entities->{$entityName};
        # We put the object's fields in here, according to their type.
        my (@object_refs, @scalars, @indexes, @arrays);
        # Create the ID field for the entity. We get the key type from the
        # entity object and compute the corresponding SQL type.
        my $type = $TypeTable{$entityObject->{keyType}}->{sqlType};
        push @scalars, { label => 'id', type => $type };
        # Loop through the entity fields.
        for my $fieldName ( keys %{$entityObject->{Fields}} ) {
            # Get the field object.
            my $fieldObject = $entityObject->{Fields}->{$fieldName};
            # Convert it to a scalar tag.
            my $scalar = _CreatePPOField($fieldName, $fieldObject);
            # If we have a relation, this field is stored in an array.
            # otherwise, it is a scalar. The array tag has scalars
            # stored as an XML array. In ERDB, there is only ever one,
            # but PPO can have more.
            if ($fieldObject->{relation}) {
                push @arrays, { scalar => [$scalar] };
            } else {
                push @scalars, $scalar;
            }
        }
        # Loop through the relationships. If this entity is the to-entity
        # on a relationship of 1M arity, then it is implemented as a PPO
        # object reference.
        for my $relationshipName (keys %{$relationships}) {
            # Get the relationship data.
            my $relationshipData = $relationships->{$relationshipName};
            # If this entity is the relationship's "to" side and the arity
            # is 1M, we have an object reference back to the "from" entity.
            if ($relationshipData->{to} eq $entityName &&
                $relationshipData->{arity} eq '1M') {
                # Build the object reference tag.
                push @object_refs, { label => $relationshipName,
                                     type => $relationshipData->{from} };
            }
        }
        # Create the indexes. An entity with no Indexes member is treated
        # as having an empty index list instead of dereferencing undef.
        my $indexList = $entityObject->{Indexes} || [];
        push @indexes, map { _CreatePPOIndex($_) } @{$indexList};
        # Build the object XML tree.
        my $object = { label => $entityName,
                       object_ref => \@object_refs,
                       scalar => \@scalars,
                       index => \@indexes,
                       array => \@arrays
                      };
        # Push the object onto the objects list.
        push @objects, $object;
    }
    # Loop through the relationships, searching for MMs. The 1Ms were
    # already handled by the entity search above.
    for my $relationshipName (keys %{$relationships}) {
        # Get this relationship's object.
        my $relationshipObject = $relationships->{$relationshipName};
        # Only proceed if it's many-to-many.
        if ($relationshipObject->{arity} eq 'MM') {
            # Create the tag lists for the relationship object.
            my (@object_refs, @scalars, @indexes);
            # The relationship will be created as an object with object
            # references for its links to the participating entities.
            my %links = ( from_link => $relationshipObject->{from},
                          to_link => $relationshipObject->{to} );
            for my $link (keys %links) {
                # Create an object_ref tag for this piece of the
                # relationship (from or to).
                push @object_refs, { label => $link,
                                     type => $links{$link} };
            }
            # Loop through the intersection data fields, creating scalar tags.
            # There are no fancy array tags in a relationship.
            for my $fieldName (keys %{$relationshipObject->{Fields}}) {
                my $fieldObject = $relationshipObject->{Fields}->{$fieldName};
                push @scalars, _CreatePPOField($fieldName, $fieldObject);
            }
            # Finally, the indexes: currently we cannot support the to-index and
            # from-index in PPO, so we just process the alternate indexes.
            # A relationship with no Indexes member gets an empty list.
            my $indexList = $relationshipObject->{Indexes} || [];
            push @indexes, map { _CreatePPOIndex($_) } @{$indexList};
            # Wrap up all the stuff about this relationship.
            my $object = { label => $relationshipName,
                           scalar => \@scalars,
                           object_ref => \@object_refs,
                           index => \@indexes
                         };
            # Push it into the object list.
            push @objects, $object;
        }
    }
    # Compute a title.
    my $title;
    if ($erdbXMLFile =~ m{(?:/|^)([^/]+)DBD\.xml}) {
        # Here we have a standard file name we can use for a title: the
        # part of the base name that precedes "DBD.xml".
        $title = $1;
    } else {
        # Here the file name is non-standard, so we carve up the
        # database title by deleting every whitespace character, period,
        # and comma. A character class is required here; the pattern
        # \s\., would only match those three characters in sequence.
        $title = $xml->{Title}->{content};
        $title =~ s/[\s.,]//g;
    }
    # Wrap up the XML as a project.
    my $ppoXML = { project => { label => $title,
                                object => \@objects }};
    # Write out the results.
    my $ppoString = XML::Simple::XMLout($ppoXML,
                                        AttrIndent => 1,
                                        KeepRoot => 1);
    Tracer::PutFile($ppoXMLFile, [ $ppoString ]);
}
807    
808  =head3 FindIndexForEntity  =head3 FindIndexForEntity
809    
810  C<< my $indexFound = ERDB::FindIndexForEntity($xml, $entityName, $attributeName); >>  C<< my $indexFound = ERDB::FindIndexForEntity($xml, $entityName, $attributeName); >>
# Line 2053  Line 2203 
2203    
2204  =item RETURN  =item RETURN
2205    
2206  Returns a list of B<DBObject>s that satisfy the query conditions.  Returns a list of B<ERDBObject>s that satisfy the query conditions.
2207    
2208  =back  =back
2209    
# Line 2551  Line 2701 
2701          # leave extra room. We postulate a minimum row count of 1000 to          # leave extra room. We postulate a minimum row count of 1000 to
2702          # prevent problems with incoming empty load files.          # prevent problems with incoming empty load files.
2703          my $rowSize = $self->EstimateRowSize($relationName);          my $rowSize = $self->EstimateRowSize($relationName);
2704          my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);          my $estimate = $fileSize * 1.5 / $rowSize;
2705            if ($estimate < 1000) {
2706                $estimate = 1000;
2707            }
2708          # Re-create the table without its index.          # Re-create the table without its index.
2709          $self->CreateTable($relationName, 0, $estimate);          $self->CreateTable($relationName, 0, $estimate);
2710          # If this is a pre-index DBMS, create the index here.          # If this is a pre-index DBMS, create the index here.
# Line 2773  Line 2926 
2926    
2927  =item RETURN  =item RETURN
2928    
2929  Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  Returns a B<ERDBObject> representing the desired entity instance, or an undefined value if no
2930  instance is found with the specified key.  instance is found with the specified key.
2931    
2932  =back  =back
# Line 3376  Line 3529 
3529      $self->{_dbh}->roll_tran();      $self->{_dbh}->roll_tran();
3530  }  }
3531    
3532    =head3 UpdateField
3533    
3534    C<< my $count = $erdb->UpdateField($fieldName, $oldValue, $newValue, $filter, $parms); >>
3535    
3536    Update all occurrences of a specific field value to a new value. The number of rows changed will be
3537    returned.
3538    
3539    =over 4
3540    
3541    =item fieldName
3542    
3543    Name of the field in standard I<objectName>C<(>I<fieldName>C<)> format.
3544    
3545    =item oldValue
3546    
3547    Value to be modified. All occurrences of this value in the named field will be replaced by the
3548    new value.
3549    
3550    =item newValue
3551    
3552    New value to be substituted for the old value when it's found.
3553    
3554    =item filter
3555    
3556    A standard ERDB filter clause (see L</Get>). The filter will be applied before any substitutions take place.
3557    
3558    =item parms
3559    
3560    Reference to a list of parameter values in the filter.
3561    
3562    =item RETURN
3563    
3564    Returns the number of rows modified.
3565    
3566    =back
3567    
3568    =cut
3569    
# Replace every occurrence of one value in a field with a new value.
#
# Parameters:
#   fieldName  field in objectName(fieldName) format
#   oldValue   value to look for in the field
#   newValue   value to store in its place
#   filter     optional ERDB filter clause, ANDed with the old-value test
#   parms      optional reference to a list of parameter values for the filter
#
# Returns the value handed back by the database handle's SQL method, with a
# numerically-zero result normalized to a plain 0.
# Dies if fieldName is not in objectName(fieldName) format.
sub UpdateField {
    # Get the parameters.
    my ($self, $fieldName, $oldValue, $newValue, $filter, $parms) = @_;
    # Get the object and field names from the field name parameter. The
    # match is checked explicitly: after a failed match the capture
    # variables would still hold whatever an earlier match left behind.
    my ($objectName, $rawFieldName) =
        (defined $fieldName ? $fieldName =~ /^([^(]+)\(([^)]+)\)/ : ());
    if (! defined $objectName) {
        my $shown = (defined $fieldName ? $fieldName : '(undefined)');
        die "Invalid field name \"$shown\" in UpdateField: " .
            "expected objectName(fieldName) format";
    }
    my $realFieldName = _FixName($rawFieldName);
    # Add the old value to the filter. Note we allow the possibility that no
    # filter was specified.
    my $realFilter = "$fieldName = ?";
    if ($filter) {
        $realFilter .= " AND $filter";
    }
    # Format the query filter. Only the first returned value (the SQL
    # suffix) is needed, because there is a single object name.
    my ($suffix) = $self->_SetupSQL([$objectName], $realFilter);
    # Strip everything up to and including the first WHERE. The match is
    # non-greedy so a WHERE inside the caller's filter text is left alone.
    $suffix =~ s/^FROM.+?WHERE\s+//;
    # Create the update statement.
    my $command = "UPDATE $objectName SET $realFieldName = ? WHERE $suffix";
    # Get the database handle.
    my $dbh = $self->{_dbh};
    # Build the parameter list in placeholder order: the new value for the
    # SET clause, the old value for the first WHERE condition, then any
    # user-supplied filter parameters.
    my @params = ($newValue, $oldValue);
    if (defined $parms) {
        push @params, @{$parms};
    }
    # Execute the update.
    my $retVal = $dbh->SQL($command, 0, @params);
    # Make the funky zero a real zero.
    if ($retVal == 0) {
        $retVal = 0;
    }
    # Return the result.
    return $retVal;
}
3608    
3609    
3610  =head2 Data Mining Methods  =head2 Data Mining Methods
3611    
# Line 3532  Line 3762 
3762    
3763  =head2 Virtual Methods  =head2 Virtual Methods
3764    
3765    =head3 _CreatePPOIndex
3766    
3767    C<< my $index = ERDB::_CreatePPOIndex($indexObject); >>
3768    
3769    Convert the XML for an ERDB index to the XML structure for a PPO
3770    index.
3771    
3772    =over 4
3773    
3774    =item indexObject
        
        ERDB XML structure for an index.
3775    
3776    =item RETURN
3777    
3778    PPO XML structure for the same index.
3779    
3780    =back
3781    
3782    =cut
3783    
# Convert an ERDB index structure into a PPO index structure.
#
# Takes the ERDB index object and returns a hash reference whose
# "attribute" member lists one { label => ... } entry per index field.
sub _CreatePPOIndex {
    my ($indexObject) = @_;
    # Each entry in the IndexFields member supplies a name; the fixed-up
    # name becomes the label of one attribute tag.
    my @attributeTags =
        map { +{ label => _FixName($_->{name}) } } @{$indexObject->{IndexFields}};
    # Hand back the tags wrapped under the attribute key.
    return { attribute => \@attributeTags };
}
3795    
3796    =head3 _CreatePPOField
3797    
3798    C<< my $fieldXML = ERDB::_CreatePPOField($fieldName, $fieldObject); >>
3799    
3800    Convert the ERDB XML structure for a field to a PPO scalar XML structure.
3801    
3802    =over 4
3803    
3804    =item fieldName
3805    
3806    Name of the scalar field.
3807    
3808    =item fieldObject
3809    
3810    ERDB XML structure describing the field.
3811    
3812    =item RETURN
3813    
3814    Returns a PPO XML structure for the same field.
3815    
3816    =back
3817    
3818    =cut
3819    
# Convert an ERDB field structure into a PPO scalar structure.
#
# Takes the field name and the ERDB field object. Returns a hash reference
# with the fixed-up field name as "label" and the SQL type looked up in
# %TypeTable as "type".
sub _CreatePPOField {
    my ($fieldName, $fieldObject) = @_;
    # Look up the SQL type that corresponds to the field's ERDB type.
    my $sqlType = $TypeTable{$fieldObject->{type}}->{sqlType};
    # Run the field name through the name fixer to get the label.
    my $ppoLabel = _FixName($fieldName);
    # The scalar tag is just the label and type pair.
    return { label => $ppoLabel, type => $sqlType };
}
3832    
3833  =head3 CleanKeywords  =head3 CleanKeywords
3834    
3835  C<< my $cleanedString = $erdb->CleanKeywords($searchExpression); >>  C<< my $cleanedString = $erdb->CleanKeywords($searchExpression); >>
# Line 3583  Line 3881 
3881    
3882  C<< my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef); >>  C<< my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef); >>
3883    
3884  Create the relation map for an SQL query. The relation map is used by B<DBObject>  Create the relation map for an SQL query. The relation map is used by B<ERDBObject>
3885  to determine how to interpret the results of the query.  to determine how to interpret the results of the query.
3886    
3887  =over 4  =over 4
# Line 3600  Line 3898 
3898  =item RETURN  =item RETURN
3899    
3900  Returns a list of 2-tuples. Each tuple consists of an object name as used in the  Returns a list of 2-tuples. Each tuple consists of an object name as used in the
3901  query followed by the actual name of that object. This enables the B<DBObject> to  query followed by the actual name of that object. This enables the B<ERDBObject> to
3902  determine the order of the tables in the query and which object name belongs to each  determine the order of the tables in the query and which object name belongs to each
3903  mapped object name. Most of the time these two values are the same; however, if a  mapped object name. Most of the time these two values are the same; however, if a
3904  relation occurs twice in the query, the relation name in the field list and WHERE  relation occurs twice in the query, the relation name in the field list and WHERE

Legend:
Removed from v.1.87  
changed lines
  Added in v.1.90

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3