[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.86, Mon Feb 12 19:23:32 2007 UTC revision 1.89, Thu Apr 12 05:59:41 2007 UTC
# Line 6  Line 6 
6      use Data::Dumper;      use Data::Dumper;
7      use XML::Simple;      use XML::Simple;
8      use DBQuery;      use DBQuery;
9      use DBObject;      use ERDBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12      use Digest::MD5 qw(md5_base64);      use Digest::MD5 qw(md5_base64);
# Line 655  Line 655 
655      return Data::Dumper::Dumper($self->{_metaData});      return Data::Dumper::Dumper($self->{_metaData});
656  }  }
657    
658    =head3 CreatePPO
659    
660    C<< ERDB::CreatePPO($erdbXMLFile, $ppoXMLFile); >>
661    
662    Create a PPO XML file from an ERDB data definition XML file. At the
663    current time, the PPO XML file can be used to create a database with
664    similar functionality. Eventually, the PPO will be able to use the
665    created XML to access the live ERDB database.
666    
667    =over 4
668    
669    =item erdbXMLFile
670    
671    Name of the XML data definition file for the ERDB database. This
672    file must exist.
673    
674    =item ppoXMLFile
675    
676    Output file for the PPO XML definition. If this file exists, it
677    will be overwritten.
678    
679    =back
680    
681    =cut
682    
sub CreatePPO {
    # Get the parameters.
    my ($erdbXMLFile, $ppoXMLFile) = @_;
    # First, we want to slurp in the ERDB XML file in its raw form.
    my $xml = ReadMetaXML($erdbXMLFile);
    # Create a variable to hold all of the objects in the PPO project.
    my @objects = ();
    # Get the relationship hash.
    my $relationships = $xml->{Relationships};
    # Loop through the entities. The names are processed in sorted order so
    # that the generated file has the same layout from one run to the next.
    my $entities = $xml->{Entities};
    for my $entityName (sort keys %{$entities}) {
        # Get the entity's data structures.
        my $entityObject = $entities->{$entityName};
        # We put the object's fields in here, according to their type.
        my (@object_refs, @scalars, @indexes, @arrays);
        # Create the ID field for the entity. We get the key type from the
        # entity object and compute the corresponding SQL type.
        my $type = $TypeTable{$entityObject->{keyType}}->{sqlType};
        push @scalars, { label => 'id', type => $type };
        # Loop through the entity fields.
        for my $fieldName ( sort keys %{$entityObject->{Fields}} ) {
            # Get the field object.
            my $fieldObject = $entityObject->{Fields}->{$fieldName};
            # Convert it to a scalar tag.
            my $scalar = _CreatePPOField($fieldName, $fieldObject);
            # If we have a relation, this field is stored in an array.
            # Otherwise, it is a scalar. The array tag has scalars
            # stored as an XML array. In ERDB, there is only ever one,
            # but PPO can have more.
            my $relation = $fieldObject->{relation};
            if ($relation) {
                push @arrays, { scalar => [$scalar] };
            } else {
                push @scalars, $scalar;
            }
        }
        # Loop through the relationships. If this entity is the to-entity
        # on a relationship of 1M arity, then it is implemented as a PPO
        # object reference.
        for my $relationshipName (sort keys %{$relationships}) {
            # Get the relationship data.
            my $relationshipData = $relationships->{$relationshipName};
            # If this entity is the relationship's "to" side and the arity
            # is 1M, we have an object reference back to the "from" entity.
            if ($relationshipData->{to} eq $entityName &&
                $relationshipData->{arity} eq '1M') {
                # Build the object reference tag.
                push @object_refs, { label => $relationshipName,
                                     type => $relationshipData->{from} };
            }
        }
        # Create the indexes. An entity without an Indexes member simply
        # gets no index tags.
        my $indexList = $entityObject->{Indexes};
        push @indexes, map { _CreatePPOIndex($_) } @{ $indexList || [] };
        # Build the object XML tree.
        my $object = { label => $entityName,
                       object_ref => \@object_refs,
                       scalar => \@scalars,
                       index => \@indexes,
                       array => \@arrays
                      };
        # Push the object onto the objects list.
        push @objects, $object;
    }
    # Loop through the relationships, searching for MMs. The 1Ms were
    # already handled by the entity search above.
    for my $relationshipName (sort keys %{$relationships}) {
        # Get this relationship's object.
        my $relationshipObject = $relationships->{$relationshipName};
        # Only proceed if it's many-to-many.
        if ($relationshipObject->{arity} eq 'MM') {
            # Create the tag lists for the relationship object.
            my (@object_refs, @scalars, @indexes);
            # The relationship will be created as an object with object
            # references for its links to the participating entities. The
            # "from" link is always emitted before the "to" link.
            for my $link (qw(from to)) {
                # Create an object_ref tag for this piece of the
                # relationship (from or to).
                my $object_ref = { label => $link,
                                   type => $relationshipObject->{$link} };
                push @object_refs, $object_ref;
            }
            # Loop through the intersection data fields, creating scalar tags.
            # There are no fancy array tags in a relationship.
            for my $fieldName (sort keys %{$relationshipObject->{Fields}}) {
                my $fieldObject = $relationshipObject->{Fields}->{$fieldName};
                push @scalars, _CreatePPOField($fieldName, $fieldObject);
            }
            # Finally, the indexes: currently we cannot support the to-index and
            # from-index in PPO, so we just process the alternate indexes (if any).
            my $indexList = $relationshipObject->{Indexes};
            push @indexes, map { _CreatePPOIndex($_) } @{ $indexList || [] };
            # Wrap up all the stuff about this relationship.
            my $object = { label => $relationshipName,
                           scalar => \@scalars,
                           object_ref => \@object_refs,
                           index => \@indexes
                         };
            # Push it into the object list.
            push @objects, $object;
        }
    }
    # Compute a title.
    my $title;
    if ($erdbXMLFile =~ /\/([^\/]+)DBD\.xml/) {
        # Here we have a standard file name we can use for a title.
        $title = $1;
    } else {
        # Here the file name is non-standard, so we carve up the
        # database title by deleting every whitespace character, period,
        # and comma. A character class is required for this: without the
        # brackets the pattern would only match the literal three-character
        # sequence "whitespace, period, comma".
        $title = $xml->{Title}->{content};
        $title =~ s/[\s\.,]//g;
    }
    # Wrap up the XML as a project.
    my $ppoXML = { project => { label => $title,
                                object => \@objects }};
    # Write out the results.
    my $ppoString = XML::Simple::XMLout($ppoXML,
                                        AttrIndent => 1,
                                        KeepRoot => 1);
    Tracer::PutFile($ppoXMLFile, [ $ppoString ]);
}
808    
809    
810    
811  =head3 FindIndexForEntity  =head3 FindIndexForEntity
812    
813  C<< my $indexFound = ERDB::FindIndexForEntity($xml, $entityName, $attributeName); >>  C<< my $indexFound = ERDB::FindIndexForEntity($xml, $entityName, $attributeName); >>
# Line 744  Line 897 
897      # Loop through the relations.      # Loop through the relations.
898      for my $relationName (@relNames) {      for my $relationName (@relNames) {
899          # Create a table for this relation.          # Create a table for this relation.
900          $self->CreateTable($relationName);          $self->CreateTable($relationName, 1);
901          Trace("Relation $relationName created.") if T(2);          Trace("Relation $relationName created.") if T(2);
902      }      }
903  }  }
# Line 2053  Line 2206 
2206    
2207  =item RETURN  =item RETURN
2208    
2209  Returns a list of B<DBObject>s that satisfy the query conditions.  Returns a list of B<ERDBObject>s that satisfy the query conditions.
2210    
2211  =back  =back
2212    
# Line 2773  Line 2926 
2926    
2927  =item RETURN  =item RETURN
2928    
2929  Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  Returns a B<ERDBObject> representing the desired entity instance, or an undefined value if no
2930  instance is found with the specified key.  instance is found with the specified key.
2931    
2932  =back  =back
# Line 3336  Line 3489 
3489      return $retVal;      return $retVal;
3490  }  }
3491    
3492    =head3 BeginTran
3493    
3494    C<< $erdb->BeginTran(); >>
3495    
3496    Start a database transaction.
3497    
3498    =cut
3499    
sub BeginTran {
    # The only argument is the ERDB object itself.
    my $self = shift;
    # Ask the underlying database handle to open the transaction. The
    # handle's result is passed straight back to the caller.
    my $dbh = $self->{_dbh};
    $dbh->begin_tran();
}
3505    
3506    =head3 CommitTran
3507    
3508    C<< $erdb->CommitTran(); >>
3509    
3510    Commit an active database transaction.
3511    
3512    =cut
3513    
sub CommitTran {
    # The only argument is the ERDB object itself.
    my $self = shift;
    # Ask the underlying database handle to commit the transaction. The
    # handle's result is passed straight back to the caller.
    my $dbh = $self->{_dbh};
    $dbh->commit_tran();
}
3518    
3519    =head3 RollbackTran
3520    
3521    C<< $erdb->RollbackTran(); >>
3522    
3523    Roll back an active database transaction.
3524    
3525    =cut
3526    
sub RollbackTran {
    # The only argument is the ERDB object itself.
    my $self = shift;
    # Ask the underlying database handle to roll the transaction back. The
    # handle's result is passed straight back to the caller.
    my $dbh = $self->{_dbh};
    $dbh->roll_tran();
}
3531    
3532    
3533  =head2 Data Mining Methods  =head2 Data Mining Methods
3534    
# Line 3492  Line 3685 
3685    
3686  =head2 Virtual Methods  =head2 Virtual Methods
3687    
=head3 _CreatePPOIndex

C<< my $index = ERDB::_CreatePPOIndex($indexObject); >>

Convert the XML for an ERDB index to the XML structure for a PPO
index.

=over 4

=item indexObject

ERDB XML structure for an index. Its C<IndexFields> member is a reference
to a list of index field structures, each of which has a C<name> member.

=item RETURN

PPO XML structure for the same index: a reference to a hash whose
C<attribute> member is a list containing one C<label> hash per index field.

=back

=cut

sub _CreatePPOIndex {
    # Get the parameters.
    my ($indexObject) = @_;
    # The incoming index contains a list of the index fields in the IndexFields
    # member. We loop through it to create the index tags. The unary plus
    # forces Perl to treat the inner braces as an anonymous hash constructor
    # instead of guessing between a hash and a bare block.
    my @fields = map { +{ label => _FixName($_->{name}) } } @{$indexObject->{IndexFields}};
    # Wrap the fields in attribute tags.
    my $retVal = { attribute => \@fields };
    # Return the result.
    return $retVal;
}
3718    
3719    =head3 _CreatePPOField
3720    
3721    C<< my $fieldXML = ERDB::_CreatePPOField($fieldName, $fieldObject); >>
3722    
3723    Convert the ERDB XML structure for a field to a PPO scalar XML structure.
3724    
3725    =over 4
3726    
3727    =item fieldName
3728    
3729    Name of the scalar field.
3730    
3731    =item fieldObject
3732    
3733    ERDB XML structure describing the field.
3734    
3735    =item RETURN
3736    
3737    Returns a PPO XML structure for the same field.
3738    
3739    =back
3740    
3741    =cut
3742    
sub _CreatePPOField {
    my ($fieldName, $fieldObject) = @_;
    # Look up the SQL type that the type table associates with the
    # field's ERDB type.
    my $sqlType = $TypeTable{$fieldObject->{type}}->{sqlType};
    # The scalar tag is labeled with the fixed-up field name and carries
    # the SQL type found above.
    return { label => _FixName($fieldName), type => $sqlType };
}
3755    
3756  =head3 CleanKeywords  =head3 CleanKeywords
3757    
3758  C<< my $cleanedString = $erdb->CleanKeywords($searchExpression); >>  C<< my $cleanedString = $erdb->CleanKeywords($searchExpression); >>
# Line 3543  Line 3804 
3804    
3805  C<< my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef); >>  C<< my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef); >>
3806    
3807  Create the relation map for an SQL query. The relation map is used by B<DBObject>  Create the relation map for an SQL query. The relation map is used by B<ERDBObject>
3808  to determine how to interpret the results of the query.  to determine how to interpret the results of the query.
3809    
3810  =over 4  =over 4
# Line 3560  Line 3821 
3821  =item RETURN  =item RETURN
3822    
3823  Returns a list of 2-tuples. Each tuple consists of an object name as used in the  Returns a list of 2-tuples. Each tuple consists of an object name as used in the
3824  query followed by the actual name of that object. This enables the B<DBObject> to  query followed by the actual name of that object. This enables the B<ERDBObject> to
3825  determine the order of the tables in the query and which object name belongs to each  determine the order of the tables in the query and which object name belongs to each
3826  mapped object name. Most of the time these two values are the same; however, if a  mapped object name. Most of the time these two values are the same; however, if a
3827  relation occurs twice in the query, the relation name in the field list and WHERE  relation occurs twice in the query, the relation name in the field list and WHERE
# Line 4254  Line 4515 
4515              if ($found == 0) {              if ($found == 0) {
4516                  push @{$indexList}, { IndexFields => [ {name => 'id', order => 'ascending'} ] };                  push @{$indexList}, { IndexFields => [ {name => 'id', order => 'ascending'} ] };
4517              }              }
4518              # Now we need to convert the relation's index list to an index table. We begin by creating              # Attach all the indexes to the relation.
4519              # an empty table in the relation structure.              _ProcessIndexes($indexList, $relation);
             $relation->{Indexes} = { };  
             # Loop through the indexes.  
             my $count = 0;  
             for my $index (@{$indexList}) {  
                 # Add this index to the index table.  
                 _AddIndex("idx$count", $relation, $index);  
                 # Increment the counter so that the next index has a different name.  
                 $count++;  
             }  
4520          }          }
4521          # Finally, we add the relation structure to the entity.          # Finally, we add the relation structure to the entity.
4522          $entityStructure->{Relations} = $relationTable;          $entityStructure->{Relations} = $relationTable;
# Line 4300  Line 4552 
4552          my $thisRelation = { Fields => _ReOrderRelationTable($relationshipStructure->{Fields}),          my $thisRelation = { Fields => _ReOrderRelationTable($relationshipStructure->{Fields}),
4553                               Indexes => { } };                               Indexes => { } };
4554          $relationshipStructure->{Relations} = { $relationshipName => $thisRelation };          $relationshipStructure->{Relations} = { $relationshipName => $thisRelation };
4555    
4556            # Add the alternate indexes (if any). This MUST be done before the FROM and
4557            # TO indexes, because it erases the relation's index list.
4558            if (exists $relationshipStructure->{Indexes}) {
4559                _ProcessIndexes($relationshipStructure->{Indexes}, $thisRelation);
4560            }
4561            # Add the relation to the master table.
4562          # Create the FROM and TO indexes.          # Create the FROM and TO indexes.
4563          _CreateRelationshipIndex("From", $relationshipName, $relationshipStructure);          _CreateRelationshipIndex("From", $relationshipName, $relationshipStructure);
4564          _CreateRelationshipIndex("To", $relationshipName, $relationshipStructure);          _CreateRelationshipIndex("To", $relationshipName, $relationshipStructure);
         # Add the relation to the master table.  
4565          $masterRelationTable{$relationshipName} = $thisRelation;          $masterRelationTable{$relationshipName} = $thisRelation;
4566      }      }
4567      # Now store the master relation table in the metadata structure.      # Now store the master relation table in the metadata structure.
# Line 4462  Line 4720 
4720      _AddIndex("idx$indexKey", $relationStructure, $newIndex);      _AddIndex("idx$indexKey", $relationStructure, $newIndex);
4721  }  }
4722    
4723    =head3 _ProcessIndexes
4724    
4725    C<< ERDB::_ProcessIndexes($indexList, $relation); >>
4726    
4727    Build the data structures for the specified indexes in the specified relation.
4728    
4729    =over 4
4730    
4731    =item indexList
4732    
4733    Reference to a list of indexes. Each index is a hash reference containing an optional
4734    C<Notes> value that describes the index and an C<IndexFields> value that is a reference
4735    to a list of index field structures. An index field structure, in turn, is a reference
4736    to a hash that contains a C<name> attribute for the field name and an C<order>
4737    attribute that specifies either C<ascending> or C<descending>. In this sense the
4738    index list encapsulates the XML C<Indexes> structure in the database definition.
4739    
4740    =item relation
4741    
4742    The structure that describes the current relation. The new index descriptors will
4743    be stored in the structure's C<Indexes> member. Any previous data in the structure
4744    will be lost.
4745    
4746    =back
4747    
4748    =cut
4749    
sub _ProcessIndexes {
    my ($indexList, $relation) = @_;
    # Start the relation off with an empty index table. Whatever was in
    # the Indexes member before is discarded.
    $relation->{Indexes} = { };
    # Add each index descriptor to the table under a sequentially numbered
    # name (idx0, idx1, ...) so that no two indexes share a name.
    my $nextNumber = 0;
    for my $indexDescriptor (@{$indexList}) {
        _AddIndex('idx' . $nextNumber++, $relation, $indexDescriptor);
    }
}
4765    
4766  =head3 _AddIndex  =head3 _AddIndex
4767    
4768  Add an index to a relation structure.  Add an index to a relation structure.

Legend:
Removed from v.1.86  
changed lines
  Added in v.1.89

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3