[Bio] / Sprout / CustomAttributes.pm Repository:
ViewVC logotype

Diff of /Sprout/CustomAttributes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Fri Nov 3 16:50:04 2006 UTC revision 1.3, Thu Nov 9 21:19:53 2006 UTC
# Line 5  Line 5 
5      require Exporter;      require Exporter;
6      use ERDB;      use ERDB;
7      @ISA = qw(Exporter ERDB);      @ISA = qw(Exporter ERDB);
8        @EXPORT = qw(GetAttributes AddAttribute DeleteAttribute ChangeAttribute MatchSqlPattern);
9      use strict;      use strict;
10      use Tracer;      use Tracer;
11      use FIG;      use FIG;
# Line 24  Line 25 
25  the values of the attribute C<essential> in the B<Feature> entity, you  the values of the attribute C<essential> in the B<Feature> entity, you
26  would code  would code
27    
28      my @values = $attrDB->GetAttributes($fid, Feature => 'essential');      my @values = $attrDB->GetAttributeValues($fid, Feature => 'essential');
29    
30  where I<$fid> contains the ID of the desired feature. Each attribute has  where I<$fid> contains the ID of the desired feature. Each attribute has
31  an alternate index to allow searching for attributes by value.  an alternate index to allow searching for attributes by value.
# Line 77  Line 78 
78    
79  =back  =back
80    
81  =head2 Impliementation Note  =head2 Implementation Note
82    
83  The L</Refresh> method reloads the entities in the database. If new  The L</Refresh> method reloads the entities in the database. If new
84  entity types are added, that method will need to be adjusted accordingly.  entity types are added, that method will need to be adjusted accordingly.
# Line 86  Line 87 
87    
88  =head3 new  =head3 new
89    
90  C<< my $attrDB = CustomAttributes->new(); >>  C<< my $attrDB = CustomAttributes->new($splitter); >>
91    
92  Construct a new CustomAttributes object. This object is only used to load  Construct a new CustomAttributes object. This object cannot be used to add or
93  or access data. To add new attributes, use the static L</NewAttribute>  delete keys because that requires modifying the database design. To do that,
94  method.  you need to use the static L</StoreAttributeKey> or L</DeleteAttributeKey>
95    methods.
96    
97    =over 4
98    
99    =item splitter
100    
101    Value to be used to split attribute values into sections in the
102    L</Fig Replacement Methods>. The default is a double colon C<::>.
103    If you do not use the replacement methods, you do not need to
104    worry about this parameter.
105    
106    =back
107    
108  =cut  =cut
109    
110  sub new {  sub new {
111      # Get the parameters.      # Get the parameters.
112      my ($class) = @_;      my ($class, $splitter) = @_;
113      # Connect to the database.      # Connect to the database.
114      my $dbh = DBKernel->new($FIG_Config::attrDbms, $FIG_Config::attrDbName,      my $dbh = DBKernel->new($FIG_Config::attrDbms, $FIG_Config::attrDbName,
115                              $FIG_Config::attrUser, $FIG_Config::attrPass,                              $FIG_Config::attrUser, $FIG_Config::attrPass,
# Line 105  Line 118 
118      # Create the ERDB object.      # Create the ERDB object.
119      my $xmlFileName = $FIG_Config::attrDBD;      my $xmlFileName = $FIG_Config::attrDBD;
120      my $retVal = ERDB::new($class, $dbh, $xmlFileName);      my $retVal = ERDB::new($class, $dbh, $xmlFileName);
121        # Store the splitter value.
122        $retVal->{splitter} = (defined($splitter) ? $splitter : '::');
123      # Return the result.      # Return the result.
124      return $retVal;      return $retVal;
125  }  }
126    
127  =head3 GetAttributes  =head3 GetAttributeValues
128    
129  C<< my @values = $attrDB->GetAttributes($id, $entityName => $attributeName); >>  C<< my @values = $attrDB->GetAttributeValues($id, $entityName => $attributeName); >>
130    
131  Return all the values of the specified attribute for the specified entity instance.  Return all the values of the specified attribute for the specified entity instance.
132  A list of vaues will be returned. If the entity instance does not exist or the  A list of vaues will be returned. If the entity instance does not exist or the
# Line 120  Line 135 
135    
136  A typical invocation would look like this:  A typical invocation would look like this:
137    
138      my @values = $sttrDB->GetAttributes($fid, Feature => 'essential');      my @values = $sttrDB->GetAttributeValues($fid, Feature => 'essential');
139    
140  Here the user is asking for the values of the C<essential> attribute for the  Here the user is asking for the values of the C<essential> attribute for the
141  B<Feature> with the specified ID. If the identified feature is not essential,  B<Feature> with the specified ID. If the identified feature is not essential,
# Line 152  Line 167 
167    
168  =cut  =cut
169    
170  sub GetAttributes {  sub GetAttributeValues {
171      # Get the parameters.      # Get the parameters.
172      my ($self, $id, $entityName, $attributeName) = @_;      my ($self, $id, $entityName, $attributeName) = @_;
173      # Get the data.      # Get the data.
# Line 161  Line 176 
176      return @retVal;      return @retVal;
177  }  }
178    
179  =head3 StoreAttribute  =head3 StoreAttributeKey
180    
181  C<< my $attrDB = CustomAttributes::StoreAttribute($entityName, $attributeName, $type, $notes); >>  C<< my $attrDB = CustomAttributes::StoreAttributeKey($entityName, $attributeName, $type, $notes); >>
182    
183  Create or update an attribute for the database. This method will update the database definition  Create or update an attribute for the database. This method will update the database definition
184  XML, but it will not create the table. It will connect to the database so that the caller  XML, but it will not create the table. It will connect to the database so that the caller
# Line 198  Line 213 
213    
214  =cut  =cut
215    
216  sub StoreAttribute {  sub StoreAttributeKey {
217      # Get the parameters.      # Get the parameters.
218      my ($entityName, $attributeName, $type, $notes) = @_;      my ($entityName, $attributeName, $type, $notes) = @_;
219      # Get the data type hash.      # Get the data type hash.
# Line 222  Line 237 
237          my $entityData = $entityHash->{$entityName};          my $entityData = $entityHash->{$entityName};
238          my $fieldHash = ERDB::GetEntityFieldHash($metadata, $entityName);          my $fieldHash = ERDB::GetEntityFieldHash($metadata, $entityName);
239          # Compute the attribute's relation name.          # Compute the attribute's relation name.
240          my $relName = join("", $entityName, map { ucfirst $_ } split(/-/, $attributeName));          my $relName = join("", $entityName, map { ucfirst $_ } split(/-|_/, $attributeName));
241          # Store the attribute's field data. Note the use of the "content" hash for          # Store the attribute's field data. Note the use of the "content" hash for
242          # the notes. This is how the XML writer knows Notes is a text tag instead of          # the notes. This is how the XML writer knows Notes is a text tag instead of
243          # an attribute.          # an attribute.
# Line 244  Line 259 
259    
260  =head3 Refresh  =head3 Refresh
261    
262  C<< $attrDB->Refresh(); >>  C<< $attrDB->Refresh($fig); >>
263    
264  Refresh the primary entity tables from the FIG data store. This method basically  Refresh the primary entity tables from the FIG data store. This method basically
265  drops and reloads the main tables of the custom attributes database.  drops and reloads the main tables of the custom attributes database.
266    
267    =over 4
268    
269    =item fig
270    
271    FIG-like object that can be used to find genomes and features.
272    
273    =back
274    
275  =cut  =cut
276    
277  sub Refresh {  sub Refresh {
278      # Get the parameters.      # Get the parameters.
279      my ($self) = @_;      my ($self, $fig) = @_;
280      # Create load objects for the genomes and the features.      # Create load objects for the genomes and the features.
281      my $loadGenome = ERDBLoad->new($self, 'Genome', $FIG_Config::temp);      my $loadGenome = ERDBLoad->new($self, 'Genome', $FIG_Config::temp);
282      my $loadFeature = ERDBLoad->new($self, 'Feature', $FIG_Config::temp);      my $loadFeature = ERDBLoad->new($self, 'Feature', $FIG_Config::temp);
     # Get a FIG object. We'll use this to create the data.  
     my $fig = FIG->new();  
283      # Get the genome list.      # Get the genome list.
284      my @genomes = $fig->genomes();      my @genomes = $fig->genomes();
285      # Loop through the genomes.      # Loop through the genomes.
# Line 285  Line 306 
306      Trace("Feature table load statistics:\n" . $stats->Show()) if T(3);      Trace("Feature table load statistics:\n" . $stats->Show()) if T(3);
307  }  }
308    
309  =head3 LoadAttribute  =head3 LoadAttributeKey
310    
311  C<< my $stats = $attrDB->LoadAttribute($entityName, $fieldName, $fh, $keyCol, $dataCol); >>  C<< my $stats = $attrDB->LoadAttributeKey($entityName, $fieldName, $fh, $keyCol, $dataCol); >>
312    
313  Load the specified attribute from the specified file. The file should be a  Load the specified attribute from the specified file. The file should be a
314  tab-delimited file with internal tab and new-line characters escaped. This is  tab-delimited file with internal tab and new-line characters escaped. This is
# Line 326  Line 347 
347    
348  =cut  =cut
349    
350  sub LoadAttribute {  sub LoadAttributeKey {
351      # Get the parameters.      # Get the parameters.
352      my ($self, $entityName, $fieldName, $fh, $keyCol, $dataCol) = @_;      my ($self, $entityName, $fieldName, $fh, $keyCol, $dataCol) = @_;
353      # Create the return variable.      # Create the return variable.
# Line 340  Line 361 
361          my $fieldHash = $self->GetFieldTable($entityName);          my $fieldHash = $self->GetFieldTable($entityName);
362          # Verify that the attribute exists.          # Verify that the attribute exists.
363          if (! exists $fieldHash->{$fieldName}) {          if (! exists $fieldHash->{$fieldName}) {
364              Confess("Attribute \"$fieldName\" does not exist in entity $entityName.");              Confess("Attribute key \"$fieldName\" does not exist in entity $entityName.");
365          } else {          } else {
366              # Create a loader for the specified attribute. We need the              # Create a loader for the specified attribute. We need the
367              # relation name first.              # relation name first.
# Line 370  Line 391 
391      return $retVal;      return $retVal;
392  }  }
393    
 =head3 DeleteAttribute  
394    
395  C<< CustomAttributes::DeleteAttribute($entityName, $attributeName); >>  =head3 DeleteAttributeKey
396    
397    C<< CustomAttributes::DeleteAttributeKey($entityName, $attributeName); >>
398    
399  Delete an attribute from the custom attributes database.  Delete an attribute from the custom attributes database.
400    
# Line 390  Line 412 
412    
413  =cut  =cut
414    
415  sub DeleteAttribute {  sub DeleteAttributeKey {
416      # Get the parameters.      # Get the parameters.
417      my ($entityName, $attributeName) = @_;      my ($entityName, $attributeName) = @_;
418      # Read in the XML for the database defintion. We need to verify that      # Read in the XML for the database defintion. We need to verify that
# Line 403  Line 425 
425          # Get the field hash.          # Get the field hash.
426          my $fieldHash = ERDB::GetEntityFieldHash($metadata, $entityName);          my $fieldHash = ERDB::GetEntityFieldHash($metadata, $entityName);
427          if (! exists $fieldHash->{$attributeName}) {          if (! exists $fieldHash->{$attributeName}) {
428              Confess("Attribute \"$attributeName\" not found in entity $entityName.");              Confess("Attribute key \"$attributeName\" not found in entity $entityName.");
429          } else {          } else {
430              # Get the attribute's relation name.              # Get the attribute's relation name.
431              my $relName = $fieldHash->{$attributeName}->{relation};              my $relName = $fieldHash->{$attributeName}->{relation};
# Line 690  Line 712 
712      return $retVal;      return $retVal;
713  }  }
714    
715    =head3 MatchSqlPattern
716    
717    C<< my $matched = MatchSqlPattern($value, $pattern); >>
718    
719    Determine whether or not a specified value matches an SQL pattern. An SQL
720    pattern has two wild card characters: C<%> that matches multiple characters,
721    and C<_> that matches a single character. These can be escaped using a
722    backslash (C<\>). We pull this off by converting the SQL pattern to a
723    PERL regular expression. As per SQL rules, the match is case-insensitive.
724    
725    =over 4
726    
727    =item value
728    
729    Value to be matched against the pattern. Note that an undefined value will
730    not match anything.
731    
732    =item pattern
733    
734    SQL pattern against which to match the value. An undefined pattern will
735    match everything.
736    
737    =item RETURN
738    
739    Returns TRUE if the value and pattern match, else FALSE.
740    
741    =back
742    
743    =cut
744    
745    sub MatchSqlPattern {
746        # Get the parameters.
747        my ($value, $pattern) = @_;
748        # Declare the return variable.
749        my $retVal;
750        # Insure we have a pattern.
751        if (! defined($pattern)) {
752            $retVal = 1;
753        } else {
754            # Break the pattern into pieces around the wildcard characters. Because we
755            # use parentheses in the split function's delimiter expression, we'll get
756            # list elements for the delimiters as well as the rest of the string.
757            my @pieces = split /([_%]|\\[_%])/, $pattern;
758            # Check some fast special cases.
759            if ($pattern eq '%') {
760                # A null pattern matches everything.
761                $retVal = 1;
762            } elsif (@pieces == 1) {
763                # No wildcards, so we have a literal comparison. Note we're case-insensitive.
764                $retVal = (lc($value) eq lc($pattern));
765            } elsif (@pieces == 2 && $pieces[1] eq '%') {
766                # A wildcard at the end, so we have a substring match. This is also case-insensitive.
767                $retVal = (lc(substr($value, 0, length($pieces[0]))) eq lc($pieces[0]));
768            } else {
769                # Okay, we have to do it the hard way. Convert each piece to a PERL pattern.
770                my $realPattern = "";
771                for my $piece (@pieces) {
772                    # Determine the type of piece.
773                    if ($piece eq "") {
774                        # Empty pieces are ignored.
775                    } elsif ($piece eq "%") {
776                        # Here we have a multi-character wildcard. Note that it can match
777                        # zero or more characters.
778                        $realPattern .= ".*"
779                    } elsif ($piece eq "_") {
780                        # Here we have a single-character wildcard.
781                        $realPattern .= ".";
782                    } elsif ($piece eq "\\%" || $piece eq "\\_") {
783                        # This is an escape sequence (which is a rare thing, actually).
784                        $realPattern .= substr($piece, 1, 1);
785                    } else {
786                        # Here we have raw text.
787                        $realPattern .= quotemeta($piece);
788                    }
789                }
790                # Do the match.
791                $retVal = ($value =~ /^$realPattern$/i ? 1 : 0);
792            }
793        }
794        # Return the result.
795        return $retVal;
796    }
797    
798    =head3 MigrateAttributes
799    
800    C<< CustomAttributes::MigrateAttributes($fig); >>
801    
802    Migrate all the attributes data from the specified FIG instance. This is a long, slow
803    method used to convert the old attribute data to the new system. Only attribute
804    keys that are not already in the database will be loaded, and only for entity instances
805    current in the database. To get an accurate capture of the attributes in the given
806    instance, you may want to clear the database and the DBD before starting and
807    run L</Refresh> to populate the entities.
808    
809    =over 4
810    
811    =item fig
812    
813    A FIG object that can be used to retrieve attributes for migration purposes.
814    
815    =back
816    
817    =cut
818    
819    sub MigrateAttributes {
820        # Get the parameters.
821        my ($fig) = @_;
822        # Get a list of the objects to migrate. This requires connecting. Note we
823        # will map each entity type to a file name. The file will contain a list
824        # of the object's IDs so we can get to them when we're not connected to
825        # the database.
826        my $ca = CustomAttributes->new();
827        my %objects = map { $_ => "$FIG_Config::temp/$_.keys.tbl" } $ca->GetEntityTypes();
828        # Set up hash of the existing attribute keys for each entity type.
829        my %oldKeys = ();
830        # Finally, we have a hash that counts the IDs for each entity type.
831        my %idCounts = map { $_ => 0 } keys %objects;
832        # Loop through the list, creating key files to read back in.
833        for my $entityType (keys %objects) {
834            Trace("Retrieving keys for $entityType.") if T(2);
835            # Create the key file.
836            my $idFile = Open(undef, ">$objects{$entityType}");
837            # Loop through the keys.
838            my @ids = $ca->GetFlat([$entityType], "", [], "$entityType(id)");
839            for my $id (@ids) {
840                print $idFile "$id\n";
841            }
842            close $idFile;
843            # In addition to the key file, we must get a list of attributes already
844            # in the database. This avoids a circularity problem that might occur if the $fig
845            # object is retrieving from the custom attributes database already.
846            my %fields = $ca->GetSecondaryFields($entityType);
847            $oldKeys{$entityType} = \%fields;
848            # Finally, we have the ID count.
849            $idCounts{$entityType} = scalar @ids;
850        }
851        # Release the custom attributes database so we can add attributes.
852        undef $ca;
853        # Loop through the objects.
854        for my $entityType (keys %objects) {
855            # Get a hash of all the attributes already in this database. These are
856            # left untouched.
857            my $myOldKeys = $oldKeys{$entityType};
858            # Create a hash to control the load file names for each attribute key we find.
859            my %keyHash = ();
860            # Set up some counters so we can trace our progress.
861            my ($totalIDs, $processedIDs, $keyCount, $valueCount) = ($idCounts{$entityType}, 0, 0, 0);
862            # Open this object's ID file.
863            Trace("Migrating data for $entityType. $totalIDs found.") if T(3);
864            my $keysIn = Open(undef, "<$objects{$entityType}");
865            while (my $id = <$keysIn>) {
866                # Remove the EOL characters.
867                chomp $id;
868                # Get this object's attributes.
869                my @allData = $fig->get_attributes($id);
870                Trace(scalar(@allData) . " attribute values found for $entityType($id).") if T(4);
871                # Loop through the attribute values one at a time.
872                for my $dataTuple (@allData) {
873                    # Get the key, value, and URL. We ignore the first element because that's the
874                    # object ID, and we already know the object ID.
875                    my (undef, $key, $value, $url) = @{$dataTuple};
876                    # Only proceed if this is not an old key.
877                    if (! $myOldKeys->{$key}) {
878                        # See if we've run into this key before.
879                        if (! exists $keyHash{$key}) {
880                            # Here we need to create the attribute key in the database.
881                            StoreAttributeKey($entityType, $key, 'text',
882                                              "Key migrated automatically from the FIG system. " .
883                                              "Please replace these notes as soon as possible " .
884                                              "with useful text."
885                                             );
886                            # Compute the attribute's load file name and open it for output.
887                            my $fileName = "$FIG_Config::temp/$entityType.$key.load.tbl";
888                            my $fh = Open(undef, ">$fileName");
889                            # Store the file name and handle.
890                            $keyHash{$key} = {h => $fh, name => $fileName};
891                            # Count this key.
892                            $keyCount++;
893                        }
894                        # Smash the value and the URL together.
895                        if (defined($url) && length($url) > 0) {
896                            $value .= "::$url";
897                        }
898                        # Write the attribute value to the load file.
899                        Tracer::PutLine($keyHash{$key}->{h}, [$id, $value]);
900                        $valueCount++;
901                    }
902                }
903                # Now we've finished all the attributes for this object. Count and trace it.
904                $processedIDs++;
905                if ($processedIDs % 500 == 0) {
906                    Trace("$processedIDs of $totalIDs ${entityType}s processed.") if T(3);
907                    Trace("$entityType has $keyCount keys and $valueCount values so far.") if T(3);
908                }
909            }
910            # Now we've finished all the attributes for all objects of this type.
911            Trace("$processedIDs ${entityType}s processed, with $keyCount keys and $valueCount values.") if T(2);
912            # Loop through the files, loading the keys into the database.
913            Trace("Connecting to database.") if T(2);
914            my $objectCA = CustomAttributes->new();
915            Trace("Loading key files.") if T(2);
916            for my $key (sort keys %keyHash) {
917                # Close the key's load file.
918                close $keyHash{$key}->{h};
919                # Reopen it for input.
920                my $fileName = $keyHash{$key}->{name};
921                my $fh = Open(undef, "<$fileName");
922                Trace("Loading $key from $fileName.") if T(3);
923                my $stats = $objectCA->LoadAttributeKey($entityType, $key, $fh, 0, 1);
924                Trace("Statistics for $key of $entityType:\n" . $stats->Show()) if T(3);
925            }
926            # All the keys for this entity type are now loaded.
927            Trace("Key files loaded for $entityType.") if T(2);
928        }
929        # All keys for all entity types are now loaded.
930        Trace("Migration complete.") if T(2);
931    }
932    
933    =head2 FIG Method Replacements
934    
935    The following methods are used by B<FIG.pm> to replace the previous attribute functionality.
936    Some of the old functionality is no longer present. Controlled vocabulary is no longer
937    supported and there is no longer any searching by URL. Fortunately, neither of these
938    capabilities were used in the old system.
939    
940    In the previous implementation, an attribute had a value and a URL. In the new implementation,
941    there is only a value. In this implementation, each attribute has only a value. These
942    methods will treat the value as a list with the individual elements separated by the
943    value of the splitter parameter on the constructor (L</new>). The default is double
944    colons C<::>.
945    
946    So, for example, an old-style keyword with a /value of C<essential> and a URL of
947    C<http://www.sciencemag.org/cgi/content/abstract/293/5538/2266> using the default
948    splitter value would be stored as
949    
950        essential::http://www.sciencemag.org/cgi/content/abstract/293/5538/2266
951    
952    The best performance is achieved by searching for a particular key for a specified
953    feature or genome.
954    
955    =head3 GetAttributes
956    
957    C<< my @attributeList = GetAttributes($objectID, $key, @valuePatterns); >>
958    
959    or
960    
961    C<< my @attributeList = $attrDB->GetAttributes($objectID, $key, @valuePatterns); >>
962    
963    The first form will connect to the database and release it. The second form
964    uses the database connection contained in the object.
965    
966    In the database, attribute values are sectioned into pieces using a splitter
967    value specified in the constructor (L</new>). This is not a requirement of
968    the attribute system as a whole, merely a convenience for the purpose of
969    these methods. If you are using the static method calls instead of the
970    object-based calls, the splitter will always be the default value of
971    double colons (C<::>). If a value has multiple sections, each section
972    is matched against the correspond criterion in the I<@valuePatterns> list.
973    
974    This method returns a series of tuples that match the specified criteria. Each tuple
975    will contain an object ID, a key, and one or more values. The parameters to this
976    method therefore correspond structurally to the values expected in each tuple.
977    
978        my @attributeList = GetAttributes('fig|100226.1.peg.1004', 'structure%', 1, 2);
979    
980    would return something like
981    
982        ['fig}100226.1.peg.1004', 'structure', 1, 2]
983        ['fig}100226.1.peg.1004', 'structure1', 1, 2]
984        ['fig}100226.1.peg.1004', 'structure2', 1, 2]
985        ['fig}100226.1.peg.1004', 'structureA', 1, 2]
986    
987    Use of C<undef> in any position acts as a wild card (all values). In addition,
988    the I<$key> and I<@valuePatterns> parameters can contain SQL pattern characters: C<%>, which
989    matches any sequence of characters, and C<_>, which matches any single character.
990    (You can use an escape sequence C<\%> or C<\_> to match an actual percent sign or
991    underscore.)
992    
993    In addition to values in multiple sections, a single attribute key can have multiple
994    values, so even
995    
996        my @attributeList = GetAttributes($peg, 'virulent');
997    
998    which has no wildcard in the key or the object ID, may return multiple tuples.
999    
1000    For reasons of backward compatability, we examine the structure of the object ID to
1001    determine the entity type. In that case the only two types allowed are C<Genome> and
1002    C<Feature>. An alternative method is to use a list reference, with the list consisting
1003    of an entity type name and the actual ID. Thus, the above example could equivalently
1004    be written as
1005    
1006        my @attributeList = GetAttributes([Feature => $peg], 'virulent');
1007    
1008    The list-reference approach allows us to add attributes to other entity types in
1009    the future. Doing so, however, will require modifying the L</Refresh> method and
1010    updated the database design XML.
1011    
1012    The list-reference approach also allows for a more fault-tolerant approach to
1013    getting all objects with a particular attribute.
1014    
1015        my @attributeList = GetAttributes([Feature => undef], 'virulent');
1016    
1017    will only return feature attributes, while
1018    
1019        my @attributeList = GetAttributes(undef, 'virulent');
1020    
1021    could at some point in the future get you attributes for genomes or even subsystems
1022    as well as features.
1023    
1024    =over 4
1025    
1026    =item objectID
1027    
1028    ID of the genome or feature whose attributes are desired. In general, an ID that
1029    starts with C<fig|> is treated as a feature ID, and an ID that is all digits with a
1030    single period is treated as a genome ID. For other entity types, use a list reference; in
1031    this case the first list element is the entity type and the second is the ID. A value of
1032    C<undef> here will match all objects.
1033    
1034    =item key
1035    
1036    Attribute key name. Since attributes are stored as fields in the database with a
1037    field name equal to the key name, it is very fast to find a list of all the
1038    matching keys. Each key's values require a separate query, however, which may
1039    be a performance problem if the pattern matches a lot of keys. Wild cards are
1040    acceptable here, and a value of C<undef> will match all attribute keys.
1041    
1042    =item valuePatterns
1043    
1044    List of the desired attribute values, section by section. If C<undef>
1045    is specified, all values in that section will match.
1046    
1047    =item RETURN
1048    
1049    Returns a list of tuples. The first element in the tuple is an object ID, the
1050    second is an attribute key, and the remaining elements are the sections of
1051    the attribute value. All of the tuples will match the criteria set forth in
1052    the parameter list.
1053    
1054    =back
1055    
1056    =cut
1057    
1058    sub GetAttributes {
1059        # Connect to the database. The tricky part is knowing whether or not we
1060        # are an instance method (in which case the first parameter is a
1061        # CustomAttributes object) or a static method (in which case we must
1062        # connect manually.
1063        my $self = (UNIVERSAL::isa($_[0],__PACKAGE__) ? shift @_ : CustomAttributes->new());
1064        # Get the remaining parameters.
1065        my ($objectID, $key, @valuePatterns) = @_;
1066        # Declare the return variable.
1067        my @retVal = ();
1068        # Determine the entity types for our search.
1069        my @objects = ();
1070        my ($actualObjectID, $computedType);
1071        if (! defined($objectID)) {
1072            push @objects, $self->GetEntityTypes();
1073        } else {
1074            ($computedType, $actualObjectID) = ComputeObjectTypeFromID($objectID);
1075            push @objects, $computedType;
1076        }
1077        # Loop through the entity types.
1078        for my $entityType (@objects) {
1079            # Now we need to find all the matching keys. The keys are actually stored in
1080            # our database object, so this process is fast. Note that our
1081            # MatchSqlPattern method
1082            my %secondaries = $self->GetSecondaryFields($entityType);
1083            my @fieldList = grep { MatchSqlPattern($_, $key) } keys %secondaries;
1084            # Now we figure out whether or not we need to filter by object.
1085            my $filter = "";
1086            my @params = ();
1087            if (defined($actualObjectID)) {
1088                # Here the caller wants to filter on object ID.
1089                $filter = "$entityType(id) = ?";
1090                push @params, $actualObjectID;
1091            }
1092            # It's time to begin making queries. We process one attribute key at a time, because
1093            # each attribute is actually a different field in the database. We know here that
1094            # all the keys we've collected are for the correct entity because we got them from
1095            # the DBD. That's a good thing, because an invalid key name will cause an SQL error.
1096            for my $key (@fieldList) {
1097                # Get all of the attribute values for this key.
1098                my @dataRows = $self->GetAll([$entityType], $filter, \@params,
1099                                             ["$entityType(id)", "$entityType($key)"]);
1100                # Process each value separately. We need to verify the values and reformat the
1101                # tuples. Note that GetAll will give us one row per matching object ID,
1102                # with the ID first followed by a list of the data values. This is very
1103                # different from the structure we'll be returning, which has one row
1104                # per value.
1105                for my $dataRow (@dataRows) {
1106                    # Get the object ID and the list of values.
1107                    my ($rowObjectID, @dataValues) = @{$dataRow};
1108                    # Loop through the values. There will be one result row per attribute value.
1109                    for my $dataValue (@dataValues) {
1110                        # Separate this value into sections.
1111                        my @sections = split("::", $dataValue);
1112                        # Loop through the value patterns, looking for a mismatch. Note that
1113                        # since we're working through parallel arrays, we are using an index
1114                        # loop. As soon as a match fails we stop checking. This means that
1115                        # if the value pattern list is longer than the number of sections,
1116                        # we will fail as soon as we run out of sections.
1117                        my $match = 1;
1118                        for (my $i = 0; $i <= $#valuePatterns && $match; $i++) {
1119                            $match = MatchSqlPattern($sections[$i], $valuePatterns[$i]);
1120                        }
1121                        # If we match, we save this value in the output list.
1122                        if ($match) {
1123                            push @retVal, [$rowObjectID, $key, @sections];
1124                        }
1125                    }
1126                    # Here we've processed all the attribute values for the current object ID.
1127                }
1128                # Here we've processed all the rows returned by GetAll. In general, there will
1129                # be one row per object ID.
1130            }
1131            # Here we've processed all the matching attribute keys.
1132        }
1133        # Here we've processed all the entity types. That means @retVal has all the matching
1134        # results.
1135        return @retVal;
1136    }
1137    
1138    =head3 AddAttribute
1139    
1140    C<< $attrDB->AddAttribute($objectID, $key, @values); >>
1141    
1142    or
1143    
1144    C<< AddAttribute($objectID, $key, @values); >>
1145    
1146    Add an attribute key/value pair to an object. This method cannot add a new key, merely
1147    add a value to an existing key. Use L</StoreAttributeKey> to create a new key.
1148    
1149    The first form will connect to the database and release it. The second form
1150    uses the database connection contained in the object.
1151    
1152    =over 4
1153    
1154    =item objectID
1155    
1156    ID of the genome or feature to which the attribute is to be added. In general, an ID that
1157    starts with C<fig|> is treated as a feature ID, and an ID that is all digits and periods
1158    is treated as a genome ID. For IDs of other types, this parameter should be a reference
1159    to a 2-tuple consisting of the entity type name followed by the object ID.
1160    
1161    =item key
1162    
1163    Attribute key name. This corresponds to the name of a field in the database.
1164    
1165    =item values
1166    
1167    One or more values to be associated with the key. The values are joined together with
1168    the splitter value before being stored as field values. This enables L</GetAttributes>
1169    to split them apart during retrieval. The splitter value defaults to double colons C<::>.
1170    
1171    =back
1172    
1173    =cut
1174    
1175    sub AddAttribute {
1176        # Connect to the database. The tricky part is knowing whether or not we
1177        # are an instance method (in which case the first parameter is a
1178        # CustomAttributes object) or a static method (in which case we must
1179        # connect manually.
1180        my $self = (UNIVERSAL::isa($_[0],__PACKAGE__) ? shift @_ : CustomAttributes->new());
1181        # Get the parameters.
1182        my ($objectID, $key, @values) = @_;
1183        # Don't allow undefs.
1184        if (! defined($objectID)) {
1185            Confess("No object ID specified for AddAttribute call.");
1186        } elsif (! defined($key)) {
1187            Confess("No attribute key specified for AddAttribute call.");
1188        } elsif (! @values) {
1189            Confess("No values specified in AddAttribute call for key $key.");
1190        } else {
1191            # Okay, now we have some reason to believe we can do this. Start by
1192            # computing the object type and ID.
1193            my ($entityName, $id) = ComputeObjectTypeFromID($objectID);
1194            # Form the values into a scalar.
1195            my $valueString = join($self->{splitter}, @values);
1196            # Insert the value.
1197            $self->InsertValue($id, "$entityName($key)", $valueString);
1198        }
1199        # Return a one. We do this for backward compatability.
1200        return 1;
1201    }
1202    
1203    =head3 DeleteAttribute
1204    
1205    C<< $attrDB->DeleteAttribute($objectID, $key, @values); >>
1206    
1207    or
1208    
1209    C<< DeleteAttribute($objectID, $key, @values); >>
1210    
1211    Delete the specified attribute key/value combination from the database.
1212    
1213    The first form will connect to the database and release it. The second form
1214    uses the database connection contained in the object.
1215    
1216    =over 4
1217    
1218    =item objectID
1219    
1220    ID of the genome or feature to which the attribute is to be added. In general, an ID that
1221    starts with C<fig|> is treated as a feature ID, and an ID that is all digits and periods
1222    is treated as a genome ID. For IDs of other types, this parameter should be a reference
1223    to a 2-tuple consisting of the entity type name followed by the object ID.
1224    
1225    =item key
1226    
1227    Attribute key name. This corresponds to the name of a field in the database.
1228    
1229    =item values
1230    
1231    One or more values to be associated with the key.
1232    
1233    =back
1234    
1235    =cut
1236    
1237    sub DeleteAttribute {
1238        # Connect to the database. The tricky part is knowing whether or not we
1239        # are an instance method (in which case the first parameter is a
1240        # CustomAttributes object) or a static method (in which case we must
1241        # connect manually.
1242        my $self = (UNIVERSAL::isa($_[0],__PACKAGE__) ? shift @_ : CustomAttributes->new());
1243        # Get the parameters.
1244        my ($objectID, $key, @values) = @_;
1245        # Don't allow undefs.
1246        if (! defined($objectID)) {
1247            Confess("No object ID specified for DeleteAttribute call.");
1248        } elsif (! defined($key)) {
1249            Confess("No attribute key specified for DeleteAttribute call.");
1250        } elsif (! @values) {
1251            Confess("No values specified in DeleteAttribute call for key $key.");
1252        } else {
1253            # Now compute the object type and ID.
1254            my ($entityName, $id) = ComputeObjectTypeFromID($objectID);
1255            # Form the values into a scalar.
1256            my $valueString = join($self->{splitter}, @values);
1257            # Delete the value.
1258            $self->DeleteValue($entityName, $id, $key, $valueString);
1259        }
1260        # Return a one. This is for backward compatability.
1261        return 1;
1262    }
1263    
1264    =head3 ComputeObjectTypeFromID
1265    
1266    C<< my ($entityName, $id) = CustomAttributes::ComputeObjectTypeFromID($objectID); >>
1267    
1268    This method will compute the entity type corresponding to a specified object ID.
1269    If the object ID begins with C<fig|>, it is presumed to be a feature ID. If it
1270    is all digits with a single period, it is presumed to by a genome ID. Otherwise,
1271    it must be a list reference. In this last case the first list element will be
1272    taken as the entity type and the second will be taken as the actual ID.
1273    
1274    =over 4
1275    
1276    =item objectID
1277    
1278    Object ID to examine.
1279    
1280    =item RETURN
1281    
1282    Returns a 2-element list consisting of the entity type followed by the specified ID.
1283    
1284    =back
1285    
1286    =cut
1287    
1288    sub ComputeObjectTypeFromID {
1289        # Get the parameters.
1290        my ($objectID) = @_;
1291        # Declare the return variables.
1292        my ($entityName, $id);
1293        # Only proceed if the object ID is defined. If it's not, we'll be returning a
1294        # pair of undefs.
1295        if (defined($objectID)) {
1296            if (ref $objectID eq 'ARRAY') {
1297                # Here we have the new-style list reference. Pull out its pieces.
1298                ($entityName, $id) = @{$objectID};
1299            } else {
1300                # Here the ID is the outgoing ID, and we need to look at its structure
1301                # to determine the entity type.
1302                $id = $objectID;
1303                if ($objectID =~ /^\d+\.\d+/) {
1304                    # Digits with a single period is a genome.
1305                    $entityName = 'Genome';
1306                } elsif ($objectID =~ /^fig\|/) {
1307                    # The "fig|" prefix indicates a feature.
1308                    $entityName = 'Feature';
1309                } else {
1310                    # Anything else is illegal!
1311                    Confess("Invalid attribute ID specification \"$objectID\".");
1312                }
1313            }
1314        }
1315        # Return the result.
1316        return ($entityName, $id);
1317    }
1318    
1319    =head3 ChangeAttribute
1320    
1321    C<< $attrDB->ChangeAttribute($objectID, $key, \@oldValues, \@newValues); >>
1322    
1323    or
1324    
1325    C<< ChangeAttribute($objectID, $key, \@oldValues, \@newValues); >>
1326    
1327    Change the value of an attribute key/value pair for an object.
1328    
1329    The first form will connect to the database and release it. The second form
1330    uses the database connection contained in the object.
1331    
1332    =over 4
1333    
1334    =item objectID
1335    
1336    ID of the genome or feature to which the attribute is to be changed. In general, an ID that
1337    starts with C<fig|> is treated as a feature ID, and an ID that is all digits and periods
1338    is treated as a genome ID. For IDs of other types, this parameter should be a reference
1339    to a 2-tuple consisting of the entity type name followed by the object ID.
1340    
1341    =item key
1342    
1343    Attribute key name. This corresponds to the name of a field in the database.
1344    
1345    =item oldValues
1346    
1347    One or more values identifying the key/value pair to change.
1348    
1349    =item newValues
1350    
1351    One or more values to be put in place of the old values.
1352    
1353    =back
1354    
1355    =cut
1356    
1357    sub ChangeAttribute {
1358        # Connect to the database. The tricky part is knowing whether or not we
1359        # are an instance method (in which case the first parameter is a
1360        # CustomAttributes object) or a static method (in which case we must
1361        # connect manually.
1362        my $self = (UNIVERSAL::isa($_[0],__PACKAGE__) ? shift @_ : CustomAttributes->new());
1363        # Get the parameters.
1364        my ($objectID, $key, $oldValues, $newValues) = @_;
1365        # Don't allow undefs.
1366        if (! defined($objectID)) {
1367            Confess("No object ID specified for ChangeAttribute call.");
1368        } elsif (! defined($key)) {
1369            Confess("No attribute key specified for ChangeAttribute call.");
1370        } elsif (! defined($oldValues) || ref $oldValues ne 'ARRAY') {
1371            Confess("No old values specified in ChangeAttribute call for key $key.");
1372        } elsif (! defined($newValues) || ref $newValues ne 'ARRAY') {
1373            Confess("No new values specified in ChangeAttribute call for key $key.");
1374        } else {
1375            # Okay, now we do the change as a delete/add.
1376            $self->DeleteAttribute($objectID, $key, @{$oldValues});
1377            $self->AddAttribute($objectID, $key, @{$newValues});
1378        }
1379        # Return a one. We do this for backward compatability.
1380        return 1;
1381    }
1382    
1383  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3