[Bio] / Sprout / CustomAttributes.pm Repository:
ViewVC logotype

Diff of /Sprout/CustomAttributes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Fri Nov 3 16:50:04 2006 UTC revision 1.7, Wed Nov 15 12:04:05 2006 UTC
# Line 4  Line 4 
4    
5      require Exporter;      require Exporter;
6      use ERDB;      use ERDB;
7      @ISA = qw(Exporter ERDB);      @ISA = qw(ERDB);
8      use strict;      use strict;
9      use Tracer;      use Tracer;
     use FIG;  
10      use ERDBLoad;      use ERDBLoad;
11    
12  =head1 Custom SEED Attribute Manager  =head1 Custom SEED Attribute Manager
# Line 21  Line 20 
20    
21  The full suite of ERDB retrieval capabilities is provided. In addition,  The full suite of ERDB retrieval capabilities is provided. In addition,
22  custom methods are provided specific to this application. To get all  custom methods are provided specific to this application. To get all
23  the values of the attribute C<essential> in the B<Feature> entity, you  the values of the attribute C<essential> in a specified B<Feature>, you
24  would code  would code
25    
26      my @values = $attrDB->GetAttributes($fid, Feature => 'essential');      my @values = $attrDB->GetAttributes([Feature => $fid], 'essential');
27    
28  where I<$fid> contains the ID of the desired feature. Each attribute has  where I<$fid> contains the ID of the desired feature. Each attribute has
29  an alternate index to allow searching for attributes by value.  an alternate index to allow searching for attributes by value.
# Line 77  Line 76 
76    
77  =back  =back
78    
79  =head2 Impliementation Note  The DBD file is critical, and must have reasonable contents before we can
80    begin using the system. In the old system, attributes were only provided
81    for Genomes and Features, so the initial XML file was the following.
82    
83        <Database>
84          <Title>SEED Custom Attribute Database</Title>
85          <Entities>
86            <Entity name="Feature" keyType="id-string">
87              <Notes>A [i]feature[/i] is a part of the genome
88              that is of special interest. Features may be spread
89              across multiple contigs of a genome, but never across
90              more than one genome. Features can be assigned to roles
91              via spreadsheet cells, and are the targets of
92              annotation.</Notes>
93            </Entity>
94            <Entity name="Genome" keyType="name-string">
95              <Notes>A [i]genome[/i] describes a particular individual
96              organism's DNA.</Notes>
97            </Entity>
98          </Entities>
99        </Database>
100    
101    It is not necessary to put any tables into the database; however, you should
102    run
103    
104        AttrDBRefresh
105    
106    periodically to insure it has the correct Genomes and Features in it. When
107    converting from the old system, use
108    
109        AttrDBRefresh -migrate
110    
111    to initialize the database and migrate the legacy data. You should only need
112    to do that once.
113    
114    =head2 Implementation Note
115    
116  The L</Refresh> method reloads the entities in the database. If new  The L</Refresh> method reloads the entities in the database. If new
117  entity types are added, that method will need to be adjusted accordingly.  entity types are added, that method will need to be adjusted accordingly.
# Line 86  Line 120 
120    
121  =head3 new  =head3 new
122    
123  C<< my $attrDB = CustomAttributes->new(); >>  C<< my $attrDB = CustomAttributes->new($splitter); >>
124    
125    Construct a new CustomAttributes object. This object cannot be used to add or
126    delete keys because that requires modifying the database design. To do that,
127    you need to use the static L</StoreAttributeKey> or L</DeleteAttributeKey>
128    methods.
129    
130    =over 4
131    
132    =item splitter
133    
134    Value to be used to split attribute values into sections in the
135    L</Fig Replacement Methods>. The default is a double colon C<::>.
136    If you do not use the replacement methods, you do not need to
137    worry about this parameter.
138    
139  Construct a new CustomAttributes object. This object is only used to load  =back
 or access data. To add new attributes, use the static L</NewAttribute>  
 method.  
140    
141  =cut  =cut
142    
143  sub new {  sub new {
144      # Get the parameters.      # Get the parameters.
145      my ($class) = @_;      my ($class, $splitter) = @_;
146      # Connect to the database.      # Connect to the database.
147      my $dbh = DBKernel->new($FIG_Config::attrDbms, $FIG_Config::attrDbName,      my $dbh = DBKernel->new($FIG_Config::attrDbms, $FIG_Config::attrDbName,
148                              $FIG_Config::attrUser, $FIG_Config::attrPass,                              $FIG_Config::attrUser, $FIG_Config::attrPass,
# Line 105  Line 151 
151      # Create the ERDB object.      # Create the ERDB object.
152      my $xmlFileName = $FIG_Config::attrDBD;      my $xmlFileName = $FIG_Config::attrDBD;
153      my $retVal = ERDB::new($class, $dbh, $xmlFileName);      my $retVal = ERDB::new($class, $dbh, $xmlFileName);
154        # Store the splitter value.
155        $retVal->{splitter} = (defined($splitter) ? $splitter : '::');
156      # Return the result.      # Return the result.
157      return $retVal;      return $retVal;
158  }  }
159    
160  =head3 GetAttributes  =head3 StoreAttributeKey
   
 C<< my @values = $attrDB->GetAttributes($id, $entityName => $attributeName); >>  
   
 Return all the values of the specified attribute for the specified entity instance.  
 A list of vaues will be returned. If the entity instance does not exist or the  
 attribute has no values, an empty list will be returned. If the attribute name  
 does not exist, an SQL error will occur.  
   
 A typical invocation would look like this:  
   
     my @values = $sttrDB->GetAttributes($fid, Feature => 'essential');  
   
 Here the user is asking for the values of the C<essential> attribute for the  
 B<Feature> with the specified ID. If the identified feature is not essential,  
 the list returned will be empty. If it is essential, then one or more values  
 will be returned that describe the essentiality.  
   
 =over 4  
   
 =item id  
   
 ID of the desired entity instance. This identifies the specific object to  
 be interrogated for attribute values.  
   
 =item entityName  
   
 Name of the entity. This identifies the the type of the object to be  
 interrogated for attribute values.  
   
 =item attributeName  
   
 Name of the desired attribute.  
   
 =item RETURN  
   
 Returns zero or more strings, each representing a value of the named attribute  
 for the specified entity instance.  
   
 =back  
   
 =cut  
   
 sub GetAttributes {  
     # Get the parameters.  
     my ($self, $id, $entityName, $attributeName) = @_;  
     # Get the data.  
     my @retVal = $self->GetEntityValues($entityName, $id, ["$entityName($attributeName)"]);  
     # Return the result.  
     return @retVal;  
 }  
   
 =head3 StoreAttribute  
161    
162  C<< my $attrDB = CustomAttributes::StoreAttribute($entityName, $attributeName, $type, $notes); >>  C<< my $attrDB = CustomAttributes::StoreAttributeKey($entityName, $attributeName, $type, $notes); >>
163    
164  Create or update an attribute for the database. This method will update the database definition  Create or update an attribute for the database. This method will update the database definition
165  XML, but it will not create the table. It will connect to the database so that the caller  XML, but it will not create the table. It will connect to the database so that the caller
# Line 198  Line 194 
194    
195  =cut  =cut
196    
197  sub StoreAttribute {  sub StoreAttributeKey {
198      # Get the parameters.      # Get the parameters.
199      my ($entityName, $attributeName, $type, $notes) = @_;      my ($entityName, $attributeName, $type, $notes) = @_;
200      # Get the data type hash.      # Get the data type hash.
# Line 222  Line 218 
218          my $entityData = $entityHash->{$entityName};          my $entityData = $entityHash->{$entityName};
219          my $fieldHash = ERDB::GetEntityFieldHash($metadata, $entityName);          my $fieldHash = ERDB::GetEntityFieldHash($metadata, $entityName);
220          # Compute the attribute's relation name.          # Compute the attribute's relation name.
221          my $relName = join("", $entityName, map { ucfirst $_ } split(/-/, $attributeName));          my $relName = join("", $entityName, map { ucfirst $_ } split(/-|_/, $attributeName));
222          # Store the attribute's field data. Note the use of the "content" hash for          # Store the attribute's field data. Note the use of the "content" hash for
223          # the notes. This is how the XML writer knows Notes is a text tag instead of          # the notes. This is how the XML writer knows Notes is a text tag instead of
224          # an attribute.          # an attribute.
# Line 244  Line 240 
240    
241  =head3 Refresh  =head3 Refresh
242    
243  C<< $attrDB->Refresh(); >>  C<< $attrDB->Refresh($fig); >>
244    
245  Refresh the primary entity tables from the FIG data store. This method basically  Refresh the primary entity tables from the FIG data store. This method basically
246  drops and reloads the main tables of the custom attributes database.  drops and reloads the main tables of the custom attributes database.
247    
248    =over 4
249    
250    =item fig
251    
252    FIG-like object that can be used to find genomes and features.
253    
254    =back
255    
256  =cut  =cut
257    
258  sub Refresh {  sub Refresh {
259      # Get the parameters.      # Get the parameters.
260      my ($self) = @_;      my ($self, $fig) = @_;
261      # Create load objects for the genomes and the features.      # Create load objects for the genomes and the features.
262      my $loadGenome = ERDBLoad->new($self, 'Genome', $FIG_Config::temp);      my $loadGenome = ERDBLoad->new($self, 'Genome', $FIG_Config::temp);
263      my $loadFeature = ERDBLoad->new($self, 'Feature', $FIG_Config::temp);      my $loadFeature = ERDBLoad->new($self, 'Feature', $FIG_Config::temp);
     # Get a FIG object. We'll use this to create the data.  
     my $fig = FIG->new();  
264      # Get the genome list.      # Get the genome list.
265      my @genomes = $fig->genomes();      my @genomes = $fig->genomes();
266      # Loop through the genomes.      # Loop through the genomes.
# Line 285  Line 287 
287      Trace("Feature table load statistics:\n" . $stats->Show()) if T(3);      Trace("Feature table load statistics:\n" . $stats->Show()) if T(3);
288  }  }
289    
290  =head3 LoadAttribute  =head3 LoadAttributeKey
291    
292  C<< my $stats = $attrDB->LoadAttribute($entityName, $fieldName, $fh, $keyCol, $dataCol); >>  C<< my $stats = $attrDB->LoadAttributeKey($entityName, $fieldName, $fh, $keyCol, $dataCol); >>
293    
294  Load the specified attribute from the specified file. The file should be a  Load the specified attribute from the specified file. The file should be a
295  tab-delimited file with internal tab and new-line characters escaped. This is  tab-delimited file with internal tab and new-line characters escaped. This is
# Line 326  Line 328 
328    
329  =cut  =cut
330    
331  sub LoadAttribute {  sub LoadAttributeKey {
332      # Get the parameters.      # Get the parameters.
333      my ($self, $entityName, $fieldName, $fh, $keyCol, $dataCol) = @_;      my ($self, $entityName, $fieldName, $fh, $keyCol, $dataCol) = @_;
334      # Create the return variable.      # Create the return variable.
# Line 340  Line 342 
342          my $fieldHash = $self->GetFieldTable($entityName);          my $fieldHash = $self->GetFieldTable($entityName);
343          # Verify that the attribute exists.          # Verify that the attribute exists.
344          if (! exists $fieldHash->{$fieldName}) {          if (! exists $fieldHash->{$fieldName}) {
345              Confess("Attribute \"$fieldName\" does not exist in entity $entityName.");              Confess("Attribute key \"$fieldName\" does not exist in entity $entityName.");
346          } else {          } else {
347              # Create a loader for the specified attribute. We need the              # Create a loader for the specified attribute. We need the
348              # relation name first.              # relation name first.
# Line 370  Line 372 
372      return $retVal;      return $retVal;
373  }  }
374    
 =head3 DeleteAttribute  
375    
376  C<< CustomAttributes::DeleteAttribute($entityName, $attributeName); >>  =head3 DeleteAttributeKey
377    
378    C<< CustomAttributes::DeleteAttributeKey($entityName, $attributeName); >>
379    
380  Delete an attribute from the custom attributes database.  Delete an attribute from the custom attributes database.
381    
# Line 390  Line 393 
393    
394  =cut  =cut
395    
396  sub DeleteAttribute {  sub DeleteAttributeKey {
397      # Get the parameters.      # Get the parameters.
398      my ($entityName, $attributeName) = @_;      my ($entityName, $attributeName) = @_;
399      # Read in the XML for the database defintion. We need to verify that      # Read in the XML for the database defintion. We need to verify that
# Line 403  Line 406 
406          # Get the field hash.          # Get the field hash.
407          my $fieldHash = ERDB::GetEntityFieldHash($metadata, $entityName);          my $fieldHash = ERDB::GetEntityFieldHash($metadata, $entityName);
408          if (! exists $fieldHash->{$attributeName}) {          if (! exists $fieldHash->{$attributeName}) {
409              Confess("Attribute \"$attributeName\" not found in entity $entityName.");              Confess("Attribute key \"$attributeName\" not found in entity $entityName.");
410          } else {          } else {
411              # Get the attribute's relation name.              # Get the attribute's relation name.
412              my $relName = $fieldHash->{$attributeName}->{relation};              my $relName = $fieldHash->{$attributeName}->{relation};
# Line 503  Line 506 
506                                                      -default => 1)                                                      -default => 1)
507                                     ),                                     ),
508                            );                            );
509      # Now the two buttons: UPDATE and DELETE.      # Now the three buttons: UPDATE, SHOW, and DELETE.
510      push @retVal, $cgi->Tr($cgi->th("&nbsp;"),      push @retVal, $cgi->Tr($cgi->th("&nbsp;"),
511                             $cgi->td({align => 'center'},                             $cgi->td({align => 'center'},
512                                      $cgi->submit(-name => 'Delete', -value => 'DELETE') . " " .                                      $cgi->submit(-name => 'Delete', -value => 'DELETE') . " " .
513                                      $cgi->submit(-name => 'Store',  -value => 'STORE')                                      $cgi->submit(-name => 'Store',  -value => 'STORE') . " " .
514                                        $cgi->submit(-name => 'Show',   -value => 'SHOW')
515                                     )                                     )
516                            );                            );
517      # Close the table and the form.      # Close the table and the form.
# Line 690  Line 694 
694      return $retVal;      return $retVal;
695  }  }
696    
697    =head3 MatchSqlPattern
698    
699    C<< my $matched = CustomAttributes::MatchSqlPattern($value, $pattern); >>
700    
701    Determine whether or not a specified value matches an SQL pattern. An SQL
702    pattern has two wild card characters: C<%> that matches multiple characters,
703    and C<_> that matches a single character. These can be escaped using a
704    backslash (C<\>). We pull this off by converting the SQL pattern to a
705    PERL regular expression. As per SQL rules, the match is case-insensitive.
706    
707    =over 4
708    
709    =item value
710    
711    Value to be matched against the pattern. Note that an undefined or empty
712    value will not match anything.
713    
714    =item pattern
715    
716    SQL pattern against which to match the value. An undefined or empty pattern will
717    match everything.
718    
719    =item RETURN
720    
721    Returns TRUE if the value and pattern match, else FALSE.
722    
723    =back
724    
725    =cut
726    
727    sub MatchSqlPattern {
728        # Get the parameters.
729        my ($value, $pattern) = @_;
730        # Declare the return variable.
731        my $retVal;
732        # Insure we have a pattern.
733        if (! defined($pattern) || $pattern eq "") {
734            $retVal = 1;
735        } else {
736            # Break the pattern into pieces around the wildcard characters. Because we
737            # use parentheses in the split function's delimiter expression, we'll get
738            # list elements for the delimiters as well as the rest of the string.
739            my @pieces = split /([_%]|\\[_%])/, $pattern;
740            # Check some fast special cases.
741            if ($pattern eq '%') {
742                # A null pattern matches everything.
743                $retVal = 1;
744            } elsif (@pieces == 1) {
745                # No wildcards, so we have a literal comparison. Note we're case-insensitive.
746                $retVal = (lc($value) eq lc($pattern));
747            } elsif (@pieces == 2 && $pieces[1] eq '%') {
748                # A wildcard at the end, so we have a substring match. This is also case-insensitive.
749                $retVal = (lc(substr($value, 0, length($pieces[0]))) eq lc($pieces[0]));
750            } else {
751                # Okay, we have to do it the hard way. Convert each piece to a PERL pattern.
752                my $realPattern = "";
753                for my $piece (@pieces) {
754                    # Determine the type of piece.
755                    if ($piece eq "") {
756                        # Empty pieces are ignored.
757                    } elsif ($piece eq "%") {
758                        # Here we have a multi-character wildcard. Note that it can match
759                        # zero or more characters.
760                        $realPattern .= ".*"
761                    } elsif ($piece eq "_") {
762                        # Here we have a single-character wildcard.
763                        $realPattern .= ".";
764                    } elsif ($piece eq "\\%" || $piece eq "\\_") {
765                        # This is an escape sequence (which is a rare thing, actually).
766                        $realPattern .= substr($piece, 1, 1);
767                    } else {
768                        # Here we have raw text.
769                        $realPattern .= quotemeta($piece);
770                    }
771                }
772                # Do the match.
773                $retVal = ($value =~ /^$realPattern$/i ? 1 : 0);
774            }
775        }
776        # Return the result.
777        return $retVal;
778    }
779    
780    =head3 MigrateAttributes
781    
782    C<< CustomAttributes::MigrateAttributes($fig); >>
783    
784    Migrate all the attributes data from the specified FIG instance. This is a long, slow
785    method used to convert the old attribute data to the new system. Only attribute
786    keys that are not already in the database will be loaded, and only for entity instances
787    current in the database. To get an accurate capture of the attributes in the given
788    instance, you may want to clear the database and the DBD before starting and
789    run L</Refresh> to populate the entities.
790    
791    =over 4
792    
793    =item fig
794    
795    A FIG object that can be used to retrieve attributes for migration purposes.
796    
797    =back
798    
799    =cut
800    
801    sub MigrateAttributes {
802        # Get the parameters.
803        my ($fig) = @_;
804        # Get a list of the objects to migrate. This requires connecting. Note we
805        # will map each entity type to a file name. The file will contain a list
806        # of the object's IDs so we can get to them when we're not connected to
807        # the database.
808        my $ca = CustomAttributes->new();
809        my %objects = map { $_ => "$FIG_Config::temp/$_.keys.tbl" } $ca->GetEntityTypes();
810        # Set up hash of the existing attribute keys for each entity type.
811        my %oldKeys = ();
812        # Finally, we have a hash that counts the IDs for each entity type.
813        my %idCounts = map { $_ => 0 } keys %objects;
814        # Loop through the list, creating key files to read back in.
815        for my $entityType (keys %objects) {
816            Trace("Retrieving keys for $entityType.") if T(2);
817            # Create the key file.
818            my $idFile = Open(undef, ">$objects{$entityType}");
819            # Loop through the keys.
820            my @ids = $ca->GetFlat([$entityType], "", [], "$entityType(id)");
821            for my $id (@ids) {
822                print $idFile "$id\n";
823            }
824            close $idFile;
825            # In addition to the key file, we must get a list of attributes already
826            # in the database. This avoids a circularity problem that might occur if the $fig
827            # object is retrieving from the custom attributes database already.
828            my %fields = $ca->GetSecondaryFields($entityType);
829            $oldKeys{$entityType} = \%fields;
830            # Finally, we have the ID count.
831            $idCounts{$entityType} = scalar @ids;
832        }
833        # Release the custom attributes database so we can add attributes.
834        undef $ca;
835        # Loop through the objects.
836        for my $entityType (keys %objects) {
837            # Get a hash of all the attributes already in this database. These are
838            # left untouched.
839            my $myOldKeys = $oldKeys{$entityType};
840            # Create a hash to control the load file names for each attribute key we find.
841            my %keyHash = ();
842            # Set up some counters so we can trace our progress.
843            my ($totalIDs, $processedIDs, $keyCount, $valueCount) = ($idCounts{$entityType}, 0, 0, 0);
844            # Open this object's ID file.
845            Trace("Migrating data for $entityType. $totalIDs found.") if T(3);
846            my $keysIn = Open(undef, "<$objects{$entityType}");
847            while (my $id = <$keysIn>) {
848                # Remove the EOL characters.
849                chomp $id;
850                # Get this object's attributes.
851                my @allData = $fig->get_attributes($id);
852                Trace(scalar(@allData) . " attribute values found for $entityType($id).") if T(4);
853                # Loop through the attribute values one at a time.
854                for my $dataTuple (@allData) {
855                    # Get the key, value, and URL. We ignore the first element because that's the
856                    # object ID, and we already know the object ID.
857                    my (undef, $key, $value, $url) = @{$dataTuple};
858                    # Remove the buggy "1" for $url.
859                    if ($url eq "1") {
860                        $url = undef;
861                    }
862                    # Only proceed if this is not an old key.
863                    if (! $myOldKeys->{$key}) {
864                        # See if we've run into this key before.
865                        if (! exists $keyHash{$key}) {
866                            # Here we need to create the attribute key in the database.
867                            StoreAttributeKey($entityType, $key, 'text',
868                                              "Key migrated automatically from the FIG system. " .
869                                              "Please replace these notes as soon as possible " .
870                                              "with useful text."
871                                             );
872                            # Compute the attribute's load file name and open it for output.
873                            my $fileName = "$FIG_Config::temp/$entityType.$key.load.tbl";
874                            my $fh = Open(undef, ">$fileName");
875                            # Store the file name and handle.
876                            $keyHash{$key} = {h => $fh, name => $fileName};
877                            # Count this key.
878                            $keyCount++;
879                        }
880                        # Smash the value and the URL together.
881                        if (defined($url) && length($url) > 0) {
882                            $value .= "::$url";
883                        }
884                        # Write the attribute value to the load file.
885                        Tracer::PutLine($keyHash{$key}->{h}, [$id, $value]);
886                        $valueCount++;
887                    }
888                }
889                # Now we've finished all the attributes for this object. Count and trace it.
890                $processedIDs++;
891                if ($processedIDs % 500 == 0) {
892                    Trace("$processedIDs of $totalIDs ${entityType}s processed.") if T(3);
893                    Trace("$entityType has $keyCount keys and $valueCount values so far.") if T(3);
894                }
895            }
896            # Now we've finished all the attributes for all objects of this type.
897            Trace("$processedIDs ${entityType}s processed, with $keyCount keys and $valueCount values.") if T(2);
898            # Loop through the files, loading the keys into the database.
899            Trace("Connecting to database.") if T(2);
900            my $objectCA = CustomAttributes->new();
901            Trace("Loading key files.") if T(2);
902            for my $key (sort keys %keyHash) {
903                # Close the key's load file.
904                close $keyHash{$key}->{h};
905                # Reopen it for input.
906                my $fileName = $keyHash{$key}->{name};
907                my $fh = Open(undef, "<$fileName");
908                Trace("Loading $key from $fileName.") if T(3);
909                my $stats = $objectCA->LoadAttributeKey($entityType, $key, $fh, 0, 1);
910                Trace("Statistics for $key of $entityType:\n" . $stats->Show()) if T(3);
911            }
912            # All the keys for this entity type are now loaded.
913            Trace("Key files loaded for $entityType.") if T(2);
914        }
915        # All keys for all entity types are now loaded.
916        Trace("Migration complete.") if T(2);
917    }
918    
919    =head3 ComputeObjectTypeFromID
920    
921    C<< my ($entityName, $id) = CustomAttributes::ComputeObjectTypeFromID($objectID); >>
922    
923    This method will compute the entity type corresponding to a specified object ID.
924    If the object ID begins with C<fig|>, it is presumed to be a feature ID. If it
925    is all digits with a single period, it is presumed to by a genome ID. Otherwise,
926    it must be a list reference. In this last case the first list element will be
927    taken as the entity type and the second will be taken as the actual ID.
928    
929    =over 4
930    
931    =item objectID
932    
933    Object ID to examine.
934    
935    =item RETURN
936    
937    Returns a 2-element list consisting of the entity type followed by the specified ID.
938    
939    =back
940    
941    =cut
942    
943    sub ComputeObjectTypeFromID {
944        # Get the parameters.
945        my ($objectID) = @_;
946        # Declare the return variables.
947        my ($entityName, $id);
948        # Only proceed if the object ID is defined. If it's not, we'll be returning a
949        # pair of undefs.
950        if ($objectID) {
951            if (ref $objectID eq 'ARRAY') {
952                # Here we have the new-style list reference. Pull out its pieces.
953                ($entityName, $id) = @{$objectID};
954            } else {
955                # Here the ID is the outgoing ID, and we need to look at its structure
956                # to determine the entity type.
957                $id = $objectID;
958                if ($objectID =~ /^\d+\.\d+/) {
959                    # Digits with a single period is a genome.
960                    $entityName = 'Genome';
961                } elsif ($objectID =~ /^fig\|/) {
962                    # The "fig|" prefix indicates a feature.
963                    $entityName = 'Feature';
964                } else {
965                    # Anything else is illegal!
966                    Confess("Invalid attribute ID specification \"$objectID\".");
967                }
968            }
969        }
970        # Return the result.
971        return ($entityName, $id);
972    }
973    
974    =head2 FIG Method Replacements
975    
976    The following methods are used by B<FIG.pm> to replace the previous attribute functionality.
977    Some of the old functionality is no longer present. Controlled vocabulary is no longer
978    supported and there is no longer any searching by URL. Fortunately, neither of these
979    capabilities were used in the old system.
980    
981    The methods here are the only ones supported by the B<RemoteCustomAttributes> object.
982    The idea is that these methods represent attribute manipulation allowed by all users, while
983    the others are only for privileged users with access to the attribute server.
984    
985    In the previous implementation, an attribute had a value and a URL. In the new implementation,
986    there is only a value. In this implementation, each attribute has only a value. These
987    methods will treat the value as a list with the individual elements separated by the
988    value of the splitter parameter on the constructor (L</new>). The default is double
989    colons C<::>.
990    
991    So, for example, an old-style keyword with a /value of C<essential> and a URL of
992    C<http://www.sciencemag.org/cgi/content/abstract/293/5538/2266> using the default
993    splitter value would be stored as
994    
995        essential::http://www.sciencemag.org/cgi/content/abstract/293/5538/2266
996    
997    The best performance is achieved by searching for a particular key for a specified
998    feature or genome.
999    
1000    =head3 GetAttributes
1001    
1002    C<< my @attributeList = $attrDB->GetAttributes($objectID, $key, @valuePatterns); >>
1003    
1004    In the database, attribute values are sectioned into pieces using a splitter
1005    value specified in the constructor (L</new>). This is not a requirement of
1006    the attribute system as a whole, merely a convenience for the purpose of
1007    these methods. If you are using the static method calls instead of the
1008    object-based calls, the splitter will always be the default value of
1009    double colons (C<::>). If a value has multiple sections, each section
1010    is matched against the correspond criterion in the I<@valuePatterns> list.
1011    
1012    This method returns a series of tuples that match the specified criteria. Each tuple
1013    will contain an object ID, a key, and one or more values. The parameters to this
1014    method therefore correspond structurally to the values expected in each tuple.
1015    
1016        my @attributeList = GetAttributes('fig|100226.1.peg.1004', 'structure%', 1, 2);
1017    
1018    would return something like
1019    
1020        ['fig}100226.1.peg.1004', 'structure', 1, 2]
1021        ['fig}100226.1.peg.1004', 'structure1', 1, 2]
1022        ['fig}100226.1.peg.1004', 'structure2', 1, 2]
1023        ['fig}100226.1.peg.1004', 'structureA', 1, 2]
1024    
1025    Use of C<undef> in any position acts as a wild card (all values). In addition,
1026    the I<$key> and I<@valuePatterns> parameters can contain SQL pattern characters: C<%>, which
1027    matches any sequence of characters, and C<_>, which matches any single character.
1028    (You can use an escape sequence C<\%> or C<\_> to match an actual percent sign or
1029    underscore.)
1030    
1031    In addition to values in multiple sections, a single attribute key can have multiple
1032    values, so even
1033    
1034        my @attributeList = GetAttributes($peg, 'virulent');
1035    
1036    which has no wildcard in the key or the object ID, may return multiple tuples.
1037    
1038    For reasons of backward compatability, we examine the structure of the object ID to
1039    determine the entity type. In that case the only two types allowed are C<Genome> and
1040    C<Feature>. An alternative method is to use a list reference, with the list consisting
1041    of an entity type name and the actual ID. Thus, the above example could equivalently
1042    be written as
1043    
1044        my @attributeList = GetAttributes([Feature => $peg], 'virulent');
1045    
1046    The list-reference approach allows us to add attributes to other entity types in
1047    the future. Doing so, however, will require modifying the L</Refresh> method and
1048    updated the database design XML.
1049    
1050    The list-reference approach also allows for a more fault-tolerant approach to
1051    getting all objects with a particular attribute.
1052    
1053        my @attributeList = GetAttributes([Feature => undef], 'virulent');
1054    
1055    will only return feature attributes, while
1056    
1057        my @attributeList = GetAttributes(undef, 'virulent');
1058    
1059    could at some point in the future get you attributes for genomes or even subsystems
1060    as well as features.
1061    
1062    =over 4
1063    
1064    =item objectID
1065    
1066    ID of the genome or feature whose attributes are desired. In general, an ID that
1067    starts with C<fig|> is treated as a feature ID, and an ID that is all digits with a
1068    single period is treated as a genome ID. For other entity types, use a list reference; in
1069    this case the first list element is the entity type and the second is the ID. A value of
1070    C<undef> or an empty string here will match all objects.
1071    
1072    =item key
1073    
1074    Attribute key name. Since attributes are stored as fields in the database with a
1075    field name equal to the key name, it is very fast to find a list of all the
1076    matching keys. Each key's values require a separate query, however, which may
1077    be a performance problem if the pattern matches a lot of keys. Wild cards are
1078    acceptable here, and a value of C<undef> or an empty string will match all
1079    attribute keys.
1080    
1081    =item valuePatterns
1082    
1083    List of the desired attribute values, section by section. If C<undef>
1084    or an empty string is specified, all values in that section will match.
1085    
1086    =item RETURN
1087    
1088    Returns a list of tuples. The first element in the tuple is an object ID, the
1089    second is an attribute key, and the remaining elements are the sections of
1090    the attribute value. All of the tuples will match the criteria set forth in
1091    the parameter list.
1092    
1093    =back
1094    
1095    =cut
1096    
1097    sub GetAttributes {
1098        # Get the parameters.
1099        my ($self, $objectID, $key, @valuePatterns) = @_;
1100        # Declare the return variable.
1101        my @retVal = ();
1102        # Determine the entity types for our search.
1103        my @objects = ();
1104        my ($actualObjectID, $computedType);
1105        if (! $objectID) {
1106            push @objects, $self->GetEntityTypes();
1107        } else {
1108            ($computedType, $actualObjectID) = ComputeObjectTypeFromID($objectID);
1109            push @objects, $computedType;
1110        }
1111        # Loop through the entity types.
1112        for my $entityType (@objects) {
1113            # Now we need to find all the matching keys. The keys are actually stored in
1114            # our database object, so this process is fast. Note that our
1115            # MatchSqlPattern method
1116            my %secondaries = $self->GetSecondaryFields($entityType);
1117            my @fieldList = grep { MatchSqlPattern($_, $key) } keys %secondaries;
1118            # Now we figure out whether or not we need to filter by object. We will always
1119            # filter by key to a limited extent, so if we're filtering by object we need an
1120            # AND to join the object ID filter with the key filter.
1121            my $filter = "";
1122            my @params = ();
1123            if (defined($actualObjectID)) {
1124                # Here the caller wants to filter on object ID.
1125                $filter = "$entityType(id) = ? AND ";
1126                push @params, $actualObjectID;
1127            }
1128            # It's time to begin making queries. We process one attribute key at a time, because
1129            # each attribute is actually a different field in the database. We know here that
1130            # all the keys we've collected are for the correct entity because we got them from
1131            # the DBD. That's a good thing, because an invalid key name will cause an SQL error.
1132            for my $key (@fieldList) {
1133                # Get all of the attribute values for this key.
1134                my @dataRows = $self->GetAll([$entityType], "$filter$entityType($key) IS NOT NULL",
1135                                             \@params, ["$entityType(id)", "$entityType($key)"]);
1136                # Process each value separately. We need to verify the values and reformat the
1137                # tuples. Note that GetAll will give us one row per matching object ID,
1138                # with the ID first followed by a list of the data values. This is very
1139                # different from the structure we'll be returning, which has one row
1140                # per value.
1141                for my $dataRow (@dataRows) {
1142                    # Get the object ID and the list of values.
1143                    my ($rowObjectID, @dataValues) = @{$dataRow};
1144                    # Loop through the values. There will be one result row per attribute value.
1145                    for my $dataValue (@dataValues) {
1146                        # Separate this value into sections.
1147                        my @sections = split("::", $dataValue);
1148                        # Loop through the value patterns, looking for a mismatch. Note that
1149                        # since we're working through parallel arrays, we are using an index
1150                        # loop. As soon as a match fails we stop checking. This means that
1151                        # if the value pattern list is longer than the number of sections,
1152                        # we will fail as soon as we run out of sections.
1153                        my $match = 1;
1154                        for (my $i = 0; $i <= $#valuePatterns && $match; $i++) {
1155                            $match = MatchSqlPattern($sections[$i], $valuePatterns[$i]);
1156                        }
1157                        # If we match, we save this value in the output list.
1158                        if ($match) {
1159                            push @retVal, [$rowObjectID, $key, @sections];
1160                        }
1161                    }
1162                    # Here we've processed all the attribute values for the current object ID.
1163                }
1164                # Here we've processed all the rows returned by GetAll. In general, there will
1165                # be one row per object ID.
1166            }
1167            # Here we've processed all the matching attribute keys.
1168        }
1169        # Here we've processed all the entity types. That means @retVal has all the matching
1170        # results.
1171        return @retVal;
1172    }
1173    
1174    =head3 AddAttribute
1175    
1176    C<< $attrDB->AddAttribute($objectID, $key, @values); >>
1177    
1178    Add an attribute key/value pair to an object. This method cannot add a new key, merely
1179    add a value to an existing key. Use L</StoreAttributeKey> to create a new key.
1180    
1181    =over 4
1182    
1183    =item objectID
1184    
1185    ID of the genome or feature to which the attribute is to be added. In general, an ID that
1186    starts with C<fig|> is treated as a feature ID, and an ID that is all digits and periods
1187    is treated as a genome ID. For IDs of other types, this parameter should be a reference
1188    to a 2-tuple consisting of the entity type name followed by the object ID.
1189    
1190    =item key
1191    
1192    Attribute key name. This corresponds to the name of a field in the database.
1193    
1194    =item values
1195    
1196    One or more values to be associated with the key. The values are joined together with
1197    the splitter value before being stored as field values. This enables L</GetAttributes>
1198    to split them apart during retrieval. The splitter value defaults to double colons C<::>.
1199    
1200    =back
1201    
1202    =cut
1203    
1204    sub AddAttribute {
1205        # Get the parameters.
1206        my ($self, $objectID, $key, @values) = @_;
1207        # Don't allow undefs.
1208        if (! defined($objectID)) {
1209            Confess("No object ID specified for AddAttribute call.");
1210        } elsif (! defined($key)) {
1211            Confess("No attribute key specified for AddAttribute call.");
1212        } elsif (! @values) {
1213            Confess("No values specified in AddAttribute call for key $key.");
1214        } else {
1215            # Okay, now we have some reason to believe we can do this. Start by
1216            # computing the object type and ID.
1217            my ($entityName, $id) = ComputeObjectTypeFromID($objectID);
1218            # Form the values into a scalar.
1219            my $valueString = join($self->{splitter}, @values);
1220            # Insert the value.
1221            $self->InsertValue($id, "$entityName($key)", $valueString);
1222        }
1223        # Return a one. We do this for backward compatability.
1224        return 1;
1225    }
1226    
1227    =head3 DeleteAttribute
1228    
1229    C<< $attrDB->DeleteAttribute($objectID, $key, @values); >>
1230    
1231    Delete the specified attribute key/value combination from the database.
1232    
1233    The first form will connect to the database and release it. The second form
1234    uses the database connection contained in the object.
1235    
1236    =over 4
1237    
1238    =item objectID
1239    
1240    ID of the genome or feature to which the attribute is to be added. In general, an ID that
1241    starts with C<fig|> is treated as a feature ID, and an ID that is all digits and periods
1242    is treated as a genome ID. For IDs of other types, this parameter should be a reference
1243    to a 2-tuple consisting of the entity type name followed by the object ID.
1244    
1245    =item key
1246    
1247    Attribute key name. This corresponds to the name of a field in the database.
1248    
1249    =item values
1250    
1251    One or more values to be associated with the key.
1252    
1253    =back
1254    
1255    =cut
1256    
1257    sub DeleteAttribute {
1258        # Get the parameters.
1259        my ($self, $objectID, $key, @values) = @_;
1260        # Don't allow undefs.
1261        if (! defined($objectID)) {
1262            Confess("No object ID specified for DeleteAttribute call.");
1263        } elsif (! defined($key)) {
1264            Confess("No attribute key specified for DeleteAttribute call.");
1265        } elsif (! @values) {
1266            Confess("No values specified in DeleteAttribute call for key $key.");
1267        } else {
1268            # Now compute the object type and ID.
1269            my ($entityName, $id) = ComputeObjectTypeFromID($objectID);
1270            # Form the values into a scalar.
1271            my $valueString = join($self->{splitter}, @values);
1272            # Delete the value.
1273            $self->DeleteValue($entityName, $id, $key, $valueString);
1274        }
1275        # Return a one. This is for backward compatability.
1276        return 1;
1277    }
1278    
1279    =head3 ChangeAttribute
1280    
1281    C<< $attrDB->ChangeAttribute($objectID, $key, \@oldValues, \@newValues); >>
1282    
1283    Change the value of an attribute key/value pair for an object.
1284    
1285    =over 4
1286    
1287    =item objectID
1288    
1289    ID of the genome or feature to which the attribute is to be changed. In general, an ID that
1290    starts with C<fig|> is treated as a feature ID, and an ID that is all digits and periods
1291    is treated as a genome ID. For IDs of other types, this parameter should be a reference
1292    to a 2-tuple consisting of the entity type name followed by the object ID.
1293    
1294    =item key
1295    
1296    Attribute key name. This corresponds to the name of a field in the database.
1297    
1298    =item oldValues
1299    
1300    One or more values identifying the key/value pair to change.
1301    
1302    =item newValues
1303    
1304    One or more values to be put in place of the old values.
1305    
1306    =back
1307    
1308    =cut
1309    
1310    sub ChangeAttribute {
1311        # Get the parameters.
1312        my ($self, $objectID, $key, $oldValues, $newValues) = @_;
1313        # Don't allow undefs.
1314        if (! defined($objectID)) {
1315            Confess("No object ID specified for ChangeAttribute call.");
1316        } elsif (! defined($key)) {
1317            Confess("No attribute key specified for ChangeAttribute call.");
1318        } elsif (! defined($oldValues) || ref $oldValues ne 'ARRAY') {
1319            Confess("No old values specified in ChangeAttribute call for key $key.");
1320        } elsif (! defined($newValues) || ref $newValues ne 'ARRAY') {
1321            Confess("No new values specified in ChangeAttribute call for key $key.");
1322        } else {
1323            # Okay, now we do the change as a delete/add.
1324            $self->DeleteAttribute($objectID, $key, @{$oldValues});
1325            $self->AddAttribute($objectID, $key, @{$newValues});
1326        }
1327        # Return a one. We do this for backward compatability.
1328        return 1;
1329    }
1330    
1331    =head3 EraseAttribute
1332    
1333    C<< $attrDB->EraseAttribute($entityName, $key); >>
1334    
1335    Erase all values for the specified attribute key. This does not remove the
1336    key from the database; it merely removes all the values.
1337    
1338    =over 4
1339    
1340    =item entityName
1341    
1342    Name of the entity to which the key belongs. If undefined, all entities will be
1343    examined for the desired key.
1344    
1345    =item key
1346    
1347    Key to erase.
1348    
1349    =back
1350    
1351    =cut
1352    
1353    sub EraseAttribute {
1354        # Get the parameters.
1355        my ($self, $entityName, $key) = @_;
1356        # Determine the relevant entity types.
1357        my @objects = ();
1358        if (! $entityName) {
1359            push @objects, $self->GetEntityTypes();
1360        } else {
1361            push @objects, $entityName;
1362        }
1363        # Loop through the entity types.
1364        for my $entityType (@objects) {
1365            # Now check for this key in this entity.
1366            my %secondaries = $self->GetSecondaryFields($entityType);
1367            if (exists $secondaries{$key}) {
1368                # We found it, so delete all the values of the key.
1369                $self->DeleteValue($entityName, undef, $key);
1370            }
1371        }
1372        # Return a 1, for backward compatability.
1373        return 1;
1374    }
1375    
1376  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3