[Bio] / Sprout / CustomAttributes.pm Repository:
ViewVC logotype

Diff of /Sprout/CustomAttributes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.14, Wed Dec 20 20:04:23 2006 UTC revision 1.32, Fri Jan 25 19:00:58 2008 UTC
# Line 9  Line 9 
9      use Tracer;      use Tracer;
10      use ERDBLoad;      use ERDBLoad;
11      use Stats;      use Stats;
12        use Time::HiRes qw(time);
13    
14  =head1 Custom SEED Attribute Manager  =head1 Custom SEED Attribute Manager
15    
# Line 28  Line 29 
29  The actual attribute values are stored as a relationship between the attribute  The actual attribute values are stored as a relationship between the attribute
30  keys and the objects. There can be multiple values for a single key/object pair.  keys and the objects. There can be multiple values for a single key/object pair.
31    
32    =head3 Object IDs
33    
34    The object ID is normally represented as
35    
36        I<type>:I<id>
37    
38    where I<type> is the object type (C<Role>, C<Coupling>, etc.) and I<id> is
39    the actual object ID. Note that the object type must consist of only upper- and
40    lower-case letters! Thus, C<GenomeGroup> is a valid object type, but
41    C<genome_group> is not. Given that restriction, the object ID
42    
43        Family:aclame|cluster10
44    
45    would represent the FIG family C<aclame|cluster10>. For historical reasons,
46    there are three exceptions: subsystems, genomes, and features do not need
47    a type. So, for PEG 3361 of Streptomyces coelicolor A3(2), you simply code
48    
49        fig|100226.1.peg.3361
50    
51    The methods L</ParseID> and L</FormID> can be used to make this all seem
52    more consistent. Given any object ID string, L</ParseID> will convert it to an
53    object type and ID, and given any object type and ID, L</FormID> will
54    convert it to an object ID string. The attribute database is pretty
55    freewheeling about what it will allow for an ID; however, for best
56    results, the type should match an entity type from a Sprout genetics
57    database. If this rule is followed, then the database object
58    corresponding to an ID in the attribute database could be retrieved using
59    L</GetTargetObject> method.
60    
61        my $object = CustomAttributes::GetTargetObject($sprout, $idValue);
62    
63    =head3 Retrieval and Logging
64    
65  The full suite of ERDB retrieval capabilities is provided. In addition,  The full suite of ERDB retrieval capabilities is provided. In addition,
66  custom methods are provided specific to this application. To get all  custom methods are provided specific to this application. To get all
67  the values of the attribute C<essential> in a specified B<Feature>, you  the values of the attribute C<essential> in a specified B<Feature>, you
# Line 37  Line 71 
71    
72  where I<$fid> contains the ID of the desired feature.  where I<$fid> contains the ID of the desired feature.
73    
74  New attribute keys must be defined before they can be used. A web interface  Keys can be split into two pieces using the splitter value defined in the
75  is provided for this purpose.  constructor (the default is C<::>). The first piece of the key is called
76    the I<real key>. This portion of the key must be defined using the
77    web interface (C<Attributes.cgi>). The second portion of the key is called
78    the I<sub key>, and can take any value.
79    
80    Major attribute activity is recorded in a log (C<attributes.log>) in the
81    C<$FIG_Config::var> directory. The log reports the user name, time, and
82    the details of the operation. The user name will almost always be unknown,
83    the exception being when it is specified in this object's constructor
84    (see L</new>).
85    
86  =head2 FIG_Config Parameters  =head2 FIG_Config Parameters
87    
# Line 88  Line 131 
131    
132  =head3 new  =head3 new
133    
134  C<< my $attrDB = CustomAttributes->new($splitter); >>      my $attrDB = CustomAttributes->new(%options);
135    
136  Construct a new CustomAttributes object.  Construct a new CustomAttributes object. The following options are
137    supported.
138    
139  =over 4  =over 4
140    
141  =item splitter  =item splitter
142    
143  Value to be used to split attribute values into sections in the  Value to be used to split attribute values into sections in the
144  L</Fig Replacement Methods>. The default is a double colon C<::>.  L</Fig Replacement Methods>. The default is a double colon C<::>,
145  If you do not use the replacement methods, you do not need to  and should only be overridden in extreme circumstances.
146  worry about this parameter.  
147    =item user
148    
149    Name of the current user. This will appear in the attribute log.
150    
151  =back  =back
152    
# Line 107  Line 154 
154    
155  sub new {  sub new {
156      # Get the parameters.      # Get the parameters.
157      my ($class, $splitter) = @_;      my ($class, %options) = @_;
158      # Connect to the database.      # Connect to the database.
159      my $dbh = DBKernel->new($FIG_Config::attrDbms, $FIG_Config::attrDbName,      my $dbh = DBKernel->new($FIG_Config::attrDbms, $FIG_Config::attrDbName,
160                              $FIG_Config::attrUser, $FIG_Config::attrPass,                              $FIG_Config::attrUser, $FIG_Config::attrPass,
# Line 117  Line 164 
164      my $xmlFileName = $FIG_Config::attrDBD;      my $xmlFileName = $FIG_Config::attrDBD;
165      my $retVal = ERDB::new($class, $dbh, $xmlFileName);      my $retVal = ERDB::new($class, $dbh, $xmlFileName);
166      # Store the splitter value.      # Store the splitter value.
167      $retVal->{splitter} = (defined($splitter) ? $splitter : '::');      $retVal->{splitter} = $options{splitter} || '::';
168        # Store the user name.
169        $retVal->{user} = $options{user} || '<unknown>';
170        Trace("User $retVal->{user} selected for attribute object.") if T(3);
171      # Return the result.      # Return the result.
172      return $retVal;      return $retVal;
173  }  }
174    
175  =head3 StoreAttributeKey  =head3 StoreAttributeKey
176    
177  C<< $attrDB->StoreAttributeKey($attributeName, $type, $notes, \@groups); >>      $attrDB->StoreAttributeKey($attributeName, $type, $notes, \@groups);
178    
179  Create or update an attribute for the database.  Create or update an attribute for the database.
180    
# Line 132  Line 182 
182    
183  =item attributeName  =item attributeName
184    
185  Name of the attribute. It must be a valid ERDB field name, consisting entirely of  Name of the attribute (the real key). If it does not exist already, it will be created.
 letters, digits, and hyphens, with a letter at the beginning. If it does not  
 exist already, it will be created.  
186    
187  =item type  =item type
188    
# Line 161  Line 209 
209      # Get the data type hash.      # Get the data type hash.
210      my %types = ERDB::GetDataTypes();      my %types = ERDB::GetDataTypes();
211      # Validate the initial input values.      # Validate the initial input values.
212      if (! ERDB::ValidateFieldName($attributeName)) {      if ($attributeName =~ /$self->{splitter}/) {
213          Confess("Invalid attribute name \"$attributeName\" specified.");          Confess("Invalid attribute name \"$attributeName\" specified.");
214      } elsif (! $notes || length($notes) < 25) {      } elsif (! $notes || length($notes) < 25) {
215          Confess("Missing or incomplete description for $attributeName.");          Confess("Missing or incomplete description for $attributeName.");
216      } elsif (! exists $types{$type}) {      } elsif (! exists $types{$type}) {
217          Confess("Invalid data type \"$type\" for $attributeName.");          Confess("Invalid data type \"$type\" for $attributeName.");
218      } else {      } else {
219            # Create a variable to hold the action to be displayed for the log (Add or Update).
220            my $action;
221          # Okay, we're ready to begin. See if this key exists.          # Okay, we're ready to begin. See if this key exists.
222          my $attribute = $self->GetEntity('AttributeKey', $attributeName);          my $attribute = $self->GetEntity('AttributeKey', $attributeName);
223          if (defined($attribute)) {          if (defined($attribute)) {
224              # It does, so we do an update.              # It does, so we do an update.
225                $action = "Update Key";
226              $self->UpdateEntity('AttributeKey', $attributeName,              $self->UpdateEntity('AttributeKey', $attributeName,
227                                  { description => $notes, 'data-type' => $type });                                  { description => $notes, 'data-type' => $type });
228              # Detach the key from its current groups.              # Detach the key from its current groups.
229              $self->Disconnect('IsInGroup', 'AttributeKey', $attributeName);              $self->Disconnect('IsInGroup', 'AttributeKey', $attributeName);
230          } else {          } else {
231              # It doesn't, so we do an insert.              # It doesn't, so we do an insert.
232                $action = "Insert Key";
233              $self->InsertObject('AttributeKey', { id => $attributeName,              $self->InsertObject('AttributeKey', { id => $attributeName,
234                                  description => $notes, 'data-type' => $type });                                  description => $notes, 'data-type' => $type });
235          }          }
# Line 187  Line 239 
239              $self->InsertObject('IsInGroup', { 'from-link' => $attributeName,              $self->InsertObject('IsInGroup', { 'from-link' => $attributeName,
240                                                 'to-link'   => $group });                                                 'to-link'   => $group });
241          }          }
242            # Log the operation.
243            $self->LogOperation($action, $attributeName, "Group list is " . join(" ", @{$groups}));
244      }      }
245  }  }
246    
 =head3 LoadAttributeKey  
   
 C<< my $stats = $attrDB->LoadAttributeKey($keyName, $fh, $keyCol, $dataCol, %options); >>  
   
 Load the specified attribute from the specified file. The file should be a  
 tab-delimited file with internal tab and new-line characters escaped. This is  
 the typical TBL-style file used by most FIG applications. One of the columns  
 in the input file must contain the appropriate object id value and the other the  
 corresponding attribute value.  
   
 =over 4  
   
 =item keyName  
   
 Key of the attribute to load.  
   
 =item fh  
   
 Open file handle for the input file.  
   
 =item idCol  
   
 Index (0-based) of the column containing the ID field. The ID field should  
 contain the ID of an instance of the named entity.  
   
 =item dataCol  
   
 Index (0-based) of the column containing the data value field.  
   
 =item options  
   
 Hash specifying the options for this load.  
   
 =item RETURN  
   
 Returns a statistics object for the load process.  
   
 =back  
   
 The available options are as follows.  
   
 =over 4  
   
 =item erase  
   
 If TRUE, the key's values will all be erased before loading. (Doing so  
 makes for a faster load.)  
   
 =back  
   
 =cut  
   
 sub LoadAttributeKey {  
     # Get the parameters.  
     my ($self, $keyName, $fh, $idCol, $dataCol, %options) = @_;  
     # Create the return variable.  
     my $retVal = Stats->new("lineIn", "shortLine", "newObject");  
     # Compute the minimum number of fields required in each input line.  
     my $minCols = ($idCol < $dataCol ? $idCol : $idCol) + 1;  
     # Insure the attribute key exists.  
     my $found = $self->GetEntity('AttributeKey', $keyName);  
     if (! defined $found) {  
         Confess("Attribute key \"$keyName\" not found in database.");  
     } else {  
         # Erase the key's current values.  
         $self->EraseAttribute($keyName);  
         # Save a list of the object IDs we need to add.  
         my %objectIDs = ();  
         # Loop through the input file.  
         while (! eof $fh) {  
             # Get the next line of the file.  
             my @fields = Tracer::GetLine($fh);  
             $retVal->Add(lineIn => 1);  
             # Now we need to validate the line.  
             if (scalar(@fields) < $minCols) {  
                 $retVal->Add(shortLine => 1);  
             } else {  
                 # It's valid, so get the ID and value.  
                 my ($id, $value) = ($fields[$idCol], $fields[$dataCol]);  
                 # Denote we're using this input line.  
                 $retVal->Add(lineUsed => 1);  
                 # Now the fun begins. Find out if we need to create a target object record for this object ID.  
                 if (! exists $objectIDs{$id}) {  
                     my $found = $self->Exists('TargetObject', $id);  
                     if (! $found) {  
                         $self->InsertObject('TargetObject', { id => $id });  
                     }  
                     $objectIDs{$id} = 1;  
                     $retVal->Add(newObject => 1);  
                 }  
                 # Now we insert the attribute.  
                 $self->InsertObject('HasValueFor', { from => $keyName, to => $id, value => $value });  
                 $retVal->Add(newValue => 1);  
             }  
         }  
     }  
     # Return the statistics.  
     return $retVal;  
 }  
   
247    
248  =head3 DeleteAttributeKey  =head3 DeleteAttributeKey
249    
250  C<< my $stats = $attrDB->DeleteAttributeKey($attributeName); >>      my $stats = $attrDB->DeleteAttributeKey($attributeName);
251    
252  Delete an attribute from the custom attributes database.  Delete an attribute from the custom attributes database.
253    
# Line 316  Line 270 
270      my ($self, $attributeName) = @_;      my ($self, $attributeName) = @_;
271      # Delete the attribute key.      # Delete the attribute key.
272      my $retVal = $self->Delete('AttributeKey', $attributeName);      my $retVal = $self->Delete('AttributeKey', $attributeName);
273        # Log this operation.
274        $self->LogOperation("Delete Key", $attributeName, "Key will no longer be available for use by anyone.");
275      # Return the result.      # Return the result.
276      return $retVal;      return $retVal;
277    
# Line 323  Line 279 
279    
280  =head3 NewName  =head3 NewName
281    
282  C<< my $text = CustomAttributes::NewName(); >>      my $text = CustomAttributes::NewName();
283    
284  Return the string used to indicate the user wants to add a new attribute.  Return the string used to indicate the user wants to add a new attribute.
285    
# Line 335  Line 291 
291    
292  =head3 ControlForm  =head3 ControlForm
293    
294  C<< my $formHtml = $attrDB->ControlForm($cgi, $name, \%keys); >>      my $formHtml = $attrDB->ControlForm($cgi, $name, \%keys);
295    
296  Return a form that can be used to control the creation and modification of  Return a form that can be used to control the creation and modification of
297  attributes. Only a subset of the attribute keys will be displayed, as  attributes. Only a subset of the attribute keys will be displayed, as
# Line 389  Line 345 
345                                      -labels => \%labelMap,                                      -labels => \%labelMap,
346                                      -default => 'string');                                      -default => 'string');
347      # Allow the user to specify a new field name. This is required if the      # Allow the user to specify a new field name. This is required if the
348      # user has selected the "(new)" marker. We put a little scriptlet in here that      # user has selected the "(new)" marker.
     # selects the (new) marker when the user enters the field.  
     push @retVal, "<script language=\"javaScript\">";  
349      my $fieldField = "document.$name.fieldName";      my $fieldField = "document.$name.fieldName";
350      my $newName = "\"" . NewName() . "\"";      my $newName = "\"" . NewName() . "\"";
351      push @retVal, $cgi->Tr($cgi->th("New Field Name"),      push @retVal, $cgi->Tr($cgi->th("New Field Name"),
# Line 414  Line 368 
368                             $cgi->td($cgi->checkbox_group(-name=>'groups',                             $cgi->td($cgi->checkbox_group(-name=>'groups',
369                                      -values=> \@groups))                                      -values=> \@groups))
370                            );                            );
371      # If the user wants to upload new values for the field, then we have      # Now the four buttons: STORE, SHOW, ERASE, and DELETE.
     # an upload file name and column indicators.  
     push @retVal, $cgi->Tr($cgi->th("Upload Values"),  
                            $cgi->td($cgi->filefield(-name => 'newValueFile',  
                                                     -size => 20) .  
                                     " Key&nbsp;" .  
                                     $cgi->textfield(-name => 'keyCol',  
                                                     -size => 3,  
                                                     -default => 0) .  
                                     " Value&nbsp;" .  
                                     $cgi->textfield(-name => 'valueCol',  
                                                     -size => 3,  
                                                     -default => 1)  
                                    ),  
                           );  
     # Now the three buttons: STORE, SHOW, and DELETE.  
372      push @retVal, $cgi->Tr($cgi->th("&nbsp;"),      push @retVal, $cgi->Tr($cgi->th("&nbsp;"),
373                             $cgi->td({align => 'center'},                             $cgi->td({align => 'center'}, join(" ",
374                                      $cgi->submit(-name => 'Delete', -value => 'DELETE') . " " .                                      $cgi->submit(-name => 'Delete', -value => 'DELETE'),
375                                      $cgi->submit(-name => 'Store',  -value => 'STORE') . " " .                                      $cgi->submit(-name => 'Store',  -value => 'STORE'),
376                                        $cgi->submit(-name => 'Erase',  -value => 'ERASE'),
377                                      $cgi->submit(-name => 'Show',   -value => 'SHOW')                                      $cgi->submit(-name => 'Show',   -value => 'SHOW')
378                                     )                                     ))
379                            );                            );
380      # Close the table and the form.      # Close the table and the form.
381      push @retVal, $cgi->end_table();      push @retVal, $cgi->end_table();
# Line 450  Line 390 
390  Load attributes from the specified tab-delimited file. Each line of the file must  Load attributes from the specified tab-delimited file. Each line of the file must
391  contain an object ID in the first column, an attribute key name in the second  contain an object ID in the first column, an attribute key name in the second
392  column, and attribute values in the remaining columns. The attribute values will  column, and attribute values in the remaining columns. The attribute values will
393  be assembled into a single value using the splitter code.  be assembled into a single value using the splitter code. In addition, the key names may
394    contain a splitter. If this is the case, the portion of the key after the splitter is
395    treated as a subkey.
396    
397  =over 4  =over 4
398    
399  =item fileName  =item fileName
400    
401  Name of the file from which to load the attributes.  Name of the file from which to load the attributes, or an open handle for the file.
402    (This last enables the method to be used in conjunction with the CGI form upload
403    control.)
404    
405  =item options  =item options
406    
# Line 477  Line 421 
421  If TRUE, then the attributes will be appended to existing data; otherwise, the  If TRUE, then the attributes will be appended to existing data; otherwise, the
422  first time a key name is encountered, it will be erased.  first time a key name is encountered, it will be erased.
423    
424    =item archive
425    
426    If specified, the name of a file into which the incoming data should be saved.
427    If I<resume> is also specified, only the lines actually loaded will be put
428    into this file.
429    
430    =item objectType
431    
432    If specified, the specified object type will be prefixed to each object ID.
433    
434    =item resume
435    
436    If specified, key-value pairs already in the database will not be reinserted.
437    Specify a number to start checking after the specified number of lines and
438    then admit everything after the first line not yet loaded. Specify C<careful>
439    to check every single line. Specify C<none> to ignore this option. The default
440    is C<none>. So, if you believe that a previous load failed somewhere after 50000
441    lines, a resume value of C<50000> would skip 50000 lines in the file, then
442    check each line after that until it finds one not already in the database. The
443    first such line found and all lines after that will be loaded. On the other
444    hand, if you have a file of 100000 records, and some have been loaded and some
445    not, you would use the word C<careful>, so that every line would be checked before
446    it is inserted. A resume of C<0> will start checking the first line of the
447    input file and then begin loading once it finds a line not in the database.
448    
449    =item chunkSize
450    
451    Number of lines to load in each burst. The default is 10,000.
452    
453  =back  =back
454    
455  =cut  =cut
# Line 485  Line 458 
458      # Get the parameters.      # Get the parameters.
459      my ($self, $fileName, %options) = @_;      my ($self, $fileName, %options) = @_;
460      # Declare the return variable.      # Declare the return variable.
461      my $retVal = Stats->new('keys', 'values');      my $retVal = Stats->new('keys', 'values', 'linesOut');
462        # Initialize the timers.
463        my ($insertTime, $eraseTime, $archiveTime, $checkTime) = (0, 0, 0, 0);
464      # Check for append mode.      # Check for append mode.
465      my $append = ($options{append} ? 1 : 0);      my $append = ($options{append} ? 1 : 0);
466        # Check for resume mode.
467        my $resume = (defined($options{resume}) ? $options{resume} : 'none');
468      # Create a hash of key names found.      # Create a hash of key names found.
469      my %keyHash = ();      my %keyHash = ();
470      # Open the file for input.      # Compute the chunk size.
471      my $fh = Open(undef, "<$fileName");      my $chunkSize = ($options{chunkSize} ? $options{chunkSize} : 10000);
472        # Open the file for input. Note we must anticipate the possibility of an
473        # open filehandle being passed in.
474        my $fh;
475        if (ref $fileName) {
476            Trace("Using file opened by caller.") if T(3);
477            $fh = $fileName;
478        } else {
479            Trace("Attributes will be loaded from $fileName.") if T(3);
480            $fh = Open(undef, "<$fileName");
481        }
482        # Trace the mode.
483        Trace("Mode is $options{mode}.") if $options{mode} && T(3);
484        Trace("No mode specified.") if T(3) && ! $options{mode};
485        # Now check to see if we need to archive.
486        my $ah;
487        if (exists $options{archive}) {
488            my $ah = Open(undef, ">$options{archive}");
489            Trace("Load file will be archived to $options{archive}.") if T(3);
490        }
491        # This next file is used to cache the attribute data before loading it.
492        # To avoid problems, we use a series of small files instead of one
493        # big one.
494        my $tempFileName = "$FIG_Config::temp/attributeLoadFile$$.tbl";
495        # Insure we recover from errors.
496        eval {
497            # Open the temporary file and start a counter.
498            my $th = Tracer::Open(undef, ">$tempFileName");
499            my $chunkLinesLeft = $chunkSize;
500            # If we have a resume number, process it here.
501            if ($resume =~ /\d+/) {
502                Trace("Skipping $resume lines.") if T(2);
503                my $startTime = time();
504                # Skip the specified number of lines.
505                for (my $skipped = 0; ! eof($fh) && $skipped < $resume; $skipped++) {
506                    my $line = <$fh>;
507                    $retVal->Add(skipped => 1);
508                }
509                $checkTime += time() - $startTime;
510            }
511      # Loop through the file.      # Loop through the file.
512            Trace("Starting load.") if T(2);
513      while (! eof $fh) {      while (! eof $fh) {
514                # Read the current line.
515          my ($id, $key, @values) = Tracer::GetLine($fh);          my ($id, $key, @values) = Tracer::GetLine($fh);
516          $retVal->Add(linesIn => 1);          $retVal->Add(linesIn => 1);
517          # Do some validation.          # Do some validation.
518          if (! defined($id)) {              if (! $id) {
519              # We ignore blank lines.              # We ignore blank lines.
520              $retVal->Add(blankLines => 1);              $retVal->Add(blankLines => 1);
521                } elsif (substr($id, 0, 1) eq '#') {
522                    # A line beginning with a pound sign is a comment.
523                    $retVal->Add(comments => 1);
524          } elsif (! defined($key)) {          } elsif (! defined($key)) {
525              # An ID without a key is a serious error.              # An ID without a key is a serious error.
526              my $lines = $retVal->Ask('linesIn');              my $lines = $retVal->Ask('linesIn');
527              Confess("Line $lines in $fileName has no attribute key.");              Confess("Line $lines in $fileName has no attribute key.");
528                } elsif (! @values) {
529                    # A line with no values is not allowed.
530                    my $lines = $retVal->Ask('linesIn');
531                    Trace("Line $lines for key $key has no attribute values.") if T(1);
532                    $retVal->Add(skipped => 1);
533          } else {          } else {
534                    # Check to see if we need to fix up the object ID.
535                    if ($options{objectType}) {
536                        $id = "$options{objectType}:$id";
537                    }
538                    # The key contains a real part and an optional sub-part. We need the real part.
539                    my ($realKey, $subKey) = $self->SplitKey($key);
540              # Now we need to check for a new key.              # Now we need to check for a new key.
541              if (! exists $keyHash{$key}) {                  if (! exists $keyHash{$realKey}) {
542                  # This is a new key. Verify that it exists.                      my $keyObject = $self->GetEntity(AttributeKey => $realKey);
543                  if (! $self->Exists('AttributeKey', $key)) {                      if (! defined($keyObject)) {
544                            # Here the specified key does not exist, which is an error.
545                      my $line = $retVal->Ask('linesIn');                      my $line = $retVal->Ask('linesIn');
546                      Confess("Attribute \"$key\" on line $line of $fileName not found in database.");                          Confess("Attribute \"$realKey\" on line $line of $fileName not found in database.");
547                  } else {                  } else {
548                      # Make sure we know this is no longer a new key.                      # Make sure we know this is no longer a new key.
549                      $keyHash{$key} = 1;                          $keyHash{$realKey} = 1;
550                      $retVal->Add(keys => 1);                      $retVal->Add(keys => 1);
551                      # If this is NOT append mode, erase the key.                          # If this is NOT append mode, erase the key. This does not delete the key
552                            # itself; it just clears out all the values.
553                      if (! $append) {                      if (! $append) {
554                          $self->EraseAttribute($key);                              my $startTime = time();
555                      }                              $self->EraseAttribute($realKey);
556                                $eraseTime += time() - $startTime;
557                                Trace("Attribute $realKey erased.") if T(3);
558                            }
559                        }
560                        Trace("Key $realKey found.") if T(3);
561                    }
562                    # If we're in resume mode, check to see if this insert is redundant.
563                    my $ok = 1;
564                    if ($resume ne 'none') {
565                        my $startTime = time();
566                        my $count = $self->GetAttributes($id, $key, @values);
567                        if ($count) {
568                            # Here the record is found, so we skip it.
569                            $ok = 0;
570                            $retVal->Add(skipped => 1);
571                        } else {
572                            # Here the record is not found. If we're in non-careful mode, we
573                            # stop resume checking at this point.
574                            if ($resume ne 'careful') {
575                                $resume = 'none';
576                            }
577                        }
578                        $checkTime += time() - $startTime;
579                    }
580                    if ($ok) {
581                        # We're in business. First, archive this row.
582                        if (defined $ah) {
583                            my $startTime = time();
584                            Tracer::PutLine($ah, [$id, $key, @values]);
585                            $archiveTime += time() - $startTime;
586                        }
587                        # We need to format the attribute data so it will work
588                        # as if it were a load file. This means we join the
589                        # values.
590                        my $valueString = join('::', @values);
591                        # Everything is all set up, so put the value in the temporary file and
592                        # count it.
593                        my $startTime = time();
594                        Tracer::PutLine($th, [$realKey, $id, $subKey, $valueString]);
595                        $archiveTime += time() - $startTime;
596                        $retVal->Add(linesOut => 1);
597                        # Check to see if it's time to output a chunk.
598                        $chunkLinesLeft--;
599                        if ($chunkLinesLeft <= 0) {
600                            close $th;
601                            # Now we load the table from the file. Note that we don't do an analyze.
602                            # The analyze is done only after loading the residual.
603                            my $startTime = time();
604                            Trace("Loading attributes from $tempFileName: " . (-s $tempFileName) .
605                                  " characters.") if T(3);
606                            my $loadStats = $self->LoadTable($tempFileName, 'HasValueFor',
607                                                             mode => $options{mode}, partial => 1);
608                            $retVal->Add(insertTime => time() - $startTime);
609                            # Re-open the file and restart the counter.
610                            $th = Tracer::Open(undef, ">$tempFileName");
611                            $chunkLinesLeft = $chunkSize;
612                            $retVal->Add(chunks => 1);
613                  }                  }
614                  Trace("Key $key found.") if T(3);                  } else {
615                        # Here we skipped because of resume mode.
616                        $retVal->Add(resumeSkip => 1);
617              }              }
             # Now we know the key is valid. Add this value.  
             $self->AddAttribute($id, $key, @values);  
618              my $progress = $retVal->Add(values => 1);              my $progress = $retVal->Add(values => 1);
619              Trace("$progress values loaded.") if T(3) && ($progress % 1000 == 0);                  Trace("$progress values processed.") if T(3) && ($progress % 1000 == 0);
   
620          }          }
621      }      }
622            # Now we close the archive file. Note we undefine the handle so the error methods know
623            # not to worry.
624            if (defined $ah) {
625                close $ah;
626                undef $ah;
627            }
628            # Now we load the residual from the temporary file (if any). This time we'll do an
629            # analyze as well.
630            close $th;
631            my $startTime = time();
632            Trace("Loading residual attributes from $tempFileName: " . (-s $tempFileName) .
633                  " characters.") if T(3);
634            my $loadStats = $self->LoadTable($tempFileName, 'HasValueFor', mode => $options{mode}, partial => 1);
635            $retVal->Add(insertTime => time() - $startTime);
636            $retVal->Add(chunks => 1);
637            Trace("Attribute load successful.") if T(2);
638        };
639        # Check for an error.
640        if ($@) {
641            # Here we have an error. Display the error message.
642            my $message = $@;
643            Trace("Error during attribute load: $message") if T(0);
644            $retVal->AddMessage($message);
645            # Close the archive file if it's open. The archive file can sometimes provide
646            # clues as to what happened.
647            if (defined $ah) {
648                close $ah;
649            }
650        }
651        # Store the timers.
652        $retVal->Add(eraseTime   => $eraseTime);
653        $retVal->Add(insertTime  => $insertTime);
654        $retVal->Add(archiveTime => $archiveTime);
655        $retVal->Add(checkTime   => $checkTime);
656      # Return the result.      # Return the result.
657      return $retVal;      return $retVal;
658  }  }
659    
660  =head3 BackupKeys  =head3 BackupKeys
661    
662  C<< my $stats = $attrDB->BackupKeys($fileName, %options); >>      my $stats = $attrDB->BackupKeys($fileName, %options);
663    
664  Backup the attribute key information from the attribute database.  Backup the attribute key information from the attribute database.
665    
# Line 589  Line 713 
713          # is nonempty.          # is nonempty.
714          Tracer::PutLine($fh, ['#GROUPS', @groups]);          Tracer::PutLine($fh, ['#GROUPS', @groups]);
715      }      }
716        # Log the operation.
717        $self->LogOperation("Backup Keys", $fileName, $retVal->Display());
718      # Return the result.      # Return the result.
719      return $retVal;      return $retVal;
720  }  }
721    
722  =head3 RestoreKeys  =head3 RestoreKeys
723    
724  C<< my $stats = $attrDB->RestoreKeys($fileName, %options); >>      my $stats = $attrDB->RestoreKeys($fileName, %options);
725    
726  Restore the attribute keys and groups from a backup file.  Restore the attribute keys and groups from a backup file.
727    
# Line 659  Line 785 
785              }              }
786          }          }
787      }      }
788        # Log the operation.
789        $self->LogOperation("Backup Keys", $fileName, $retVal->Display());
790      # Return the result.      # Return the result.
791      return $retVal;      return $retVal;
792  }  }
793    
794    =head3 ArchiveFileName
795    
796        my $fileName = $ca->ArchiveFileName();
797    
798    Compute a file name for archiving attribute input data. The file will be in the attribute log directory
799    
800    =cut
801    
802    sub ArchiveFileName {
803        # Get the parameters.
804        my ($self) = @_;
805        # Declare the return variable.
806        my $retVal;
807        # We start by turning the timestamp into something usable as a file name.
808        my $now = Tracer::Now();
809        $now =~ tr/ :\//___/;
810        # Next we get the directory name.
811        my $dir = "$FIG_Config::var/attributes";
812        if (! -e $dir) {
813            Trace("Creating attribute file directory $dir.") if T(1);
814            mkdir $dir;
815        }
816        # Put it together with the field name and the time stamp.
817        $retVal = "$dir/upload.$now";
818        # Modify the file name to insure it's unique.
819        my $seq = 0;
820        while (-e "$retVal.$seq.tbl") { $seq++ }
821        # Use the computed sequence number to get the correct file name.
822        $retVal .= ".$seq.tbl";
823        # Return the result.
824        return $retVal;
825    }
826    
827  =head3 BackupAllAttributes  =head3 BackupAllAttributes
828    
829  C<< my $stats = $attrDB->BackupAllAttributes($fileName, %options); >>      my $stats = $attrDB->BackupAllAttributes($fileName, %options);
830    
831  Backup all of the attributes to a file. The attributes will be stored in a  Backup all of the attributes to a file. The attributes will be stored in a
832  tab-delimited file suitable for reloading via L</LoadAttributesFrom>.  tab-delimited file suitable for reloading via L</LoadAttributesFrom>.
# Line 711  Line 871 
871          while (my $line = $query->Fetch()) {          while (my $line = $query->Fetch()) {
872              $valuesFound++;              $valuesFound++;
873              # Get this row's data.              # Get this row's data.
874              my @row = $line->Values(['HasValueFor(to-link)', 'HasValueFor(from-link)',              my ($id, $key, $subKey, $value) = $line->Values(['HasValueFor(to-link)',
875                                                                 'HasValueFor(from-link)',
876                                                                 'HasValueFor(subkey)',
877                                       'HasValueFor(value)']);                                       'HasValueFor(value)']);
878                # Check for a subkey.
879                if ($subKey ne '') {
880                    $key = "$key$self->{splitter}$subKey";
881                }
882              # Write it to the file.              # Write it to the file.
883              Tracer::PutLine($fh, \@row);              Tracer::PutLine($fh, [$id, $key, $value]);
884          }          }
885          Trace("$valuesFound values backed up for key $key.") if T(3);          Trace("$valuesFound values backed up for key $key.") if T(3);
886          $retVal->Add(values => $valuesFound);          $retVal->Add(values => $valuesFound);
887      }      }
888        # Log the operation.
889        $self->LogOperation("Backup Data", $fileName, $retVal->Display());
890      # Return the result.      # Return the result.
891      return $retVal;      return $retVal;
892  }  }
893    
894  =head3 FieldMenu  =head3 FieldMenu
895    
896  C<< my $menuHtml = $attrDB->FieldMenu($cgi, $height, $name, $keys, %options); >>      my $menuHtml = $attrDB->FieldMenu($cgi, $height, $name, $keys, %options);
897    
898  Return the HTML for a menu to select an attribute field. The menu will  Return the HTML for a menu to select an attribute field. The menu will
899  be a standard SELECT/OPTION thing which is called "popup menu" in the  be a standard SELECT/OPTION thing which is called "popup menu" in the
# Line 899  Line 1067 
1067    
1068  =head3 GetGroups  =head3 GetGroups
1069    
1070  C<< my @groups = $attrDB->GetGroups(); >>      my @groups = $attrDB->GetGroups();
1071    
1072  Return a list of the available groups.  Return a list of the available groups.
1073    
# Line 916  Line 1084 
1084    
1085  =head3 GetAttributeData  =head3 GetAttributeData
1086    
1087  C<< my %keys = $attrDB->GetAttributeData($type, @list); >>      my %keys = $attrDB->GetAttributeData($type, @list);
1088    
1089  Return attribute data for the selected attributes. The attribute  Return attribute data for the selected attributes. The attribute
1090  data is a hash mapping each attribute key name to a n-tuple containing the  data is a hash mapping each attribute key name to a n-tuple containing the
# Line 983  Line 1151 
1151      return %retVal;      return %retVal;
1152  }  }
1153    
1154    =head3 LogOperation
1155    
1156        $ca->LogOperation($action, $target, $description);
1157    
1158    Write an operation description to the attribute activity log (C<$FIG_Config::var/attributes.log>).
1159    
1160    =over 4
1161    
1162    =item action
1163    
1164    Action being logged (e.g. C<Delete Group> or C<Load Key>).
1165    
1166    =item target
1167    
1168    ID of the key or group affected.
1169    
1170    =item description
1171    
1172    Short description of the action.
1173    
1174    =back
1175    
1176    =cut
1177    
1178    sub LogOperation {
1179        # Get the parameters.
1180        my ($self, $action, $target, $description) = @_;
1181        # Get the user ID.
1182        my $user = $self->{user};
1183        # Get a timestamp.
1184        my $timeString = Tracer::Now();
1185        # Open the log file for appending.
1186        my $oh = Open(undef, ">>$FIG_Config::var/attributes.log");
1187        # Write the data to it.
1188        Tracer::PutLine($oh, [$timeString, $user, $action, $target, $description]);
1189        # Close the log file.
1190        close $oh;
1191    }
1192    
1193    =head2 Internal Utility Methods
1194    
1195    =head3 _KeywordString
1196    
1197        my $keywordString = $ca->_KeywordString($key, $value);
1198    
1199    Compute the keyword string for a specified key/value pair. This consists of the
1200    key name and value converted to lower case with underscores translated to spaces.
1201    
1202    This method is for internal use only. It is called whenever we need to update or
1203    insert a B<HasValueFor> record.
1204    
1205    =over 4
1206    
1207    =item key
1208    
1209    Name of the relevant attribute key.
1210    
1211    =item target
1212    
1213    ID of the target object to which this key/value pair will be associated.
1214    
1215    =item value
1216    
1217    The value to store for this key/object combination.
1218    
1219    =item RETURN
1220    
1221    Returns the value that should be stored as the keyword string for the specified
1222    key/value pair.
1223    
1224    =back
1225    
1226    =cut
1227    
1228    sub _KeywordString {
1229        # Get the parameters.
1230        my ($self, $key, $value) = @_;
1231        # Get a copy of the key name and convert underscores to spaces.
1232        my $keywordString = $key;
1233        $keywordString =~ s/_/ /g;
1234        # Add the value convert it all to lower case.
1235        my $retVal = lc "$keywordString $value";
1236        # Return the result.
1237        return $retVal;
1238    }
1239    
1240    =head3 _QueryResults
1241    
1242        my @attributeList = $attrDB->_QueryResults($query, @values);
1243    
1244    Match the results of a B<HasValueFor> query against value criteria and return
1245    the results. This is an internal method that splits the values coming back
1246    and matches the sections against the specified section patterns. It serves
1247    as the back end to L</GetAttributes> and L</FindAttributes>.
1248    
1249    =over 4
1250    
1251    =item query
1252    
1253    A query object that will return the desired B<HasValueFor> records.
1254    
1255    =item values
1256    
1257    List of the desired attribute values, section by section. If C<undef>
1258    or an empty string is specified, all values in that section will match. A
1259    generic match can be requested by placing a percent sign (C<%>) at the end.
1260    In that case, all values that match up to and not including the percent sign
1261    will match. You may also specify a regular expression enclosed
1262    in slashes. All values that match the regular expression will be returned. For
1263    performance reasons, only values have this extra capability.
1264    
1265    =item RETURN
1266    
1267    Returns a list of tuples. The first element in the tuple is an object ID, the
1268    second is an attribute key, and the remaining elements are the sections of
1269    the attribute value. All of the tuples will match the criteria set forth in
1270    the parameter list.
1271    
1272    =back
1273    
1274    =cut
1275    
1276    sub _QueryResults {
1277        # Get the parameters.
1278        my ($self, $query, @values) = @_;
1279        # Declare the return value.
1280        my @retVal = ();
1281        # Get the number of value sections we have to match.
1282        my $sectionCount = scalar(@values);
1283        # Loop through the assignments found.
1284        while (my $row = $query->Fetch()) {
1285            # Get the current row's data.
1286            my ($id, $realKey, $subKey, $valueString) = $row->Values(['HasValueFor(to-link)',
1287                                                                      'HasValueFor(from-link)',
1288                                                                      'HasValueFor(subkey)',
1289                                                                      'HasValueFor(value)'
1290                                                                    ]);
1291            # Form the key from the real key and the sub key.
1292            my $key = $self->JoinKey($realKey, $subKey);
1293            # Break the value into sections.
1294            my @sections = split($self->{splitter}, $valueString);
1295            # Match each section against the incoming values. We'll assume we're
1296            # okay unless we learn otherwise.
1297            my $matching = 1;
1298            for (my $i = 0; $i < $sectionCount && $matching; $i++) {
1299                # We need to check to see if this section is generic.
1300                my $value = $values[$i];
1301                Trace("Current value pattern is \"$value\".") if T(4);
1302                if (substr($value, -1, 1) eq '%') {
1303                    Trace("Generic match used.") if T(4);
1304                    # Here we have a generic match.
1305                    my $matchLen = length($values[$i]) - 1;
1306                    $matching = substr($sections[$i], 0, $matchLen) eq
1307                                substr($values[$i], 0, $matchLen);
1308                } elsif ($value =~ m#^/(.+)/[a-z]*$#) {
1309                    Trace("Regular expression detected.") if T(4);
1310                    # Here we have a regular expression match.
1311                    my $section = $sections[$i];
1312                    $matching = eval("\$section =~ $value");
1313                } else {
1314                    # Here we have a strict match.
1315                    Trace("Strict match used.") if T(4);
1316                    $matching = ($sections[$i] eq $values[$i]);
1317                }
1318            }
1319            # If we match, output this row to the return list.
1320            if ($matching) {
1321                push @retVal, [$id, $key, @sections];
1322            }
1323        }
1324        # Return the rows found.
1325        return @retVal;
1326    }
1327    
1328  =head2 FIG Method Replacements  =head2 FIG Method Replacements
1329    
1330  The following methods are used by B<FIG.pm> to replace the previous attribute functionality.  The following methods are used by B<FIG.pm> to replace the previous attribute functionality.
# Line 994  Line 1336 
1336  The idea is that these methods represent attribute manipulation allowed by all users, while  The idea is that these methods represent attribute manipulation allowed by all users, while
1337  the others are only for privileged users with access to the attribute server.  the others are only for privileged users with access to the attribute server.
1338    
1339  In the previous implementation, an attribute had a value and a URL. In the new implementation,  In the previous implementation, an attribute had a value and a URL. In this implementation,
1340  there is only a value. In this implementation, each attribute has only a value. These  each attribute has only a value. These methods will treat the value as a list with the individual
1341  methods will treat the value as a list with the individual elements separated by the  elements separated by the value of the splitter parameter on the constructor (L</new>). The default
1342  value of the splitter parameter on the constructor (L</new>). The default is double  is double colons C<::>.
 colons C<::>.  
1343    
1344  So, for example, an old-style keyword with a value of C<essential> and a URL of  So, for example, an old-style keyword with a value of C<essential> and a URL of
1345  C<http://www.sciencemag.org/cgi/content/abstract/293/5538/2266> using the default  C<http://www.sciencemag.org/cgi/content/abstract/293/5538/2266> using the default
# Line 1011  Line 1352 
1352    
1353  =head3 GetAttributes  =head3 GetAttributes
1354    
1355  C<< my @attributeList = $attrDB->GetAttributes($objectID, $key, @values); >>      my @attributeList = $attrDB->GetAttributes($objectID, $key, @values);
1356    
1357  In the database, attribute values are sectioned into pieces using a splitter  In the database, attribute values are sectioned into pieces using a splitter
1358  value specified in the constructor (L</new>). This is not a requirement of  value specified in the constructor (L</new>). This is not a requirement of
# Line 1050  Line 1391 
1391  which has no wildcard in the key or the object ID, may return multiple tuples.  which has no wildcard in the key or the object ID, may return multiple tuples.
1392    
1393  Value matching in this system works very poorly, because of the way multiple values are  Value matching in this system works very poorly, because of the way multiple values are
1394  stored. For the object ID and key name, we create queries that filter for the desired  stored. For the object ID, key name, and first value, we create queries that filter for the
1395  results. For the values, we do a comparison after the attributes are retrieved from the  desired results. On any filtering by value, we must do a comparison after the attributes are
1396  database. As a result, queries in which filter only on value end up reading the entire  retrieved from the database, since the database has no notion of the multiple values, which
1397  attribute table to find the desired results.  are stored in a single string. As a result, queries in which filter only on value end up
1398    reading a lot more than they need to.
1399    
1400  =over 4  =over 4
1401    
# Line 1095  Line 1437 
1437  sub GetAttributes {  sub GetAttributes {
1438      # Get the parameters.      # Get the parameters.
1439      my ($self, $objectID, $key, @values) = @_;      my ($self, $objectID, $key, @values) = @_;
1440      # We will create one big honking query. The following hash will build the filter      # This hash will map "HasValueFor" fields to patterns. We use it to build the
1441      # clause and a parameter list.      # SQL statement.
1442      my %data = ('HasValueFor(from-link)' => $key, 'HasValueFor(to-link)' => $objectID);      my %data;
1443        # Before we do anything else, we must parse the key. The key is treated by the
1444        # user as a single field, but to us it's actually a real key and a subkey.
1445        # If the key has no splitter and is exact, the real key is the original key
1446        # and the subkey is an empty string. If the key has a splitter, it is
1447        # split into two pieces and each piece is processed separately. If the key has
1448        # no splitter and is generic, the real key is the incoming key and the subkey
1449        # is allowed to be wild. Of course, this only matters if an actual key has
1450        # been specified.
1451        if (defined $key) {
1452            if ($key =~ /$self->{splitter}/) {
1453                # Here we have a two-part key, so we split it normally.
1454                my ($realKey, $subKey) = $self->SplitKey($key);
1455                $data{'HasValueFor(from-link)'} = $realKey;
1456                $data{'HasValueFor(subkey)'} = $subKey;
1457            } elsif (substr($key, -1, 1) eq '%') {
1458                $data{'HasValueFor(from-link)'} = $key;
1459            } else {
1460                $data{'HasValueFor(from-link)'} = $key;
1461                $data{'HasValueFor(subkey)'} = '';
1462            }
1463        }
1464        # Add the object ID to the key information.
1465        $data{'HasValueFor(to-link)'} = $objectID;
1466        # The first value represents a problem, because we can search it using SQL, but not
1467        # in the normal way. If the user specifies a generic search or exact match for
1468        # every alternative value (remember, the values may be specified as a list),
1469        # then we can create SQL filtering for it. If any of the values are specified
1470        # as a regular expression, however, that's a problem, because we need to read
1471        # every value to verify a match.
1472        if (@values > 0) {
1473            # Get the first value and put its alternatives in an array.
1474            my $valueParm = $values[0];
1475            my @valueList;
1476            if (ref $valueParm eq 'ARRAY') {
1477                @valueList = @{$valueParm};
1478            } else {
1479                @valueList = ($valueParm);
1480            }
1481            # Okay, now we have all the possible criteria for the first value in the list
1482            # @valueList. We'll copy the values to a new array in which they have been
1483            # converted to generic requests. If we find a regular-expression match
1484            # anywhere in the list, we toss the whole thing.
1485            my @valuePatterns = ();
1486            my $okValues = 1;
1487            for my $valuePattern (@valueList) {
1488                # Check the pattern type.
1489                if (substr($valuePattern, 0, 1) eq '/') {
1490                    # Regular expressions invalidate the entire process.
1491                    $okValues = 0;
1492                } elsif (substr($valuePattern, -1, 1) eq '%') {
1493                    # A Generic pattern is passed in unmodified.
1494                    push @valuePatterns, $valuePattern;
1495                } else {
1496                    # An exact match is converted to generic.
1497                    push @valuePatterns, "$valuePattern%";
1498                }
1499            }
1500            # If everything works, add the value data to the filtering hash.
1501            if ($okValues) {
1502                $data{'HasValueFor(value)'} = \@valuePatterns;
1503            }
1504        }
1505        # Create some lists to contain the filter fragments and parameter values.
1506      my @filter = ();      my @filter = ();
1507      my @parms = ();      my @parms = ();
1508      # This next loop goes through the different fields that can be specified in the      # This next loop goes through the different fields that can be specified in the
1509      # parameter list and generates filters for each.      # parameter list and generates filters for each. The %data hash that we built above
1510        # contains all the necessary information to do this.
1511      for my $field (keys %data) {      for my $field (keys %data) {
1512          # Accumulate filter information for this field. We will OR together all the          # Accumulate filter information for this field. We will OR together all the
1513          # elements accumulated to create the final result.          # elements accumulated to create the final result.
# Line 1129  Line 1535 
1535                          push @fieldFilter, "$field = ?";                          push @fieldFilter, "$field = ?";
1536                          push @parms, $pattern;                          push @parms, $pattern;
1537                      } else {                      } else {
1538                          # Here we have a generate request, so we will use the LIKE operator to                          # Here we have a generic request, so we will use the LIKE operator to
1539                          # filter the field to this value pattern.                          # filter the field to this value pattern.
1540                          push @fieldFilter, "$field LIKE ?";                          push @fieldFilter, "$field LIKE ?";
1541                          # We must convert the pattern value to an SQL match pattern. First                          # We must convert the pattern value to an SQL match pattern. First
# Line 1151  Line 1557 
1557      # Now @filter contains one or more filter strings and @parms contains the parameter      # Now @filter contains one or more filter strings and @parms contains the parameter
1558      # values to bind to them.      # values to bind to them.
1559      my $actualFilter = join(" AND ", @filter);      my $actualFilter = join(" AND ", @filter);
1560      # Declare the return variable.      # Insure we have at least one filter.
1561      my @retVal = ();      if (! $actualFilter) {
1562      # Get the number of value sections we have to match.          Confess("No filter specified in GetAttributes query.");
1563      my $sectionCount = scalar(@values);      }
1564      # Now we're ready to make our query.      # Now we're ready to make our query.
1565      my $query = $self->Get(['HasValueFor'], $actualFilter, \@parms);      my $query = $self->Get(['HasValueFor'], $actualFilter, \@parms);
1566      # Loop through the assignments found.      # Format the results.
1567      while (my $row = $query->Fetch()) {      my @retVal = $self->_QueryResults($query, @values);
         # Get the current row's data.  
         my ($id, $key, $valueString) = $row->Values(['HasValueFor(to-link)', 'HasValueFor(from-link)',  
                                                       'HasValueFor(value)']);  
         # Break the value into sections.  
         my @sections = split($self->{splitter}, $valueString);  
         # Match each section against the incoming values. We'll assume we're  
         # okay unless we learn otherwise.  
         my $matching = 1;  
         for (my $i = 0; $i < $sectionCount && $matching; $i++) {  
             # We need to check to see if this section is generic.  
             my $value = $values[$i];  
             Trace("Current value pattern is \"$value\".") if T(4);  
             if (substr($value, -1, 1) eq '%') {  
                 Trace("Generic match used.") if T(4);  
                 # Here we have a generic match.  
                 my $matchLen = length($values[$i] - 1);  
                 $matching = substr($sections[$i], 0, $matchLen) eq  
                             substr($values[$i], 0, $matchLen);  
             } elsif ($value =~ m#^/(.+)/[a-z]*$#) {  
                 Trace("Regular expression detected.") if T(4);  
                 # Here we have a regular expression match.  
                 my $section = $sections[$i];  
                 $matching = eval("\$section =~ $value");  
             } else {  
                 # Here we have a strict match.  
                 Trace("Strict match used.") if T(4);  
                 $matching = ($sections[$i] eq $values[$i]);  
             }  
         }  
         # If we match, output this row to the return list.  
         if ($matching) {  
             push @retVal, [$id, $key, @sections];  
         }  
     }  
1568      # Return the rows found.      # Return the rows found.
1569      return @retVal;      return @retVal;
1570  }  }
1571    
1572  =head3 AddAttribute  =head3 AddAttribute
1573    
1574  C<< $attrDB->AddAttribute($objectID, $key, @values); >>      $attrDB->AddAttribute($objectID, $key, @values);
1575    
1576  Add an attribute key/value pair to an object. This method cannot add a new key, merely  Add an attribute key/value pair to an object. This method cannot add a new key, merely
1577  add a value to an existing key. Use L</StoreAttributeKey> to create a new key.  add a value to an existing key. Use L</StoreAttributeKey> to create a new key.
# Line 1238  Line 1610 
1610          # Okay, now we have some reason to believe we can do this. Form the values          # Okay, now we have some reason to believe we can do this. Form the values
1611          # into a scalar.          # into a scalar.
1612          my $valueString = join($self->{splitter}, @values);          my $valueString = join($self->{splitter}, @values);
1613            # Split up the key.
1614            my ($realKey, $subKey) = $self->SplitKey($key);
1615          # Connect the object to the key.          # Connect the object to the key.
1616          $self->InsertObject('HasValueFor', { 'from-link' => $key,          $self->InsertObject('HasValueFor', { 'from-link' => $realKey,
1617                                               'to-link'   => $objectID,                                               'to-link'   => $objectID,
1618                                                 'subkey'    => $subKey,
1619                                               'value'     => $valueString,                                               'value'     => $valueString,
1620                                         });                                         });
1621      }      }
# Line 1250  Line 1625 
1625    
1626  =head3 DeleteAttribute  =head3 DeleteAttribute
1627    
1628  C<< $attrDB->DeleteAttribute($objectID, $key, @values); >>      $attrDB->DeleteAttribute($objectID, $key, @values);
1629    
1630  Delete the specified attribute key/value combination from the database.  Delete the specified attribute key/value combination from the database.
1631    
# Line 1281  Line 1656 
1656          Confess("No object ID specified for DeleteAttribute call.");          Confess("No object ID specified for DeleteAttribute call.");
1657      } elsif (! defined($key)) {      } elsif (! defined($key)) {
1658          Confess("No attribute key specified for DeleteAttribute call.");          Confess("No attribute key specified for DeleteAttribute call.");
1659      } elsif (scalar(@values) == 0) {      } else {
1660          # Here we erase the entire key.          # Split the key into the real key and the subkey.
1661          $self->EraseAttribute($key);          my ($realKey, $subKey) = $self->SplitKey($key);
1662            if ($subKey eq '' && scalar(@values) == 0) {
1663                # Here we erase the entire key for this object.
1664                $self->DeleteRow('HasValueFor', $key, $objectID);
1665      } else {      } else {
1666          # Here we erase the matching values.          # Here we erase the matching values.
1667          my $valueString = join($self->{splitter}, @values);          my $valueString = join($self->{splitter}, @values);
1668          $self->DeleteRow('HasValueFor', $key, $objectID, { value => $valueString });              $self->DeleteRow('HasValueFor', $realKey, $objectID,
1669                                 { subkey => $subKey, value => $valueString });
1670            }
1671      }      }
1672      # Return a one. This is for backward compatability.      # Return a one. This is for backward compatability.
1673      return 1;      return 1;
1674  }  }
1675    
1676    =head3 DeleteMatchingAttributes
1677    
1678        my @deleted = $attrDB->DeleteMatchingAttributes($objectID, $key, @values);
1679    
1680    Delete all attributes that match the specified criteria. This is equivalent to
1681    calling L</GetAttributes> and then invoking L</DeleteAttribute> for each
1682    row found.
1683    
1684    =over 4
1685    
1686    =item objectID
1687    
1688    ID of object whose attributes are to be deleted. If the attributes for multiple
1689    objects are to be deleted, this parameter can be specified as a list reference. If
1690    attributes are to be deleted for all objects, specify C<undef> or an empty string.
1691    Finally, you can delete attributes for a range of object IDs by putting a percent
1692    sign (C<%>) at the end.
1693    
1694    =item key
1695    
1696    Attribute key name. A value of C<undef> or an empty string will match all
1697    attribute keys. If the values are to be deletedfor multiple keys, this parameter can be
1698    specified as a list reference. Finally, you can delete attributes for a range of
1699    keys by putting a percent sign (C<%>) at the end.
1700    
1701    =item values
1702    
1703    List of the desired attribute values, section by section. If C<undef>
1704    or an empty string is specified, all values in that section will match. A
1705    generic match can be requested by placing a percent sign (C<%>) at the end.
1706    In that case, all values that match up to and not including the percent sign
1707    will match. You may also specify a regular expression enclosed
1708    in slashes. All values that match the regular expression will be deleted. For
1709    performance reasons, only values have this extra capability.
1710    
1711    =item RETURN
1712    
1713    Returns a list of tuples for the attributes that were deleted, in the
1714    same form as L</GetAttributes>.
1715    
1716    =back
1717    
1718    =cut
1719    
1720    sub DeleteMatchingAttributes {
1721        # Get the parameters.
1722        my ($self, $objectID, $key, @values) = @_;
1723        # Get the matching attributes.
1724        my @retVal = $self->GetAttributes($objectID, $key, @values);
1725        # Loop through the attributes, deleting them.
1726        for my $tuple (@retVal) {
1727            $self->DeleteAttribute(@{$tuple});
1728        }
1729        # Log this operation.
1730        my $count = @retVal;
1731        $self->LogOperation("Mass Delete", $key, "$count matching attributes deleted.");
1732        # Return the deleted attributes.
1733        return @retVal;
1734    }
1735    
1736  =head3 ChangeAttribute  =head3 ChangeAttribute
1737    
1738  C<< $attrDB->ChangeAttribute($objectID, $key, \@oldValues, \@newValues); >>      $attrDB->ChangeAttribute($objectID, $key, \@oldValues, \@newValues);
1739    
1740  Change the value of an attribute key/value pair for an object.  Change the value of an attribute key/value pair for an object.
1741    
# Line 1347  Line 1787 
1787    
1788  =head3 EraseAttribute  =head3 EraseAttribute
1789    
1790  C<< $attrDB->EraseAttribute($key); >>      $attrDB->EraseAttribute($key);
1791    
1792  Erase all values for the specified attribute key. This does not remove the  Erase all values for the specified attribute key. This does not remove the
1793  key from the database; it merely removes all the values.  key from the database; it merely removes all the values.
# Line 1356  Line 1796 
1796    
1797  =item key  =item key
1798    
1799  Key to erase.  Key to erase. This must be a real key; that is, it cannot have a subkey
1800    component.
1801    
1802  =back  =back
1803    
# Line 1365  Line 1806 
1806  sub EraseAttribute {  sub EraseAttribute {
1807      # Get the parameters.      # Get the parameters.
1808      my ($self, $key) = @_;      my ($self, $key) = @_;
1809      # Delete everything connected to the key. The "keepRoot" option keeps the key in the      # Delete everything connected to the key.
1810      # datanase while deleting everything attached to it.      $self->Disconnect('HasValueFor', 'AttributeKey', $key);
1811      $self->Delete('AttributeKey', $key, keepRoot => 1);      # Log the operation.
1812        $self->LogOperation("Erase Data", $key);
1813      # Return a 1, for backward compatability.      # Return a 1, for backward compatability.
1814      return 1;      return 1;
1815  }  }
1816    
1817  =head3 GetAttributeKeys  =head3 GetAttributeKeys
1818    
1819  C<< my @keyList = $attrDB->GetAttributeKeys($groupName); >>      my @keyList = $attrDB->GetAttributeKeys($groupName);
1820    
1821  Return a list of the attribute keys for a particular group.  Return a list of the attribute keys for a particular group.
1822    
# Line 1402  Line 1844 
1844      return sort @groups;      return sort @groups;
1845  }  }
1846    
1847    =head3 QueryAttributes
1848    
1849        my @attributeData = $ca->QueryAttributes($filter, $filterParms);
1850    
1851    Return the attribute data based on an SQL filter clause. In the filter clause,
1852    the name C<$object> should be used for the object ID, C<$key> should be used for
1853    the key name, C<$subkey> for the subkey value, and C<$value> for the value field.
1854    
1855    =over 4
1856    
1857    =item filter
1858    
1859    Filter clause in the standard ERDB format, except that the field names are C<$object> for
1860    the object ID field, C<$key> for the key name field, C<$subkey> for the subkey field,
1861    and C<$value> for the value field. This abstraction enables us to hide the details of
1862    the database construction from the user.
1863    
1864    =item filterParms
1865    
1866    Parameters for the filter clause.
1867    
1868    =item RETURN
1869    
1870    Returns a list of tuples. Each tuple consists of an object ID, a key (with optional subkey), and
1871    one or more attribute values.
1872    
1873    =back
1874    
1875    =cut
1876    
1877    # This hash is used to drive the substitution process.
1878    my %AttributeParms = (object => 'HasValueFor(to-link)',
1879                          key    => 'HasValueFor(from-link)',
1880                          subkey => 'HasValueFor(subkey)',
1881                          value  => 'HasValueFor(value)');
1882    
1883    sub QueryAttributes {
1884        # Get the parameters.
1885        my ($self, $filter, $filterParms) = @_;
1886        # Declare the return variable.
1887        my @retVal = ();
1888        # Make sue we have filter parameters.
1889        my $realParms = (defined($filterParms) ? $filterParms : []);
1890        # Create the query by converting the filter.
1891        my $realFilter = $filter;
1892        for my $name (keys %AttributeParms) {
1893            $realFilter =~ s/\$$name/$AttributeParms{$name}/g;
1894        }
1895        my $query = $self->Get(['HasValueFor'], $realFilter, $realParms);
1896        # Loop through the results, forming the output attribute tuples.
1897        while (my $result = $query->Fetch()) {
1898            # Get the four values from this query result row.
1899            my ($objectID, $key, $subkey, $value) = $result->Values([$AttributeParms{object},
1900                                                                    $AttributeParms{key},
1901                                                                    $AttributeParms{subkey},
1902                                                                    $AttributeParms{value}]);
1903            # Combine the key and the subkey.
1904            my $realKey = ($subkey ? $key . $self->{splitter} . $subkey : $key);
1905            # Split the value.
1906            my @values = split $self->{splitter}, $value;
1907            # Output the result.
1908            push @retVal, [$objectID, $realKey, @values];
1909        }
1910        # Return the result.
1911        return @retVal;
1912    }
1913    
1914    =head2 Key and ID Manipulation Methods
1915    
1916    =head3 ParseID
1917    
1918        my ($type, $id) = CustomAttributes::ParseID($idValue);
1919    
1920    Determine the type and object ID corresponding to an ID value from the attribute database.
1921    Most ID values consist of a type name and an ID, separated by a colon (e.g. C<Family:aclame|cluster10>);
1922    however, Genomes, Features, and Subsystems are not stored with a type name, so we need to
1923    deduce the type from the ID value structure.
1924    
1925    The theory here is that you can plug the ID and type directly into a Sprout database method, as
1926    follows
1927    
1928        my ($type, $id) = CustomAttributes::ParseID($attrList[$num]->[0]);
1929        my $target = $sprout->GetEntity($type, $id);
1930    
1931    =over 4
1932    
1933    =item idValue
1934    
1935    ID value taken from the attribute database.
1936    
1937    =item RETURN
1938    
1939    Returns a two-element list. The first element is the type of object indicated by the ID value,
1940    and the second element is the actual object ID.
1941    
1942    =back
1943    
1944    =cut
1945    
1946    sub ParseID {
1947        # Get the parameters.
1948        my ($idValue) = @_;
1949        # Declare the return variables.
1950        my ($type, $id);
1951        # Parse the incoming ID. We first check for the presence of an entity name. Entity names
1952        # can only contain letters, which helps to insure typed object IDs don't collide with
1953        # subsystem names (which are untyped).
1954        if ($idValue =~ /^([A-Za-z]+):(.+)/) {
1955            # Here we have a typed ID.
1956            ($type, $id) = ($1, $2);
1957            # Fix the case sensitivity on PDB IDs.
1958            if ($type eq 'PDB') { $id = lc $id; }
1959        } elsif ($idValue =~ /fig\|/) {
1960            # Here we have a feature ID.
1961            ($type, $id) = (Feature => $idValue);
1962        } elsif ($idValue =~ /\d+\.\d+/) {
1963            # Here we have a genome ID.
1964            ($type, $id) = (Genome => $idValue);
1965        } else {
1966            # The default is a subsystem ID.
1967            ($type, $id) = (Subsystem => $idValue);
1968        }
1969        # Return the results.
1970        return ($type, $id);
1971    }
1972    
1973    =head3 FormID
1974    
1975        my $idValue = CustomAttributes::FormID($type, $id);
1976    
1977    Convert an object type and ID pair into an object ID string for the attribute system. Subsystems,
1978    genomes, and features are stored in the database without type information, but all other object IDs
1979    must be prefixed with the object type.
1980    
1981    =over 4
1982    
1983    =item type
1984    
1985    Relevant object type.
1986    
1987    =item id
1988    
1989    ID of the object in question.
1990    
1991    =item RETURN
1992    
1993    Returns a string that will be recognized as an object ID in the attribute database.
1994    
1995    =back
1996    
1997    =cut
1998    
1999    sub FormID {
2000        # Get the parameters.
2001        my ($type, $id) = @_;
2002        # Declare the return variable.
2003        my $retVal;
2004        # Compute the ID string from the type.
2005        if (grep { $type eq $_ } qw(Feature Genome Subsystem)) {
2006            $retVal = $id;
2007        } else {
2008            $retVal = "$type:$id";
2009        }
2010        # Return the result.
2011        return $retVal;
2012    }
2013    
2014    =head3 GetTargetObject
2015    
2016        my $object = CustomAttributes::GetTargetObject($erdb, $idValue);
2017    
2018    Return the database object corresponding to the specified attribute object ID. The
2019    object type associated with the ID value must correspond to an entity name in the
2020    specified database.
2021    
2022    =over 4
2023    
2024    =item erdb
2025    
2026    B<ERDB> object for accessing the target database.
2027    
2028    =item idValue
2029    
2030    ID value retrieved from the attribute database.
2031    
2032    =item RETURN
2033    
2034    Returns a B<ERDBObject> for the attribute value's target object.
2035    
2036    =back
2037    
2038    =cut
2039    
2040    sub GetTargetObject {
2041        # Get the parameters.
2042        my ($erdb, $idValue) = @_;
2043        # Declare the return variable.
2044        my $retVal;
2045        # Get the type and ID for the target object.
2046        my ($type, $id) = ParseID($idValue);
2047        # Plug them into the GetEntity method.
2048        $retVal = $erdb->GetEntity($type, $id);
2049        # Return the resulting object.
2050        return $retVal;
2051    }
2052    
2053    =head3 SplitKey
2054    
2055        my ($realKey, $subKey) = $ca->SplitKey($key);
2056    
2057    Split an external key (that is, one passed in by a caller) into the real key and the sub key.
2058    The real and sub keys are separated by a splitter value (usually C<::>). If there is no splitter,
2059    then the sub key is presumed to be an empty string.
2060    
2061    =over 4
2062    
2063    =item key
2064    
2065    Incoming key to be split.
2066    
2067    =item RETURN
2068    
2069    Returns a two-element list, the first element of which is the real key and the second element of
2070    which is the sub key.
2071    
2072    =back
2073    
2074    =cut
2075    
2076    sub SplitKey {
2077        # Get the parameters.
2078        my ($self, $key) = @_;
2079        # Do the split.
2080        my ($realKey, $subKey) = split($self->{splitter}, $key, 2);
2081        # Insure the subkey has a value.
2082        if (! defined $subKey) {
2083            $subKey = '';
2084        }
2085        # Return the results.
2086        return ($realKey, $subKey);
2087    }
2088    
2089    =head3 JoinKey
2090    
2091        my $key = $ca->JoinKey($realKey, $subKey);
2092    
2093    Join a real key and a subkey together to make an external key. The external key is the attribute key
2094    used by the caller. The real key and the subkey are how the keys are represented in the database. The
2095    real key is the key to the B<AttributeKey> entity. The subkey is an attribute of the B<HasValueFor>
2096    relationship.
2097    
2098    =over 4
2099    
2100    =item realKey
2101    
2102    The real attribute key.
2103    
2104    =item subKey
2105    
2106    The subordinate portion of the attribute key.
2107    
2108    =item RETURN
2109    
2110    Returns a single string representing both keys.
2111    
2112    =back
2113    
2114    =cut
2115    
2116    sub JoinKey {
2117        # Get the parameters.
2118        my ($self, $realKey, $subKey) = @_;
2119        # Declare the return variable.
2120        my $retVal;
2121        # Check for a subkey.
2122        if ($subKey eq '') {
2123            # No subkey, so the real key is the key.
2124            $retVal = $realKey;
2125        } else {
2126            # Subkey found, so the two pieces must be joined by a splitter.
2127            $retVal = "$realKey$self->{splitter}$subKey";
2128        }
2129        # Return the result.
2130        return $retVal;
2131    }
2132    
2133    
2134    =head3 AttributeTable
2135    
2136        my $tableHtml = CustomAttributes::AttributeTable($cgi, @attrList);
2137    
2138    Format the attribute data into an HTML table.
2139    
2140    =over 4
2141    
2142    =item cgi
2143    
2144    CGI query object used to generate the HTML
2145    
2146    =item attrList
2147    
2148    List of attribute results, in the format returned by the L</GetAttributes> or
2149    L</QueryAttributes> methods.
2150    
2151    =item RETURN
2152    
2153    Returns an HTML table displaying the attribute keys and values.
2154    
2155    =back
2156    
2157    =cut
2158    
2159    sub AttributeTable {
2160        # Get the parameters.
2161        my ($cgi, @attrList) = @_;
2162        # Accumulate the table rows.
2163        my @html = ();
2164        for my $attrData (@attrList) {
2165            # Format the object ID and key.
2166            my @columns = map { CGI::escapeHTML($_) } @{$attrData}[0,1];
2167            # Now we format the values. These remain unchanged unless one of them is a URL.
2168            my $lastValue = scalar(@{$attrData}) - 1;
2169            push @columns, map { $_ =~ /^http:/ ? $cgi->a({ href => $_ }, $_) : $_ } @{$attrData}[2 .. $lastValue];
2170            # Assemble the values into a table row.
2171            push @html, $cgi->Tr($cgi->td(\@columns));
2172        }
2173        # Format the table in the return variable.
2174        my $retVal = $cgi->table({ border => 2 }, $cgi->Tr($cgi->th(['Object', 'Key', 'Values'])), @html);
2175        # Return it.
2176        return $retVal;
2177    }
2178  1;  1;

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.32

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3