[Bio] / Sprout / CustomAttributes.pm Repository:
ViewVC logotype

Diff of /Sprout/CustomAttributes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.11, Wed Nov 29 20:28:52 2006 UTC revision 1.34, Wed Aug 13 15:47:32 2008 UTC
# Line 8  Line 8 
8      use strict;      use strict;
9      use Tracer;      use Tracer;
10      use ERDBLoad;      use ERDBLoad;
11        use Stats;
12        use Time::HiRes qw(time);
13        use FIGRules;
14    
15  =head1 Custom SEED Attribute Manager  =head1 Custom SEED Attribute Manager
16    
# Line 27  Line 30 
30  The actual attribute values are stored as a relationship between the attribute  The actual attribute values are stored as a relationship between the attribute
31  keys and the objects. There can be multiple values for a single key/object pair.  keys and the objects. There can be multiple values for a single key/object pair.
32    
33    =head3 Object IDs
34    
35    The object ID is normally represented as
36    
37        I<type>:I<id>
38    
39    where I<type> is the object type (C<Role>, C<Coupling>, etc.) and I<id> is
40    the actual object ID. Note that the object type must consist of only upper- and
41    lower-case letters! Thus, C<GenomeGroup> is a valid object type, but
42    C<genome_group> is not. Given that restriction, the object ID
43    
44        Family:aclame|cluster10
45    
46    would represent the FIG family C<aclame|cluster10>. For historical reasons,
47    there are three exceptions: subsystems, genomes, and features do not need
48    a type. So, for PEG 3361 of Streptomyces coelicolor A3(2), you simply code
49    
50        fig|100226.1.peg.3361
51    
52    The methods L</ParseID> and L</FormID> can be used to make this all seem
53    more consistent. Given any object ID string, L</ParseID> will convert it to an
54    object type and ID, and given any object type and ID, L</FormID> will
55    convert it to an object ID string. The attribute database is pretty
56    freewheeling about what it will allow for an ID; however, for best
57    results, the type should match an entity type from a Sprout genetics
58    database. If this rule is followed, then the database object
59    corresponding to an ID in the attribute database could be retrieved using
60    the L</GetTargetObject> method.
61    
62        my $object = CustomAttributes::GetTargetObject($sprout, $idValue);
63    
64    =head3 Retrieval and Logging
65    
66  The full suite of ERDB retrieval capabilities is provided. In addition,  The full suite of ERDB retrieval capabilities is provided. In addition,
67  custom methods are provided specific to this application. To get all  custom methods are provided specific to this application. To get all
68  the values of the attribute C<essential> in a specified B<Feature>, you  the values of the attribute C<essential> in a specified B<Feature>, you
# Line 36  Line 72 
72    
73  where I<$fid> contains the ID of the desired feature.  where I<$fid> contains the ID of the desired feature.
74    
75  New attribute keys must be defined before they can be used. A web interface  Keys can be split into two pieces using the splitter value defined in the
76  is provided for this purpose.  constructor (the default is C<::>). The first piece of the key is called
77    the I<real key>. This portion of the key must be defined using the
78    web interface (C<Attributes.cgi>). The second portion of the key is called
79    the I<sub key>, and can take any value.
80    
81    Major attribute activity is recorded in a log (C<attributes.log>) in the
82    C<$FIG_Config::var> directory. The log reports the user name, time, and
83    the details of the operation. The user name will almost always be unknown,
84    the exception being when it is specified in this object's constructor
85    (see L</new>).
86    
87  =head2 FIG_Config Parameters  =head2 FIG_Config Parameters
88    
# Line 81  Line 126 
126  functions as data to the attribute management process, so if the data is  functions as data to the attribute management process, so if the data is
127  moved, this file must go with it.  moved, this file must go with it.
128    
129    =item attr_default_table
130    
131    Name of the default relationship for attribute values. If not present,
132    C<HasValueFor> is used.
133    
134  =back  =back
135    
136  =head2 Public Methods  =head2 Public Methods
137    
138  =head3 new  =head3 new
139    
140  C<< my $attrDB = CustomAttributes->new($splitter); >>      my $attrDB = CustomAttributes->new(%options);
141    
142  Construct a new CustomAttributes object.  Construct a new CustomAttributes object. The following options are
143    supported.
144    
145  =over 4  =over 4
146    
147  =item splitter  =item splitter
148    
149  Value to be used to split attribute values into sections in the  Value to be used to split attribute values into sections in the
150  L</Fig Replacement Methods>. The default is a double colon C<::>.  L</Fig Replacement Methods>. The default is a double colon C<::>,
151  If you do not use the replacement methods, you do not need to  and should only be overridden in extreme circumstances.
152  worry about this parameter.  
153    =item user
154    
155    Name of the current user. This will appear in the attribute log.
156    
157  =back  =back
158    
# Line 106  Line 160 
160    
161  sub new {  sub new {
162      # Get the parameters.      # Get the parameters.
163      my ($class, $splitter) = @_;      my ($class, %options) = @_;
164        # Get the name of the default table.
165      # Connect to the database.      # Connect to the database.
166      my $dbh = DBKernel->new($FIG_Config::attrDbms, $FIG_Config::attrDbName,      my $dbh = DBKernel->new($FIG_Config::attrDbms, $FIG_Config::attrDbName,
167                              $FIG_Config::attrUser, $FIG_Config::attrPass,                              $FIG_Config::attrUser, $FIG_Config::attrPass,
# Line 116  Line 171 
171      my $xmlFileName = $FIG_Config::attrDBD;      my $xmlFileName = $FIG_Config::attrDBD;
172      my $retVal = ERDB::new($class, $dbh, $xmlFileName);      my $retVal = ERDB::new($class, $dbh, $xmlFileName);
173      # Store the splitter value.      # Store the splitter value.
174      $retVal->{splitter} = (defined($splitter) ? $splitter : '::');      $retVal->{splitter} = $options{splitter} || '::';
175        # Store the user name.
176        $retVal->{user} = $options{user} || '<unknown>';
177        Trace("User $retVal->{user} selected for attribute object.") if T(3);
178        # Compute the default value table name. If it's not overridden, the
179        # default is HasValueFor.
180        $retVal->{defaultRel} = $FIG_Config::attr_default_table || 'HasValueFor';
181      # Return the result.      # Return the result.
182      return $retVal;      return $retVal;
183  }  }
184    
185  =head3 StoreAttributeKey  =head3 StoreAttributeKey
186    
187  C<< $attrDB->StoreAttributeKey($attributeName, $type, $notes, \@groups); >>      $attrDB->StoreAttributeKey($attributeName, $notes, \@groups, $table);
188    
189  Create or update an attribute for the database.  Create or update an attribute for the database.
190    
# Line 131  Line 192 
192    
193  =item attributeName  =item attributeName
194    
195  Name of the attribute. It must be a valid ERDB field name, consisting entirely of  Name of the attribute (the real key). If it does not exist already, it will be created.
 letters, digits, and hyphens, with a letter at the beginning. If it does not  
 exist already, it will be created.  
   
 =item type  
   
 Data type of the attribute. This must be a valid ERDB data type name.  
196    
197  =item notes  =item notes
198    
# Line 148  Line 203 
203  Reference to a list of the groups to which the attribute should be associated.  Reference to a list of the groups to which the attribute should be associated.
204  This will replace any groups to which the attribute is currently attached.  This will replace any groups to which the attribute is currently attached.
205    
206    =item table
207    
208    The name of the relationship in which the attribute's values are to be stored.
209    If empty or undefined, the default relationship (usually C<HasValueFor>) will be
210    assumed.
211    
212  =back  =back
213    
214  =cut  =cut
215    
216  sub StoreAttributeKey {  sub StoreAttributeKey {
217      # Get the parameters.      # Get the parameters.
218      my ($self, $attributeName, $type, $notes, $groups) = @_;      my ($self, $attributeName, $notes, $groups, $table) = @_;
219      # Declare the return variable.      # Declare the return variable.
220      my $retVal;      my $retVal;
221        # Default the table name.
222        if (! $table) {
223            $table = $self->{defaultRel};
224        }
225      # Get the data type hash.      # Get the data type hash.
226      my %types = ERDB::GetDataTypes();      my %types = ERDB::GetDataTypes();
227      # Validate the initial input values.      # Validate the initial input values.
228      if (! ERDB::ValidateFieldName($attributeName)) {      if ($attributeName =~ /$self->{splitter}/) {
229          Confess("Invalid attribute name \"$attributeName\" specified.");          Confess("Invalid attribute name \"$attributeName\" specified.");
230      } elsif (! $notes || length($notes) < 25) {      } elsif (! $notes) {
231          Confess("Missing or incomplete description for $attributeName.");          Confess("Missing description for $attributeName.");
232      } elsif (! exists $types{$type}) {      } elsif (! grep { $_ eq $table } $self->GetConnectingRelationships('AttributeKey')) {
233          Confess("Invalid data type \"$type\" for $attributeName.");          Confess("Invalid relationship name \"$table\" specified as a custom attribute table.");
234      } else {      } else {
235            # Create a variable to hold the action to be displayed for the log (Add or Update).
236            my $action;
237          # Okay, we're ready to begin. See if this key exists.          # Okay, we're ready to begin. See if this key exists.
238          my $attribute = $self->GetEntity('AttributeKey', $attributeName);          my $attribute = $self->GetEntity('AttributeKey', $attributeName);
239          if (defined($attribute)) {          if (defined($attribute)) {
240              # It does, so we do an update.              # It does, so we do an update.
241                $action = "Update Key";
242              $self->UpdateEntity('AttributeKey', $attributeName,              $self->UpdateEntity('AttributeKey', $attributeName,
243                                  { description => $notes, 'data-type' => $type });                                  { description => $notes,
244                                      'relationship-name' => $table});
245              # Detach the key from its current groups.              # Detach the key from its current groups.
246              $self->Disconnect('IsInGroup', 'AttributeKey', $attributeName);              $self->Disconnect('IsInGroup', 'AttributeKey', $attributeName);
247          } else {          } else {
248              # It doesn't, so we do an insert.              # It doesn't, so we do an insert.
249                $action = "Insert Key";
250              $self->InsertObject('AttributeKey', { id => $attributeName,              $self->InsertObject('AttributeKey', { id => $attributeName,
251                                  description => $notes, 'data-type' => $type });                                  description => $notes,
252                                    'relationship-name' => $table});
253          }          }
254          # Attach the key to the specified groups. (We presume the groups already          # Attach the key to the specified groups. (We presume the groups already
255          # exist.)          # exist.)
# Line 186  Line 257 
257              $self->InsertObject('IsInGroup', { 'from-link' => $attributeName,              $self->InsertObject('IsInGroup', { 'from-link' => $attributeName,
258                                                 'to-link'   => $group });                                                 'to-link'   => $group });
259          }          }
260            # Log the operation.
261            $self->LogOperation($action, $attributeName, "Group list is " . join(" ", @{$groups}));
262      }      }
263  }  }
264    
 =head3 LoadAttributeKey  
   
 C<< my $stats = $attrDB->LoadAttributeKey($keyName, $fh, $keyCol, $dataCol, %options); >>  
   
 Load the specified attribute from the specified file. The file should be a  
 tab-delimited file with internal tab and new-line characters escaped. This is  
 the typical TBL-style file used by most FIG applications. One of the columns  
 in the input file must contain the appropriate object id value and the other the  
 corresponding attribute value.  
   
 =over 4  
   
 =item keyName  
   
 Key of the attribute to load.  
   
 =item fh  
   
 Open file handle for the input file.  
   
 =item idCol  
   
 Index (0-based) of the column containing the ID field. The ID field should  
 contain the ID of an instance of the named entity.  
   
 =item dataCol  
   
 Index (0-based) of the column containing the data value field.  
   
 =item options  
   
 Hash specifying the options for this load.  
   
 =item RETURN  
   
 Returns a statistics object for the load process.  
   
 =back  
   
 The available options are as follows.  
   
 =over 4  
   
 =item erase  
   
 If TRUE, the key's values will all be erased before loading. (Doing so  
 makes for a faster load.)  
   
 =back  
   
 =cut  
   
 sub LoadAttributeKey {  
     # Get the parameters.  
     my ($self, $keyName, $fh, $idCol, $dataCol, %options) = @_;  
     # Create the return variable.  
     my $retVal = Stats->new("lineIn", "shortLine", "newObject");  
     # Compute the minimum number of fields required in each input line.  
     my $minCols = ($idCol < $dataCol ? $idCol : $idCol) + 1;  
     # Insure the attribute key exists.  
     my $found = $self->GetEntity('AttributeKey', $keyName);  
     if (! defined $found) {  
         Confess("Attribute key \"$keyName\" not found in database.");  
     } else {  
         # Erase the key's current values.  
         $self->EraseAttribute($keyName);  
         # Save a list of the object IDs we need to add.  
         my %objectIDs = ();  
         # Loop through the input file.  
         while (! eof $fh) {  
             # Get the next line of the file.  
             my @fields = Tracer::GetLine($fh);  
             $retVal->Add(lineIn => 1);  
             # Now we need to validate the line.  
             if (scalar(@fields) < $minCols) {  
                 $retVal->Add(shortLine => 1);  
             } else {  
                 # It's valid, so get the ID and value.  
                 my ($id, $value) = ($fields[$idCol], $fields[$dataCol]);  
                 # Denote we're using this input line.  
                 $retVal->Add(lineUsed => 1);  
                 # Now the fun begins. Find out if we need to create a target object record for this object ID.  
                 if (! exists $objectIDs{$id}) {  
                     my $found = $self->Exists('TargetObject', $id);  
                     if (! $found) {  
                         $self->InsertObject('TargetObject', { id => $id });  
                     }  
                     $objectIDs{$id} = 1;  
                     $retVal->Add(newObject => 1);  
                 }  
                 # Now we insert the attribute.  
                 $self->InsertObject('HasValueFor', { from => $keyName, to => $id, value => $value });  
                 $retVal->Add(newValue => 1);  
             }  
         }  
     }  
     # Return the statistics.  
     return $retVal;  
 }  
   
265    
266  =head3 DeleteAttributeKey  =head3 DeleteAttributeKey
267    
268  C<< my $stats = $attrDB->DeleteAttributeKey($attributeName); >>      my $stats = $attrDB->DeleteAttributeKey($attributeName);
269    
270  Delete an attribute from the custom attributes database.  Delete an attribute from the custom attributes database.
271    
# Line 315  Line 288 
288      my ($self, $attributeName) = @_;      my ($self, $attributeName) = @_;
289      # Delete the attribute key.      # Delete the attribute key.
290      my $retVal = $self->Delete('AttributeKey', $attributeName);      my $retVal = $self->Delete('AttributeKey', $attributeName);
291        # Log this operation.
292        $self->LogOperation("Delete Key", $attributeName, "Key will no longer be available for use by anyone.");
293      # Return the result.      # Return the result.
294      return $retVal;      return $retVal;
295    
# Line 322  Line 297 
297    
298  =head3 NewName  =head3 NewName
299    
300  C<< my $text = CustomAttributes::NewName(); >>      my $text = CustomAttributes::NewName();
301    
302  Return the string used to indicate the user wants to add a new attribute.  Return the string used to indicate the user wants to add a new attribute.
303    
# Line 332  Line 307 
307      return "(new)";      return "(new)";
308  }  }
309    
 =head3 ControlForm  
   
 C<< my $formHtml = $attrDB->ControlForm($cgi, $name, \%keys); >>  
   
 Return a form that can be used to control the creation and modification of  
 attributes. Only a subset of the attribute keys will be displayed, as  
 determined by the incoming list.  
   
 =over 4  
   
 =item cgi  
   
 CGI query object used to create HTML.  
   
 =item name  
   
 Name to give to the form. This should be unique for the web page.  
   
 =item keys  
   
 Reference to a hash mapping attribute keys to n-tuples. Each tuple will contain the  
 attribute's data type, its description, and a list of the groups in which it participates.  
   
 =item RETURN  
   
 Returns the HTML for a form that can be used to  submit instructions to the C<Attributes.cgi> script  
 for loading, creating, displaying, changing, or deleting an attribute. Note that only the form  
 controls are generated. The form tags are left to the caller.  
   
 =back  
   
 =cut  
   
 sub ControlForm {  
     # Get the parameters.  
     my ($self, $cgi, $name, $keys) = @_;  
     # Declare the return list.  
     my @retVal = ();  
     # We'll put the controls in a table. Nothing else ever seems to look nice.  
     push @retVal, $cgi->start_table({ border => 2, cellpadding => 2 });  
     # The first row is for selecting the field name.  
     push @retVal, $cgi->Tr($cgi->th("Select a Field"),  
                            $cgi->td($self->FieldMenu($cgi, 10, 'fieldName', $keys,  
                                                      new => 1,  
                                                      notes => "document.$name.notes.value",  
                                                      type => "document.$name.dataType.value",  
                                                      groups => "document.$name.groups")));  
     # Now we set up a dropdown for the data types. The values will be the  
     # data type names, and the labels will be the descriptions.  
     my %types = ERDB::GetDataTypes();  
     my %labelMap = map { $_ => $types{$_}->{notes} } keys %types;  
     my $typeMenu = $cgi->popup_menu(-name   => 'dataType',  
                                     -values => [sort keys %types],  
                                     -labels => \%labelMap,  
                                     -default => 'string');  
     # Allow the user to specify a new field name. This is required if the  
     # user has selected the "(new)" marker. We put a little scriptlet in here that  
     # selects the (new) marker when the user enters the field.  
     push @retVal, "<script language=\"javaScript\">";  
     my $fieldField = "document.$name.fieldName";  
     my $newName = "\"" . NewName() . "\"";  
     push @retVal, $cgi->Tr($cgi->th("New Field Name"),  
                            $cgi->td($cgi->textfield(-name => 'newName',  
                                                     -size => 30,  
                                                     -value => "",  
                                                     -onFocus => "setIfEmpty($fieldField, $newName);")),  
                                     );  
     push @retVal, $cgi->Tr($cgi->th("Data type"),  
                            $cgi->td($typeMenu));  
     # The next row is for the notes.  
     push @retVal, $cgi->Tr($cgi->th("Description"),  
                            $cgi->td($cgi->textarea(-name => 'notes',  
                                                    -rows => 6,  
                                                    -columns => 80))  
                           );  
     # Now we have the groups, which are implemented as a checkbox group.  
     my @groups = $self->GetGroups();  
     push @retVal, $cgi->Tr($cgi->th("Groups"),  
                            $cgi->td($cgi->checkbox_group(-name=>'groups',  
                                     -values=> \@groups))  
                           );  
     # If the user wants to upload new values for the field, then we have  
     # an upload file name and column indicators.  
     push @retVal, $cgi->Tr($cgi->th("Upload Values"),  
                            $cgi->td($cgi->filefield(-name => 'newValueFile',  
                                                     -size => 20) .  
                                     " Key&nbsp;" .  
                                     $cgi->textfield(-name => 'keyCol',  
                                                     -size => 3,  
                                                     -default => 0) .  
                                     " Value&nbsp;" .  
                                     $cgi->textfield(-name => 'valueCol',  
                                                     -size => 3,  
                                                     -default => 1)  
                                    ),  
                           );  
     # Now the three buttons: STORE, SHOW, and DELETE.  
     push @retVal, $cgi->Tr($cgi->th("&nbsp;"),  
                            $cgi->td({align => 'center'},  
                                     $cgi->submit(-name => 'Delete', -value => 'DELETE') . " " .  
                                     $cgi->submit(-name => 'Store',  -value => 'STORE') . " " .  
                                     $cgi->submit(-name => 'Show',   -value => 'SHOW')  
                                    )  
                           );  
     # Close the table and the form.  
     push @retVal, $cgi->end_table();  
     # Return the assembled HTML.  
     return join("\n", @retVal, "");  
 }  
   
310  =head3 LoadAttributesFrom  =head3 LoadAttributesFrom
311    
312  C<< my $stats = $attrDB->LoadAttributesFrom($fileName, %options); >>  C<< my $stats = $attrDB->LoadAttributesFrom($fileName, %options); >>
313    
314  Load attributes from the specified tab-delimited file. Each line of the file must  Load attributes from the specified tab-delimited file. Each line of the file must
315  contain an object ID in the first column, an attribute key name in the second  contain an object ID in the first column, an attribute key name in the second
316  column, and attribute values in the remaining columns. The attribute values will  column, and attribute values in the remaining columns. The attribute values must
317  be assembled into a single value using the splitter code.  be assembled into a single value using the splitter code. In addition, the key names may
318    contain a splitter. If this is the case, the portion of the key after the splitter is
319    treated as a subkey.
320    
321  =over 4  =over 4
322    
323  =item fileName  =item fileName
324    
325  Name of the file from which to load the attributes.  Name of the file from which to load the attributes, or an open handle for the file.
326    (The latter enables the method to be used in conjunction with the CGI form upload
327    control.)
328    
329  =item options  =item options
330    
# Line 471  Line 340 
340    
341  =over 4  =over 4
342    
343    =item mode
344    
345    Loading mode. Legal values are C<low_priority> (which reduces the task priority
346    of the load) and C<concurrent> (which reduces the locking cost of the load). The
347    default is a normal load.
348    
349  =item append  =item append
350    
351  If TRUE, then the attributes will be appended to existing data; otherwise, the  If TRUE, then the attributes will be appended to existing data; otherwise, the
352  first time a key name is encountered, it will be erased.  first time a key name is encountered, it will be erased.
353    
354    =item archive
355    
356    If specified, the name of a file into which the incoming data should be saved.
357    If I<resume> is also specified, only the lines actually loaded will be put
358    into this file.
359    
360    =item objectType
361    
362    If specified, the specified object type will be prefixed to each object ID.
363    
364    =item resume
365    
366    If specified, key-value pairs already in the database will not be reinserted.
367    Specify a number to start checking after the specified number of lines and
368    then admit everything after the first line not yet loaded. Specify C<careful>
369    to check every single line. Specify C<none> to ignore this option. The default
370    is C<none>. So, if you believe that a previous load failed somewhere after 50000
371    lines, a resume value of C<50000> would skip 50000 lines in the file, then
372    check each line after that until it finds one not already in the database. The
373    first such line found and all lines after that will be loaded. On the other
374    hand, if you have a file of 100000 records, and some have been loaded and some
375    not, you would use the word C<careful>, so that every line would be checked before
376    it is inserted. A resume of C<0> will start checking the first line of the
377    input file and then begin loading once it finds a line not in the database.
378    
379    =item chunkSize
380    
381    Number of lines to load in each burst. The default is 10,000.
382    
383  =back  =back
384    
385  =cut  =cut
# Line 484  Line 388 
388      # Get the parameters.      # Get the parameters.
389      my ($self, $fileName, %options) = @_;      my ($self, $fileName, %options) = @_;
390      # Declare the return variable.      # Declare the return variable.
391      my $retVal = Stats->new('keys', 'values');      my $retVal = Stats->new('keys', 'values', 'linesOut');
392        # Initialize the timers.
393        my ($eraseTime, $archiveTime, $checkTime) = (0, 0, 0);
394      # Check for append mode.      # Check for append mode.
395      my $append = ($options{append} ? 1 : 0);      my $append = ($options{append} ? 1 : 0);
396        # Check for resume mode.
397        my $resume = (defined($options{resume}) ? $options{resume} : 'none');
398      # Create a hash of key names found.      # Create a hash of key names found.
399      my %keyHash = ();      my %keyHash = ();
400      # Open the file for input.      # Create a hash of table names to files. Most attributes go into the HasValueFor
401      my $fh = Open(undef, "<$fileName");      # table, but some are put into other tables. Each table name will be mapped
402        # to a sub-hash with keys "fileName" (output file for the table) and "count"
403        # (number of lines in the file).
404        my %tableHash = ();
405        # Compute the chunk size.
406        my $chunkSize = ($options{chunkSize} ? $options{chunkSize} : 10000);
407        # Open the file for input. Note we must anticipate the possibility of an
408        # open filehandle being passed in. This occurs when the user is submitting
409        # the load file over the web.
410        my $fh;
411        if (ref $fileName) {
412            Trace("Using file opened by caller.") if T(3);
413            $fh = $fileName;
414        } else {
415            Trace("Attributes will be loaded from $fileName.") if T(3);
416            $fh = Open(undef, "<$fileName");
417        }
418        # Trace the mode.
419        if (T(3)) {
420            if ($options{mode}) {
421                Trace("Mode is $options{mode}.")
422            } else {
423                Trace("No mode specified.")
424            }
425        }
426        # Now check to see if we need to archive.
427        my $ah;
428        if (exists $options{archive}) {
429            my $ah = Open(undef, ">$options{archive}");
430            Trace("Load file will be archived to $options{archive}.") if T(3);
431        }
432        # Insure we recover from errors.
433        eval {
434            # If we have a resume number, process it here.
435            if ($resume =~ /\d+/) {
436                Trace("Skipping $resume lines.") if T(2);
437                my $startTime = time();
438                # Skip the specified number of lines.
439                for (my $skipped = 0; ! eof($fh) && $skipped < $resume; $skipped++) {
440                    my $line = <$fh>;
441                    $retVal->Add(skipped => 1);
442                }
443                $checkTime += time() - $startTime;
444            }
445      # Loop through the file.      # Loop through the file.
446            Trace("Starting load.") if T(2);
447      while (! eof $fh) {      while (! eof $fh) {
448                # Read the current line.
449          my ($id, $key, @values) = Tracer::GetLine($fh);          my ($id, $key, @values) = Tracer::GetLine($fh);
450          $retVal->Add(linesIn => 1);          $retVal->Add(linesIn => 1);
451          # Do some validation.          # Do some validation.
452          if (! defined($id)) {              if (! $id) {
453              # We ignore blank lines.              # We ignore blank lines.
454              $retVal->Add(blankLines => 1);              $retVal->Add(blankLines => 1);
455                } elsif (substr($id, 0, 1) eq '#') {
456                    # A line beginning with a pound sign is a comment.
457                    $retVal->Add(comments => 1);
458          } elsif (! defined($key)) {          } elsif (! defined($key)) {
459              # An ID without a key is a serious error.              # An ID without a key is a serious error.
460              my $lines = $retVal->Ask('linesIn');              my $lines = $retVal->Ask('linesIn');
461              Confess("Line $lines in $fileName has no attribute key.");              Confess("Line $lines in $fileName has no attribute key.");
462                } elsif (! @values) {
463                    # A line with no values is not allowed.
464                    my $lines = $retVal->Ask('linesIn');
465                    Trace("Line $lines for key $key has no attribute values.") if T(1);
466                    $retVal->Add(skipped => 1);
467          } else {          } else {
468                    # Check to see if we need to fix up the object ID.
469                    if ($options{objectType}) {
470                        $id = "$options{objectType}:$id";
471                    }
472                    # The key contains a real part and an optional sub-part. We need the real part.
473                    my ($realKey, $subKey) = $self->SplitKey($key);
474              # Now we need to check for a new key.              # Now we need to check for a new key.
475              if (! exists $keyHash{$key}) {                  if (! exists $keyHash{$realKey}) {
476                  # This is a new key. Verify that it exists.                      my $keyObject = $self->GetEntity(AttributeKey => $realKey);
477                  if (! $self->Exists('AttributeKey', $key)) {                      if (! defined($keyObject)) {
478                            # Here the specified key does not exist, which is an error.
479                      my $line = $retVal->Ask('linesIn');                      my $line = $retVal->Ask('linesIn');
480                      Confess("Attribute \"$key\" on line $line of $fileName not found in database.");                          Confess("Attribute \"$realKey\" on line $line of $fileName not found in database.");
481                  } else {                  } else {
482                      # Make sure we know this is no longer a new key.                          # Make sure we know this is no longer a new key. We do this by putting
483                      $keyHash{$key} = 1;                          # its table name in the key hash.
484                            $keyHash{$realKey} = $keyObject->PrimaryValue('AttributeKey(relationship-name)');
485                      $retVal->Add(keys => 1);                      $retVal->Add(keys => 1);
486                      # If this is NOT append mode, erase the key.                          # If this is NOT append mode, erase the key. This does not delete the key
487                            # itself; it just clears out all the values.
488                      if (! $append) {                      if (! $append) {
489                          $self->EraseAttribute($key);                              my $startTime = time();
490                      }                              $self->EraseAttribute($realKey);
491                                $eraseTime += time() - $startTime;
492                                Trace("Attribute $realKey erased.") if T(3);
493                            }
494                        }
495                        Trace("Key $realKey found.") if T(3);
496                    }
497                    # If we're in resume mode, check to see if this insert is redundant.
498                    my $ok = 1;
499                    if ($resume ne 'none') {
500                        my $startTime = time();
501                        my $count = $self->GetAttributes($id, $key, @values);
502                        if ($count) {
503                            # Here the record is found, so we skip it.
504                            $ok = 0;
505                            $retVal->Add(skipped => 1);
506                        } else {
507                            # Here the record is not found. If we're in non-careful mode, we
508                            # stop resume checking at this point.
509                            if ($resume ne 'careful') {
510                                $resume = 'none';
511                            }
512                        }
513                        $checkTime += time() - $startTime;
514                    }
515                    if ($ok) {
516                        # We're in business. First, archive this row.
517                        if (defined $ah) {
518                            my $startTime = time();
519                            Tracer::PutLine($ah, [$id, $key, @values]);
520                            $archiveTime += time() - $startTime;
521                        }
522                        # We need to format the attribute data so it will work
523                        # as if it were a load file. This means we join the
524                        # values.
525                        my $valueString = join('::', @values);
526                        # Now we need to get access to the key's load file. Check for it in the
527                        # table hash.
528                        my $keyTable = $keyHash{$realKey};
529                        if (! exists $tableHash{$keyTable}) {
530                            # This is a new table, so we need to set it up. First, we get
531                            # a temporary file for it.
532                            my $tempFileName = FIGRules::GetTempFileName(sessionID => $$ . $keyTable,
533                                                                         extension => 'dtx');
534                            my $oh = Open(undef, ">$tempFileName");
535                            # Now we create its descriptor in the table hash.
536                            $tableHash{$keyTable} = {fileName => $tempFileName, handle => $oh, count => 0};
537                        }
538                        # Everything is all set up, so we put the value in the temporary file and
539                        # count it.
540                        my $tableData = $tableHash{$keyTable};
541                        my $startTime = time();
542                        Tracer::PutLine($tableData->{handle}, [$realKey, $id, $subKey, $valueString]);
543                        $archiveTime += time() - $startTime;
544                        $retVal->Add(linesOut => 1);
545                        $tableData->{count}++;
546                        # See if it's time to load a chunk.
547                        if ($tableData->{count} >= $chunkSize) {
548                            # We've filled a chunk, so it's time.
549                            close $tableData->{handle};
550                            $self->_LoadAttributeTable($keyTable, $tableData->{fileName}, $retVal);
551                            # Reset for the next chunk.
552                            $tableData->{count} = 0;
553                            $tableData->{handle} = Open(undef, ">$tableData->{fileName}");
554                  }                  }
555                  Trace("Key $key found.") if T(3);                  } else {
556                        # Here we skipped because of resume mode.
557                        $retVal->Add(resumeSkip => 1);
558              }              }
559              # Now we know the key is valid. Add this value.                  Trace($retVal->Ask('values') . " values processed.") if $retVal->Check(values => 1000) && T(3);
             $self->AddAttribute($id, $key, @values);  
             my $progress = $retVal->Add(values => 1);  
             Trace("$progress values loaded.") if T(3) && ($progress % 1000 == 0);  
   
560          }          }
561      }      }
562            # Now we close the archive file. Note we undefine the handle so the error methods know
563            # not to worry.
564            if (defined $ah) {
565                close $ah;
566                undef $ah;
567            }
568            # Now we load the residual from the temporary files (if any). This time we'll do an
569            # analyze as well.
570            for my $tableName (keys %tableHash) {
571                # Get the data for this table.
572                my $tableData = $tableHash{$tableName};
573                # Close the handle. ERDB will re-open it for input later.
574                close $tableData->{handle};
575                # Check to see if there's anything left to load.
576                if ($tableData->{count} > 0) {
577                    # Yes, load the data.
578                    $self->_LoadAttributeTable($tableName, $tableData->{fileName}, $retVal);
579                }
580                # Regardless of whether additional loading was required, we need to
581                # analyze the table for performance.
582                my $startTime = time();
583                $self->Analyze($tableName);
584                $retVal->Add(analyzeTime => time() - $startTime);
585            }
586            Trace("Attribute load successful.") if T(2);
587        };
588        # Check for an error.
589        if ($@) {
590            # Here we have an error. Display the error message.
591            my $message = $@;
592            Trace("Error during attribute load: $message") if T(0);
593            $retVal->AddMessage($message);
594            # Close the archive file if it's open. The archive file can sometimes provide
595            # clues as to what happened.
596            if (defined $ah) {
597                close $ah;
598            }
599        }
600        # Store the timers.
601        $retVal->Add(eraseTime   => $eraseTime);
602        $retVal->Add(archiveTime => $archiveTime);
603        $retVal->Add(checkTime   => $checkTime);
604      # Return the result.      # Return the result.
605      return $retVal;      return $retVal;
606  }  }
607    
608  =head3 BackupAllAttributes  =head3 BackupKeys
609    
610  C<< my $stats = $attrDB->BackupAllAttributes($fileName, %options); >>      my $stats = $attrDB->BackupKeys($fileName, %options);
611    
612  Backup all of the attributes to a file. The attributes will be stored in a  Backup the attribute key information from the attribute database.
 tab-delimited file suitable for reloading via L</LoadAttributesFrom>.  
613    
614  =over 4  =over 4
615    
616  =item fileName  =item fileName
617    
618  Name of the file to which the attribute data should be backed up.  Name of the output file.
619    
620  =item options  =item options
621    
622  Hash of options for the backup.  Options for modifying the backup process.
623    
624  =item RETURN  =item RETURN
625    
626  Returns a statistics object describing the backup.  Returns a statistics object for the backup.
627    
628  =back  =back
629    
630  Currently there are no options defined.  Currently there are no options. The backup is straight to a text file in
631    tab-delimited format. Each key is backed up to two lines. The first line
632    is all of the data from the B<AttributeKey> table. The second is a
633    tab-delimited list of all the groups.
634    
635  =cut  =cut
636    
637  sub BackupAllAttributes {  sub BackupKeys {
638      # Get the parameters.      # Get the parameters.
639      my ($self, $fileName, %options) = @_;      my ($self, $fileName, %options) = @_;
640      # Declare the return variable.      # Declare the return variable.
641      my $retVal = Stats->new();      my $retVal = Stats->new();
642      # Get a list of the keys.      # Open the output file.
643      my @keys = $self->GetFlat(['AttributeKey'], "", [], 'AttributeKey(id)');      my $fh = Open(undef, ">$fileName");
644      Trace(scalar(@keys) . " keys found during backup.") if T(2);      # Set up to read the keys.
645      # Open the file for output.      my $keyQuery = $self->Get(['AttributeKey'], "", []);
     my $fh = Open(undef, $fileName);  
646      # Loop through the keys.      # Loop through the keys.
647      for my $key (@keys) {      while (my $keyData = $keyQuery->Fetch()) {
648          Trace("Backing up attribute $key.") if T(3);          $retVal->Add(key => 1);
649          $retVal->Add(keys => 1);          # Get the fields.
650          # Loop through this key's values.          my ($id, $type, $tableName, $description) =
651          my $query = $self->Get(['HasValueFor'], "HasValueFor(to-link) = ?", [$key]);              $keyData->Values(['AttributeKey(id)', 'AttributeKey(relationship-name)',
652          my $valuesFound = 0;                                'AttributeKey(description)']);
653          while (my $line = $query->Fetch()) {          # Escape any tabs or new-lines in the description.
654              $valuesFound++;          my $escapedDescription = Tracer::Escape($description);
655              # Get this row's data.          # Write the key data to the output.
656              my @row = $line->Values(['HasValueFor(from-link)', 'HasValueFor(to-link)',          Tracer::PutLine($fh, [$id, $type, $tableName, $escapedDescription]);
657                                       'HasValueFor(value)']);          # Get the key's groups.
658              # Write it to the file.          my @groups = $self->GetFlat(['IsInGroup'], "IsInGroup(from-link) = ?", [$id],
659              Tracer::PutLine($fh, \@row);                                      'IsInGroup(to-link)');
660          }          $retVal->Add(memberships => scalar(@groups));
661          Trace("$valuesFound values backed up for key $key.") if T(3);          # Write them to the output. Note we put a marker at the beginning to insure the line
662          $retVal->Add(values => $valuesFound);          # is nonempty.
663            Tracer::PutLine($fh, ['#GROUPS', @groups]);
664      }      }
665        # Log the operation.
666        $self->LogOperation("Backup Keys", $fileName, $retVal->Display());
667      # Return the result.      # Return the result.
668      return $retVal;      return $retVal;
669  }  }
670    
671  =head3 FieldMenu  =head3 RestoreKeys
672    
673  C<< my $menuHtml = $attrDB->FieldMenu($cgi, $height, $name, $keys, %options); >>      my $stats = $attrDB->RestoreKeys($fileName, %options);
674    
675  Return the HTML for a menu to select an attribute field. The menu will  Restore the attribute keys and groups from a backup file.
 be a standard SELECT/OPTION thing which is called "popup menu" in the  
 CGI package, but actually looks like a list. The list will contain  
 one selectable row per field.  
676    
677  =over 4  =over 4
678    
679  =item cgi  =item fileName
   
 CGI query object used to generate HTML.  
680    
681  =item height  Name of the file containing the backed-up keys. Each key has a pair of lines,
682    one containing the key data and one listing its groups.
683    
684  Number of lines to display in the list.  =back
685    
686  =item name  =cut
687    
688  Name to give to the menu. This is the name under which the value will  sub RestoreKeys {
689  appear when the form is submitted.      # Get the parameters.
690        my ($self, $fileName, %options) = @_;
691        # Declare the return variable.
692        my $retVal = Stats->new();
693        # Set up a hash to hold the group IDs.
694        my %groups = ();
695        # Open the file.
696        my $fh = Open(undef, "<$fileName");
697        # Loop until we're done.
698        while (! eof $fh) {
699            # Get a key record.
700            my ($id, $tableName, $description) = Tracer::GetLine($fh);
701            if ($id eq '#GROUPS') {
702                Confess("Group record found when key record expected.");
703            } elsif (! defined($description)) {
704                Confess("Invalid format found for key record.");
705            } else {
706                $retVal->Add("keyIn" => 1);
707                # Add this key to the database.
708                $self->InsertObject('AttributeKey', { id => $id,
709                                                      description => Tracer::UnEscape($description),
710                                                      'relationship-name' => $tableName});
711                Trace("Attribute $id stored.") if T(3);
712                # Get the group line.
713                my ($marker, @groups) = Tracer::GetLine($fh);
714                if (! defined($marker)) {
715                    Confess("End of file found where group record expected.");
716                } elsif ($marker ne '#GROUPS') {
717                    Confess("Group record not found after key record.");
718                } else {
719                    $retVal->Add(memberships => scalar(@groups));
720                    # Connect the groups.
721                    for my $group (@groups) {
722                        # Find out if this is a new group.
723                        if (! $groups{$group}) {
724                            $retVal->Add(newGroup => 1);
725                            # Add the group.
726                            $self->InsertObject('AttributeGroup', { id => $group });
727                            Trace("Group $group created.") if T(3);
728                            # Make sure we know it's not new.
729                            $groups{$group} = 1;
730                        }
731                        # Connect the group to our key.
732                        $self->InsertObject('IsInGroup', { 'from-link' => $id, 'to-link' => $group });
733                    }
734                    Trace("$id added to " . scalar(@groups) . " groups.") if T(3);
735                }
736            }
737        }
738        # Log the operation.
739        $self->LogOperation("Backup Keys", $fileName, $retVal->Display());
740        # Return the result.
741        return $retVal;
742    }
743    
744  =item keys  =head3 ArchiveFileName
745    
746  Reference to a hash mapping each attribute key name to a list reference,      my $fileName = $ca->ArchiveFileName();
 the list itself consisting of the attribute data type, its description,  
 and a list of its groups.  
747    
748  =item options  Compute a file name for archiving attribute input data. The file will be in the attribute log directory.
749    
750  Hash containing options that modify the generation of the menu.  =cut
751    
752  =item RETURN  sub ArchiveFileName {
753        # Get the parameters.
754        my ($self) = @_;
755        # Declare the return variable.
756        my $retVal;
757        # We start by turning the timestamp into something usable as a file name.
758        my $now = Tracer::Now();
759        $now =~ tr/ :\//___/;
760        # Next we get the directory name.
761        my $dir = "$FIG_Config::var/attributes";
762        if (! -e $dir) {
763            Trace("Creating attribute file directory $dir.") if T(1);
764            mkdir $dir;
765        }
766        # Put it together with the field name and the time stamp.
767        $retVal = "$dir/upload.$now";
768        # Modify the file name to insure it's unique.
769        my $seq = 0;
770        while (-e "$retVal.$seq.tbl") { $seq++ }
771        # Use the computed sequence number to get the correct file name.
772        $retVal .= ".$seq.tbl";
773        # Return the result.
774        return $retVal;
775    }
776    
777  Returns the HTML to create a form field that can be used to select an  =head3 BackupAllAttributes
 attribute from the custom attributes system.  
778    
779  =back      my $stats = $attrDB->BackupAllAttributes($fileName, %options);
780    
781  The permissible options are as follows.  Backup all of the attributes to a file. The attributes will be stored in a
782    tab-delimited file suitable for reloading via L</LoadAttributesFrom>.
783    
784  =over 4  =over 4
785    
786  =item new  =item fileName
   
 If TRUE, then extra rows will be provided to allow the user to select  
 a new attribute. In other words, the user can select an existing  
 attribute, or can choose a C<(new)> marker to indicate a field to  
 be created in the parent entity.  
   
 =item notes  
787    
788  If specified, the name of a variable for displaying the notes attached  Name of the file to which the attribute data should be backed up.
 to the field. This must be in Javascript form ready for assignment.  
 So, for example, if you have a variable called C<notes> that  
 represents a paragraph element, you should code C<notes.innerHTML>.  
 If it actually represents a form field you should code C<notes.value>.  
 If an C<innerHTML> coding is used, the text will be HTML-escaped before  
 it is copied in. Specifying this parameter generates Javascript for  
 displaying the field description when a field is selected.  
789    
790  =item type  =item options
791    
792  If specified, the name of a variable for displaying the field's  Hash of options for the backup.
 data type. Data types are a much more controlled vocabulary than  
 notes, so there is no worry about HTML translation. Instead, the  
 raw value is put into the specified variable. Otherwise, the same  
 rules apply to this value that apply to I<$noteControl>.  
793    
794  =item groups  =item RETURN
795    
796  If specified, the name of a multiple-selection list control (also called  Returns a statistics object describing the backup.
 a popup menu) which shall be used to display the selected groups.  
797    
798  =back  =back
799    
800    Currently there are no options defined.
801    
802  =cut  =cut
803    
804  sub FieldMenu {  sub BackupAllAttributes {
805      # Get the parameters.      # Get the parameters.
806      my ($self, $cgi, $height, $name, $keys, %options) = @_;      my ($self, $fileName, %options) = @_;
807      # Reformat the list of keys.      # Declare the return variable.
808      my %keys = %{$keys};      my $retVal = Stats->new();
809      # Add the (new) key, if needed.      # Get a list of the keys.
810      if ($options{new}) {      my %keys = map { $_->[0] => $_->[1] } $self->GetAll(['AttributeKey'],
811          $keys{NewName()} = ["string", ""];                                                          "", [], ['AttributeKey(id)',
812      }                                                                    'AttributeKey(relationship-name)']);
813      # Get a sorted list of key.      Trace(scalar(keys %keys) . " keys found during backup.") if T(2);
814      my @keys = sort keys %keys;      # Open the file for output.
815      # We need to create the name for the onChange function. This function      my $fh = Open(undef, ">$fileName");
     # may not do anything, but we need to know the name to generate the HTML  
     # for the menu.  
     my $changeName = "${name}_setNotes";  
     my $retVal = $cgi->popup_menu({name => $name,  
                                    size => $height,  
                                    onChange => "$changeName(this.value)",  
                                    values => \@keys,  
                                   });  
     # Create the change function.  
     $retVal .= "\n<script language=\"javascript\">\n";  
     $retVal .= "    function $changeName(fieldValue) {\n";  
     # The function only has a body if we have a control to store data about the  
     # attribute.  
     if ($options{notes} || $options{type} || $options{groups}) {  
         # Check to see if we're storing HTML or text into the note control.  
         my $noteControl = $options{notes};  
         my $htmlMode = ($noteControl && $noteControl =~ /innerHTML$/);  
         # We use a CASE statement based on the newly-selected field value. The  
         # field description will be stored in the JavaScript variable "myText"  
         # and the data type in "myType". Note the default data type is a normal  
         # string, but the default notes is an empty string.  
         $retVal .= "        var myText = \"\";\n";  
         $retVal .= "        var myType = \"string\";\n";  
         $retVal .= "        switch (fieldValue) {\n";  
816          # Loop through the keys.          # Loop through the keys.
817          for my $key (@keys) {      for my $key (sort keys %keys) {
818              # Generate this case.          Trace("Backing up attribute $key.") if T(3);
819              $retVal .= "        case \"$key\" :\n";          $retVal->Add(keys => 1);
820              # Here we either want to update the note display, the          # Get the key's relevant relationship name.
821              # type display, the group list, or a combination of them.          my $relName = $keys{$key};
822              my ($type, $notes, @groups) = @{$keys{$key}};          # Loop through this key's values.
823              if ($noteControl) {          my $query = $self->Get([$relName], "$relName(from-link) = ?", [$key]);
824                  # Insure it's in the proper form.          my $valuesFound = 0;
825                  if ($htmlMode) {          while (my $line = $query->Fetch()) {
826                      $notes = ERDB::HTMLNote($notes);              $valuesFound++;
827                  }              # Get this row's data.
828                  # Escape it for use as a string literal.              my ($id, $key, $subKey, $value) = $line->Values(["$relName(to-link)",
829                  $notes =~ s/\n/\\n/g;                                                               "$relName(from-link)",
830                  $notes =~ s/"/\\"/g;                                                               "$relName(subkey)",
831                  $retVal .= "           myText = \"$notes\";\n";                                                               "$relName(value)"]);
832              }              # Check for a subkey.
833              if ($options{type}) {              if ($subKey ne '') {
834                  # Here we want the type updated.                  $key = "$key$self->{splitter}$subKey";
835                  $retVal .= "           myType = \"$type\";\n";              }
836              }              # Write it to the file.
837              if ($options{groups}) {              Tracer::PutLine($fh, [$id, $key, Escape($value)]);
838                  # Here we want the groups shown. Get a list of this attribute's groups.          }
839                  # We'll search through this list for each group to see if it belongs with          Trace("$valuesFound values backed up for key $key.") if T(3);
840                  # our attribute.          $retVal->Add(values => $valuesFound);
841                  my $groupLiteral = "=" . join("=", @groups) . "=";      }
842                  # Now we need some variables containing useful code for the javascript. It's      # Log the operation.
843                  # worth knowing we go through a bit of pain to insure $groupField[i] isn't      $self->LogOperation("Backup Data", $fileName, $retVal->Display());
                 # parsed as an array element.  
                 my $groupField = $options{groups};  
                 my $currentField = $groupField . "[i]";  
                 # Do the javascript.  
                 $retVal .= "           var groupList = \"$groupLiteral\";\n";  
                 $retVal .= "           for (var i = 0; i < $groupField.length; i++) {\n";  
                 $retVal .= "              var srchString = \"=\" + $currentField.value + \"=\";\n";  
                 $retVal .= "              var srchLoc = groupList.indexOf(srchString);\n";  
                 $retVal .= "              $currentField.checked = (srchLoc >= 0);\n";  
                 $retVal .= "           }\n";  
             }  
             # Close this case.  
             $retVal .= "           break;\n";  
         }  
         # Close the CASE statement and make the appropriate assignments.  
         $retVal .= "        }\n";  
         if ($noteControl) {  
             $retVal .= "        $noteControl = myText;\n";  
         }  
         if ($options{type}) {  
             $retVal .= "        $options{type} = myType;\n";  
         }  
     }  
     # Terminate the change function.  
     $retVal .= "    }\n";  
     $retVal .= "</script>\n";  
844      # Return the result.      # Return the result.
845      return $retVal;      return $retVal;
846  }  }
847    
848    
849  =head3 GetGroups  =head3 GetGroups
850    
851  C<< my @groups = $attrDB->GetGroups(); >>      my @groups = $attrDB->GetGroups();
852    
853  Return a list of the available groups.  Return a list of the available groups.
854    
# Line 784  Line 865 
865    
866  =head3 GetAttributeData  =head3 GetAttributeData
867    
868  C<< my %keys = $attrDB->GetAttributeData($type, @list); >>      my %keys = $attrDB->GetAttributeData($type, @list);
869    
870  Return attribute data for the selected attributes. The attribute  Return attribute data for the selected attributes. The attribute
871  data is a hash mapping each attribute key name to an n-tuple containing the  data is a hash mapping each attribute key name to an n-tuple containing the
872  data type, the description, and the groups. This is the same format expected in  data type, the description, the table name, and the groups.
 the L</FieldMenu> and L</ControlForm> methods for the list of attributes to display.  
873    
874  =over 4  =over 4
875    
# Line 804  Line 884 
884    
885  =item RETURN  =item RETURN
886    
887  Returns a hash mapping each attribute key name to its data type, description, and  Returns a hash mapping each attribute key name to its description,
888  parent groups.  table name, and parent groups.
889    
890  =back  =back
891    
# Line 837  Line 917 
917          }          }
918          while (my $row = $query->Fetch()) {          while (my $row = $query->Fetch()) {
919              # Get this attribute's data.              # Get this attribute's data.
920              my ($key, $type, $notes) = $row->Values(['AttributeKey(id)', 'AttributeKey(data-type)',              my ($key, $relName, $notes) = $row->Values(['AttributeKey(id)',
921                                                         'AttributeKey(relationship-name)',
922                                                       'AttributeKey(description)']);                                                       'AttributeKey(description)']);
923              # If it's new, get its groups and add it to the return hash.              # If it's new, get its groups and add it to the return hash.
924              if (! exists $retVal{$key}) {              if (! exists $retVal{$key}) {
925                  my @groups = $self->GetFlat(['IsInGroup'], "IsInGroup(from-link) = ?",                  my @groups = $self->GetFlat(['IsInGroup'], "IsInGroup(from-link) = ?",
926                                              [$key], 'IsInGroup(to-link)');                                              [$key], 'IsInGroup(to-link)');
927                  $retVal{$key} = [$type, $notes, @groups];                  $retVal{$key} = [$relName, $notes, @groups];
928              }              }
929          }          }
930      }      }
# Line 851  Line 932 
932      return %retVal;      return %retVal;
933  }  }
934    
935    =head3 LogOperation
936    
937        $ca->LogOperation($action, $target, $description);
938    
939    Write an operation description to the attribute activity log (C<$FIG_Config::var/attributes.log>).
940    
941    =over 4
942    
943    =item action
944    
945    Action being logged (e.g. C<Delete Group> or C<Load Key>).
946    
947    =item target
948    
949    ID of the key or group affected.
950    
951    =item description
952    
953    Short description of the action.
954    
955    =back
956    
957    =cut
958    
959    sub LogOperation {
960        # Get the parameters.
961        my ($self, $action, $target, $description) = @_;
962        # Get the user ID.
963        my $user = $self->{user};
964        # Get a timestamp.
965        my $timeString = Tracer::Now();
966        # Open the log file for appending.
967        my $oh = Open(undef, ">>$FIG_Config::var/attributes.log");
968        # Write the data to it.
969        Tracer::PutLine($oh, [$timeString, $user, $action, $target, $description]);
970        # Close the log file.
971        close $oh;
972    }
973    
974  =head2 FIG Method Replacements  =head2 FIG Method Replacements
975    
976  The following methods are used by B<FIG.pm> to replace the previous attribute functionality.  The following methods are used by B<FIG.pm> to replace the previous attribute functionality.
# Line 862  Line 982 
982  The idea is that these methods represent attribute manipulation allowed by all users, while  The idea is that these methods represent attribute manipulation allowed by all users, while
983  the others are only for privileged users with access to the attribute server.  the others are only for privileged users with access to the attribute server.
984    
985  In the previous implementation, an attribute had a value and a URL. In the new implementation,  In the previous implementation, an attribute had a value and a URL. In this implementation,
986  there is only a value. In this implementation, each attribute has only a value. These  each attribute has only a value. These methods will treat the value as a list with the individual
987  methods will treat the value as a list with the individual elements separated by the  elements separated by the value of the splitter parameter on the constructor (L</new>). The default
988  value of the splitter parameter on the constructor (L</new>). The default is double  is double colons C<::>.
 colons C<::>.  
989    
990  So, for example, an old-style keyword with a value of C<essential> and a URL of  So, for example, an old-style keyword with a value of C<essential> and a URL of
991  C<http://www.sciencemag.org/cgi/content/abstract/293/5538/2266> using the default  C<http://www.sciencemag.org/cgi/content/abstract/293/5538/2266> using the default
# Line 879  Line 998 
998    
999  =head3 GetAttributes  =head3 GetAttributes
1000    
1001  C<< my @attributeList = $attrDB->GetAttributes($objectID, $key, @values); >>      my @attributeList = $attrDB->GetAttributes($objectID, $key, @values);
1002    
1003  In the database, attribute values are sectioned into pieces using a splitter  In the database, attribute values are sectioned into pieces using a splitter
1004  value specified in the constructor (L</new>). This is not a requirement of  value specified in the constructor (L</new>). This is not a requirement of
# Line 918  Line 1037 
1037  which has no wildcard in the key or the object ID, may return multiple tuples.  which has no wildcard in the key or the object ID, may return multiple tuples.
1038    
1039  Value matching in this system works very poorly, because of the way multiple values are  Value matching in this system works very poorly, because of the way multiple values are
1040  stored. For the object ID and key name, we create queries that filter for the desired  stored. For the object ID, key name, and first value, we create queries that filter for the
1041  results. For the values, we do a comparison after the attributes are retrieved from the  desired results. On any filtering by value, we must do a comparison after the attributes are
1042  database. As a result, queries that filter only on value end up reading the entire  retrieved from the database, since the database has no notion of the multiple values, which
1043  attribute table to find the desired results.  are stored in a single string. As a result, queries that filter only on value end up
1044    reading a lot more than they need to.
1045    
1046  =over 4  =over 4
1047    
# Line 945  Line 1065 
1065  or an empty string is specified, all values in that section will match. A  or an empty string is specified, all values in that section will match. A
1066  generic match can be requested by placing a percent sign (C<%>) at the end.  generic match can be requested by placing a percent sign (C<%>) at the end.
1067  In that case, all values that match up to and not including the percent sign  In that case, all values that match up to and not including the percent sign
1068  will match.  will match. You may also specify a regular expression enclosed
1069    in slashes. All values that match the regular expression will be returned. For
1070    performance reasons, only values have this extra capability.
1071    
1072  =item RETURN  =item RETURN
1073    
# Line 961  Line 1083 
1083  sub GetAttributes {  sub GetAttributes {
1084      # Get the parameters.      # Get the parameters.
1085      my ($self, $objectID, $key, @values) = @_;      my ($self, $objectID, $key, @values) = @_;
1086      # We will create one big honking query. The following hash will build the filter      # This hash will map value-table fields to patterns. We use it to build the
1087      # clause and a parameter list.      # SQL statement.
1088      my %data = ('HasValueFor(from-link)' => $key, 'HasValueFor(to-link)' => $objectID);      my %data;
1089        # Add the object ID to the key information.
1090        $data{'to-link'} = $objectID;
1091        # The first value represents a problem, because we can search it using SQL, but not
1092        # in the normal way. If the user specifies a generic search or exact match for
1093        # every alternative value (remember, the values may be specified as a list),
1094        # then we can create SQL filtering for it. If any of the values are specified
1095        # as a regular expression, however, that's a problem, because we need to read
1096        # every value to verify a match.
1097        if (@values > 0) {
1098            # Get the first value and put its alternatives in an array.
1099            my $valueParm = $values[0];
1100            my @valueList;
1101            if (ref $valueParm eq 'ARRAY') {
1102                @valueList = @{$valueParm};
1103            } else {
1104                @valueList = ($valueParm);
1105            }
1106            # Okay, now we have all the possible criteria for the first value in the list
1107            # @valueList. We'll copy the values to a new array in which they have been
1108            # converted to generic requests. If we find a regular-expression match
1109            # anywhere in the list, we toss the whole thing.
1110            my @valuePatterns = ();
1111            my $okValues = 1;
1112            for my $valuePattern (@valueList) {
1113                # Check the pattern type.
1114                if (substr($valuePattern, 0, 1) eq '/') {
1115                    # Regular expressions invalidate the entire process.
1116                    $okValues = 0;
1117                } elsif (substr($valuePattern, -1, 1) eq '%') {
1118                    # A Generic pattern is passed in unmodified.
1119                    push @valuePatterns, $valuePattern;
1120                } else {
1121                    # An exact match is converted to generic.
1122                    push @valuePatterns, "$valuePattern%";
1123                }
1124            }
1125            # If everything works, add the value data to the filtering hash.
1126            if ($okValues) {
1127                $data{value} = \@valuePatterns;
1128            }
1129        }
1130        # Now comes the really tricky part, which is key handling. The key is
1131        # actually split in two parts: the real key and a sub-key. The real key
1132        # determines which value table contains the relevant values. The information
1133        # we need is kept in here.
1134        my %tables = map { $_ => [] } $self->_GetAllTables();
1135        # See if we have any key filtering to worry about.
1136        if ($key) {
1137            # Here we have either a single key or a list. We convert both cases to a list.
1138            my $keyList = (ref $key ne 'ARRAY' ? [$key] : $key);
1139            # Get easy access to the key/table hash.
1140            my $keyTableHash = $self->_KeyTable();
1141            # Loop through the keys, discovering tables.
1142            for my $keyChoice (@$keyList) {
1143                # Now we have to start thinking about the real key and the subkeys.
1144                my ($realKey, $subKey) = $self->_SplitKeyPattern($keyChoice);
1145                # Find the matches for the real key in the key hash. For each of
1146                # these, we memorize the table name in the hash below.
1147                my %tableNames = ();
1148                for my $keyInTable (keys %{$keyTableHash}) {
1149                    if ($self->_CheckSQLPattern($realKey, $keyInTable)) {
1150                        $tableNames{$keyTableHash->{$key}} = 1;
1151                    }
1152                }
1153                # If the key is generic, or didn't match anything, add
1154                # the default table to the mix.
1155                if (keys %tableNames == 0 || $keyChoice =~ /%/) {
1156                    $tableNames{$self->{defaultRel}} = 1;
1157                }
1158                # Now we add this key combination to the key list for each relevant table.
1159                for my $tableName (keys %tableNames) {
1160                    push @{$tables{$tableName}}, [$realKey, $subKey];
1161                }
1162            }
1163        }
1164        # Declare the return variable.
1165        my @retVal = ();
1166        # Now we loop through the tables of interest, performing queries.
1167        # Loop through the tables.
1168        for my $table (keys %tables) {
1169            # Get the key pairs for this table.
1170            my $pairs = $tables{$table};
1171            # Does this table have data? It does if there is no key specified or
1172            # it has at least one key pair.
1173            my $pairCount = scalar @{$pairs};
1174            Trace("Pair count for table $table is $pairCount.") if T(3);
1175            if ($pairCount || ! $key) {
1176                # Create some lists to contain the filter fragments and parameter values.
1177      my @filter = ();      my @filter = ();
1178      my @parms = ();      my @parms = ();
1179      # This next loop goes through the different fields that can be specified in the      # This next loop goes through the different fields that can be specified in the
1180      # parameter list and generates filters for each.              # parameter list and generates filters for each. The %data hash that we built above
1181                # contains most of the necessary information to do this. When we're done, we'll
1182                # paste on stuff for the key pairs.
1183      for my $field (keys %data) {      for my $field (keys %data) {
1184          # Accumulate filter information for this field. We will OR together all the          # Accumulate filter information for this field. We will OR together all the
1185          # elements accumulated to create the final result.          # elements accumulated to create the final result.
1186          my @fieldFilter = ();          my @fieldFilter = ();
1187          # Get the specified data from the caller.                  # Get the specified filter for this field.
1188          my $fieldPattern = $data{$field};          my $fieldPattern = $data{$field};
1189          # Only proceed if the pattern is one that won't match everything.          # Only proceed if the pattern is one that won't match everything.
1190          if (defined($fieldPattern) && $fieldPattern ne "" && $fieldPattern ne "%") {          if (defined($fieldPattern) && $fieldPattern ne "" && $fieldPattern ne "%") {
# Line 989  Line 1201 
1201              if (@patterns) {              if (@patterns) {
1202                  # Loop through the individual patterns.                  # Loop through the individual patterns.
1203                  for my $pattern (@patterns) {                  for my $pattern (@patterns) {
1204                      # Check for a generic request.                              my ($clause, $value) = _WherePart($table, $field, $pattern);
1205                      if (substr($pattern, -1, 1) ne '%') {                              push @fieldFilter, $clause;
1206                          # Here we have a normal request.                              push @parms, $value;
                         push @fieldFilter, "$field = ?";  
                         push @parms, $pattern;  
                     } else {  
                         # Here we have a generate request, so we will use the LIKE operator to  
                         # filter the field to this value pattern.  
                         push @fieldFilter, "$field LIKE ?";  
                         # We must convert the pattern value to an SQL match pattern. First  
                         # we get a copy of it.  
                         my $actualPattern = $pattern;  
                         # Now we escape the underscores. Underscores are an SQL wild card  
                         # character, but they are used frequently in key names and object IDs.  
                         $actualPattern =~ s/_/\\_/g;  
                         # Add the escaped pattern to the bound parameter list.  
                         push @parms, $actualPattern;  
                     }  
1207                  }                  }
1208                  # Form the filter for this field.                  # Form the filter for this field.
1209                  my $fieldFilterString = join(" OR ", @fieldFilter);                  my $fieldFilterString = join(" OR ", @fieldFilter);
# Line 1014  Line 1211 
1211              }              }
1212          }          }
1213      }      }
1214      # Now @filter contains one or more filter strings and @parms contains the parameter              # The final filter is for the key pairs. Only proceed if we have some.
1215      # values to bind to them.              if ($pairCount) {
1216      my $actualFilter = join(" AND ", @filter);                  # We'll accumulate pair filter clauses in here.
1217      # Declare the return variable.                  my @pairFilters = ();
1218      my @retVal = ();                  # Loop through the key pairs.
1219      # Get the number of value sections we have to match.                  for my $pair (@$pairs) {
1220      my $sectionCount = scalar(@values);                      my ($realKey, $subKey) = @{$pair};
1221      # Now we're ready to make our query.                      my ($realClause, $realValue) = _WherePart($table, 'from-link', $realKey);
1222      my $query = $self->Get(['HasValueFor'], $actualFilter, \@parms);                      if (! $subKey) {
1223      # Loop through the assignments found.                          # Here the subkey is wild, so only the real key matters.
1224      while (my $row = $query->Fetch()) {                          push @pairFilters, $realClause;
1225          # Get the current row's data.                          push @parms, $realValue;
         my ($id, $key, $valueString) = $row->Values(['HasValueFor(to-link)', 'HasValueFor(from-link)',  
                                                       'HasValueFor(value)']);  
         # Break the value into sections.  
         my @sections = split($self->{splitter}, $valueString);  
         # Match each section against the incoming values. We'll assume we're  
         # okay unless we learn otherwise.  
         my $matching = 1;  
         for (my $i = 0; $i < $sectionCount && $matching; $i++) {  
             # We need to check to see if this section is generic.  
             if (substr($values[$i], -1, 1) eq '%') {  
                 my $matchLen = length($values[$i] - 1);  
                 $matching = substr($sections[$i], 0, $matchLen) eq  
                             substr($values[$i], 0, $matchLen);  
1226              } else {              } else {
1227                  $matching = ($sections[$i] eq $values[$i]);                          # Here we have to select on both keys.
1228                            my ($subClause, $subValue) = _WherePart($table, 'subkey', $subKey);
1229                            push @pairFilters, "($realClause AND $subClause)";
1230                            push @parms, $subValue;
1231              }              }
1232          }          }
1233          # If we match, output this row to the return list.                  # Join the pair filters together to make a giant key filter.
1234          if ($matching) {                  my $pairFilter = "(" . join(" OR ", @pairFilters) . ")";
1235              push @retVal, [$id, $key, @sections];                  push @filter, $pairFilter;
1236                }
1237                # At this point, @filter contains one or more filter strings and @parms
1238                # contains the parameter values to bind to them.
1239                my $actualFilter = join(" AND ", @filter);
1240                # Now we're ready to make our query.
1241                my $query = $self->Get([$table], $actualFilter, \@parms);
1242                # Format the results.
1243                push @retVal, $self->_QueryResults($query, $table, @values);
1244          }          }
1245      }      }
1246      # Return the rows found.      # The above loop ran the query for each necessary value table and merged the
1247        # results into @retVal. Now we return the rows found.
1248      return @retVal;      return @retVal;
1249  }  }
1250    
1251  =head3 AddAttribute  =head3 AddAttribute
1252    
1253  C<< $attrDB->AddAttribute($objectID, $key, @values); >>      $attrDB->AddAttribute($objectID, $key, @values);
1254    
1255  Add an attribute key/value pair to an object. This method cannot add a new key, merely  Add an attribute key/value pair to an object. This method cannot add a new key, merely
1256  add a value to an existing key. Use L</StoreAttributeKey> to create a new key.  add a value to an existing key. Use L</StoreAttributeKey> to create a new key.
# Line 1093  Line 1289 
1289          # Okay, now we have some reason to believe we can do this. Form the values          # Okay, now we have some reason to believe we can do this. Form the values
1290          # into a scalar.          # into a scalar.
1291          my $valueString = join($self->{splitter}, @values);          my $valueString = join($self->{splitter}, @values);
1292            # Split up the key.
1293            my ($realKey, $subKey) = $self->SplitKey($key);
1294            # Find the table containing the key.
1295            my $table = $self->_KeyTable($realKey);
1296          # Connect the object to the key.          # Connect the object to the key.
1297          $self->InsertObject('HasValueFor', { 'from-link' => $key,          $self->InsertObject($table, { 'from-link' => $realKey,
1298                                               'to-link'   => $objectID,                                               'to-link'   => $objectID,
1299                                                 'subkey'    => $subKey,
1300                                               'value'     => $valueString,                                               'value'     => $valueString,
1301                                         });                                         });
1302      }      }
# Line 1105  Line 1306 
1306    
1307  =head3 DeleteAttribute  =head3 DeleteAttribute
1308    
1309  C<< $attrDB->DeleteAttribute($objectID, $key, @values); >>      $attrDB->DeleteAttribute($objectID, $key, @values);
1310    
1311  Delete the specified attribute key/value combination from the database.  Delete the specified attribute key/value combination from the database.
1312    
# Line 1136  Line 1337 
1337          Confess("No object ID specified for DeleteAttribute call.");          Confess("No object ID specified for DeleteAttribute call.");
1338      } elsif (! defined($key)) {      } elsif (! defined($key)) {
1339          Confess("No attribute key specified for DeleteAttribute call.");          Confess("No attribute key specified for DeleteAttribute call.");
1340      } elsif (scalar(@values) == 0) {      } else {
1341          # Here we erase the entire key.          # Split the key into the real key and the subkey.
1342          $self->EraseAttribute($key);          my ($realKey, $subKey) = $self->SplitKey($key);
1343            # Find the table containing the key's values.
1344            my $table = $self->_KeyTable($realKey);
1345            if ($subKey eq '' && scalar(@values) == 0) {
1346                # Here we erase the entire key for this object.
1347                $self->DeleteRow('HasValueFor', $key, $objectID);
1348      } else {      } else {
1349          # Here we erase the matching values.          # Here we erase the matching values.
1350          my $valueString = join($self->{splitter}, @values);          my $valueString = join($self->{splitter}, @values);
1351          $self->DeleteRow('HasValueFor', $key, $objectID, { value => $valueString });              $self->DeleteRow('HasValueFor', $realKey, $objectID,
1352                                 { subkey => $subKey, value => $valueString });
1353            }
1354      }      }
1355      # Return a one. This is for backward compatibility.      # Return a one. This is for backward compatibility.
1356      return 1;      return 1;
1357  }  }
1358    
1359  =head3 ChangeAttribute  =head3 DeleteMatchingAttributes
1360    
1361  C<< $attrDB->ChangeAttribute($objectID, $key, \@oldValues, \@newValues); >>      my @deleted = $attrDB->DeleteMatchingAttributes($objectID, $key, @values);
1362    
1363  Change the value of an attribute key/value pair for an object.  Delete all attributes that match the specified criteria. This is equivalent to
1364    calling L</GetAttributes> and then invoking L</DeleteAttribute> for each
1365    row found.
1366    
1367  =over 4  =over 4
1368    
1369  =item objectID  =item objectID
1370    
1371  ID of the genome or feature to which the attribute is to be changed. In general, an ID that  ID of object whose attributes are to be deleted. If the attributes for multiple
1372  starts with C<fig|> is treated as a feature ID, and an ID that is all digits and periods  objects are to be deleted, this parameter can be specified as a list reference. If
1373  is treated as a genome ID. For IDs of other types, this parameter should be a reference  attributes are to be deleted for all objects, specify C<undef> or an empty string.
1374  to a 2-tuple consisting of the entity type name followed by the object ID.  Finally, you can delete attributes for a range of object IDs by putting a percent
1375    sign (C<%>) at the end.
1376    
1377  =item key  =item key
1378    
1379  Attribute key name. This corresponds to the name of a field in the database.  Attribute key name. A value of C<undef> or an empty string will match all
1380    attribute keys. If the values are to be deleted for multiple keys, this parameter can be
1381    specified as a list reference. Finally, you can delete attributes for a range of
1382    keys by putting a percent sign (C<%>) at the end.
1383    
1384    =item values
1385    
1386    List of the desired attribute values, section by section. If C<undef>
1387    or an empty string is specified, all values in that section will match. A
1388    generic match can be requested by placing a percent sign (C<%>) at the end.
1389    In that case, all values that match up to and not including the percent sign
1390    will match. You may also specify a regular expression enclosed
1391    in slashes. All values that match the regular expression will be deleted. For
1392    performance reasons, only values have this extra capability.
1393    
1394    =item RETURN
1395    
1396    Returns a list of tuples for the attributes that were deleted, in the
1397    same form as L</GetAttributes>.
1398    
1399    =back
1400    
1401    =cut
1402    
1403    sub DeleteMatchingAttributes {
1404        # Get the parameters.
1405        my ($self, $objectID, $key, @values) = @_;
1406        # Get the matching attributes.
1407        my @retVal = $self->GetAttributes($objectID, $key, @values);
1408        # Loop through the attributes, deleting them.
1409        for my $tuple (@retVal) {
1410            $self->DeleteAttribute(@{$tuple});
1411        }
1412        # Log this operation.
1413        my $count = @retVal;
1414        $self->LogOperation("Mass Delete", $key, "$count matching attributes deleted.");
1415        # Return the deleted attributes.
1416        return @retVal;
1417    }
1418    
1419    =head3 ChangeAttribute
1420    
1421        $attrDB->ChangeAttribute($objectID, $key, \@oldValues, \@newValues);
1422    
1423    Change the value of an attribute key/value pair for an object.
1424    
1425    =over 4
1426    
1427    =item objectID
1428    
1429    ID of the genome or feature to which the attribute is to be changed. In general, an ID that
1430    starts with C<fig|> is treated as a feature ID, and an ID that is all digits and periods
1431    is treated as a genome ID. For IDs of other types, this parameter should be a reference
1432    to a 2-tuple consisting of the entity type name followed by the object ID.
1433    
1434    =item key
1435    
1436    Attribute key name. This corresponds to the name of a field in the database.
1437    
1438  =item oldValues  =item oldValues
1439    
# Line 1202  Line 1470 
1470    
1471  =head3 EraseAttribute  =head3 EraseAttribute
1472    
1473  C<< $attrDB->EraseAttribute($key); >>      $attrDB->EraseAttribute($key);
1474    
1475  Erase all values for the specified attribute key. This does not remove the  Erase all values for the specified attribute key. This does not remove the
1476  key from the database; it merely removes all the values.  key from the database; it merely removes all the values.
# Line 1211  Line 1479 
1479    
1480  =item key  =item key
1481    
1482  Key to erase.  Key to erase. This must be a real key; that is, it cannot have a subkey
1483    component.
1484    
1485  =back  =back
1486    
# Line 1220  Line 1489 
1489  sub EraseAttribute {  sub EraseAttribute {
1490      # Get the parameters.      # Get the parameters.
1491      my ($self, $key) = @_;      my ($self, $key) = @_;
1492      # Delete everything connected to the key. The "keepRoot" option keeps the key in the      # Find the table containing the key.
1493      # datanase while deleting everything attached to it.      my $table = $self->_KeyTable($key);
1494      $self->Delete('AttributeKey', $key, keepRoot => 1);      # Is it the default table?
1495        if ($table eq $self->{defaultRel}) {
1496            # Yes, so the key is mixed in with other keys.
1497            # Delete everything connected to it.
1498            $self->Disconnect('HasValueFor', 'AttributeKey', $key);
1499        } else {
1500            # No. Drop and re-create the table.
1501            $self->TruncateTable($table);
1502        }
1503        # Log the operation.
1504        $self->LogOperation("Erase Data", $key);
1505      # Return a 1, for backward compatibility.      # Return a 1, for backward compatibility.
1506      return 1;      return 1;
1507  }  }
1508    
1509  =head3 GetAttributeKeys  =head3 GetAttributeKeys
1510    
1511  C<< my @keyList = $attrDB->GetAttributeKeys($groupName); >>      my @keyList = $attrDB->GetAttributeKeys($groupName);
1512    
1513  Return a list of the attribute keys for a particular group.  Return a list of the attribute keys for a particular group.
1514    
# Line 1257  Line 1536 
1536      return sort @groups;      return sort @groups;
1537  }  }
1538    
1539    =head3 QueryAttributes
1540    
1541        my @attributeData = $ca->QueryAttributes($filter, $filterParms);
1542    
1543    Return the attribute data based on an SQL filter clause. In the filter clause,
1544    the name C<$object> should be used for the object ID, C<$key> should be used for
1545    the key name, C<$subkey> for the subkey value, and C<$value> for the value field.
1546    
1547    =over 4
1548    
1549    =item filter
1550    
1551    Filter clause in the standard ERDB format, except that the field names are C<$object> for
1552    the object ID field, C<$key> for the key name field, C<$subkey> for the subkey field,
1553    and C<$value> for the value field. This abstraction enables us to hide the details of
1554    the database construction from the user.
1555    
1556    =item filterParms
1557    
1558    Parameters for the filter clause.
1559    
1560    =item RETURN
1561    
1562    Returns a list of tuples. Each tuple consists of an object ID, a key (with optional subkey), and
1563    one or more attribute values.
1564    
1565    =back
1566    
1567    =cut
1568    
1569    # This hash is used to drive the substitution process.
1570    my %AttributeParms = (object => 'to-link',
1571                          key    => 'from-link',
1572                          subkey => 'subkey',
1573                          value  => 'value');
1574    
1575    sub QueryAttributes {
1576        # Get the parameters.
1577        my ($self, $filter, $filterParms) = @_;
1578        # Declare the return variable.
1579        my @retVal = ();
1580        # Make sure we have filter parameters.
1581        my $realParms = (defined($filterParms) ? $filterParms : []);
1582        # Loop through all the value tables.
1583        for my $table ($self->_GetAllTables()) {
1584            # Create the query for this table by converting the filter.
1585            my $realFilter = $filter;
1586            for my $name (keys %AttributeParms) {
1587                $realFilter =~ s/\$$name/$table($AttributeParms{$name})/g;
1588            }
1589            my $query = $self->Get([$table], $realFilter, $realParms);
1590            # Loop through the results, forming the output attribute tuples.
1591            while (my $result = $query->Fetch()) {
1592                # Get the four values from this query result row.
1593                my ($objectID, $key, $subkey, $value) = $result->Values(["$table($AttributeParms{object})",
1594                                                                        "$table($AttributeParms{key})",
1595                                                                        "$table($AttributeParms{subkey})",
1596                                                                        "$table($AttributeParms{value})"]);
1597                # Combine the key and the subkey.
1598                my $realKey = ($subkey ? $key . $self->{splitter} . $subkey : $key);
1599                # Split the value.
1600                my @values = split $self->{splitter}, $value;
1601                # Output the result.
1602                push @retVal, [$objectID, $realKey, @values];
1603            }
1604        }
1605        # Return the result.
1606        return @retVal;
1607    }
1608    
1609    =head2 Key and ID Manipulation Methods
1610    
1611    =head3 ParseID
1612    
1613        my ($type, $id) = CustomAttributes::ParseID($idValue);
1614    
1615    Determine the type and object ID corresponding to an ID value from the attribute database.
1616    Most ID values consist of a type name and an ID, separated by a colon (e.g. C<Family:aclame|cluster10>);
1617    however, Genomes, Features, and Subsystems are not stored with a type name, so we need to
1618    deduce the type from the ID value structure.
1619    
1620    The theory here is that you can plug the ID and type directly into a Sprout database method, as
1621    follows
1622    
1623        my ($type, $id) = CustomAttributes::ParseID($attrList[$num]->[0]);
1624        my $target = $sprout->GetEntity($type, $id);
1625    
1626    =over 4
1627    
1628    =item idValue
1629    
1630    ID value taken from the attribute database.
1631    
1632    =item RETURN
1633    
1634    Returns a two-element list. The first element is the type of object indicated by the ID value,
1635    and the second element is the actual object ID.
1636    
1637    =back
1638    
1639    =cut
1640    
1641    sub ParseID {
1642        # Get the parameters.
1643        my ($idValue) = @_;
1644        # Declare the return variables.
1645        my ($type, $id);
1646        # Parse the incoming ID. We first check for the presence of an entity name. Entity names
1647        # can only contain letters, which helps to insure typed object IDs don't collide with
1648        # subsystem names (which are untyped).
1649        if ($idValue =~ /^([A-Za-z]+):(.+)/) {
1650            # Here we have a typed ID.
1651            ($type, $id) = ($1, $2);
1652            # Fix the case sensitivity on PDB IDs.
1653            if ($type eq 'PDB') { $id = lc $id; }
1654        } elsif ($idValue =~ /fig\|/) {
1655            # Here we have a feature ID.
1656            ($type, $id) = (Feature => $idValue);
1657        } elsif ($idValue =~ /\d+\.\d+/) {
1658            # Here we have a genome ID.
1659            ($type, $id) = (Genome => $idValue);
1660        } else {
1661            # The default is a subsystem ID.
1662            ($type, $id) = (Subsystem => $idValue);
1663        }
1664        # Return the results.
1665        return ($type, $id);
1666    }
1667    
1668    =head3 FormID
1669    
1670        my $idValue = CustomAttributes::FormID($type, $id);
1671    
1672    Convert an object type and ID pair into an object ID string for the attribute system. Subsystems,
1673    genomes, and features are stored in the database without type information, but all other object IDs
1674    must be prefixed with the object type.
1675    
1676    =over 4
1677    
1678    =item type
1679    
1680    Relevant object type.
1681    
1682    =item id
1683    
1684    ID of the object in question.
1685    
1686    =item RETURN
1687    
1688    Returns a string that will be recognized as an object ID in the attribute database.
1689    
1690    =back
1691    
1692    =cut
1693    
sub FormID {
    # Get the parameters.
    my ($type, $id) = @_;
    # These are the object types whose IDs are stored without a type prefix.
    my %untyped = map { $_ => 1 } qw(Feature Genome Subsystem);
    # An untyped object is represented by its bare ID; everything else is
    # represented as "type:id".
    return ($untyped{$type} ? $id : "$type:$id");
}
1708    
1709    =head3 GetTargetObject
1710    
1711        my $object = CustomAttributes::GetTargetObject($erdb, $idValue);
1712    
1713    Return the database object corresponding to the specified attribute object ID. The
1714    object type associated with the ID value must correspond to an entity name in the
1715    specified database.
1716    
1717    =over 4
1718    
1719    =item erdb
1720    
1721    B<ERDB> object for accessing the target database.
1722    
1723    =item idValue
1724    
1725    ID value retrieved from the attribute database.
1726    
1727    =item RETURN
1728    
1729    Returns a B<ERDBObject> for the attribute value's target object.
1730    
1731    =back
1732    
1733    =cut
1734    
sub GetTargetObject {
    # Get the parameters.
    my ($erdb, $idValue) = @_;
    # Break the attribute object ID into an entity name and the ID of the
    # instance within that entity.
    my ($entityName, $entityID) = ParseID($idValue);
    # Ask the database for the matching entity instance.
    my $targetObject = $erdb->GetEntity($entityName, $entityID);
    # Hand it back to the caller.
    return $targetObject;
}
1747    
1748    =head3 SplitKey
1749    
1750        my ($realKey, $subKey) = $ca->SplitKey($key);
1751    
1752    Split an external key (that is, one passed in by a caller) into the real key and the sub key.
1753    The real and sub keys are separated by a splitter value (usually C<::>). If there is no splitter,
1754    then the sub key is presumed to be an empty string.
1755    
1756    =over 4
1757    
1758    =item key
1759    
1760    Incoming key to be split.
1761    
1762    =item RETURN
1763    
1764    Returns a two-element list, the first element of which is the real key and the second element of
1765    which is the sub key.
1766    
1767    =back
1768    
1769    =cut
1770    
sub SplitKey {
    # Get the parameters.
    my ($self, $key) = @_;
    # Do the split. The splitter is a literal delimiter (JoinKey inserts it
    # verbatim), so it is quoted here to keep any regular expression
    # metacharacters in it from being interpreted as a pattern. The limit of 2
    # means that only the first splitter separates the real key from the sub
    # key; later occurrences stay in the sub key.
    my ($realKey, $subKey) = split /\Q$self->{splitter}\E/, $key, 2;
    # Ensure the subkey has a value.
    if (! defined $subKey) {
        $subKey = '';
    }
    # Return the results.
    return ($realKey, $subKey);
}
1783    
1784    
1785    =head3 JoinKey
1786    
1787        my $key = $ca->JoinKey($realKey, $subKey);
1788    
1789    Join a real key and a subkey together to make an external key. The external key is the attribute key
1790    used by the caller. The real key and the subkey are how the keys are represented in the database. The
1791    real key is the key to the B<AttributeKey> entity. The subkey is an attribute of the B<HasValueFor>
1792    relationship.
1793    
1794    =over 4
1795    
1796    =item realKey
1797    
1798    The real attribute key.
1799    
1800    =item subKey
1801    
1802    The subordinate portion of the attribute key.
1803    
1804    =item RETURN
1805    
1806    Returns a single string representing both keys.
1807    
1808    =back
1809    
1810    =cut
1811    
sub JoinKey {
    # Get the parameters.
    my ($self, $realKey, $subKey) = @_;
    # With no subkey, the external key is simply the real key.
    return $realKey if $subKey eq '';
    # Otherwise the two pieces are glued together around the splitter.
    return join($self->{splitter}, $realKey, $subKey);
}
1828    
1829    
1830    =head3 AttributeTable
1831    
1832        my $tableHtml = CustomAttributes::AttributeTable($cgi, @attrList);
1833    
1834    Format the attribute data into an HTML table.
1835    
1836    =over 4
1837    
1838    =item cgi
1839    
1840    CGI query object used to generate the HTML
1841    
1842    =item attrList
1843    
1844    List of attribute results, in the format returned by the L</GetAttributes> or
1845    L</QueryAttributes> methods.
1846    
1847    =item RETURN
1848    
1849    Returns an HTML table displaying the attribute keys and values.
1850    
1851    =back
1852    
1853    =cut
1854    
sub AttributeTable {
    # Get the parameters.
    my ($cgi, @attrList) = @_;
    # Accumulate the table rows.
    my @html = ();
    for my $attrData (@attrList) {
        # Format the object ID and key. These two columns are HTML-escaped.
        my @columns = map { CGI::escapeHTML($_) } @{$attrData}[0,1];
        # Now we format the values. These remain unchanged unless one of them
        # is a URL, in which case it becomes a hyperlink to itself. Both http
        # and https URLs are recognized.
        # NOTE(review): unlike the object ID and key, the values are emitted
        # without HTML escaping. If attribute values can come from untrusted
        # sources this allows markup injection -- confirm whether values are
        # expected to carry HTML before escaping them here.
        my $lastValue = scalar(@{$attrData}) - 1;
        push @columns, map { $_ =~ m{^https?:} ? $cgi->a({ href => $_ }, $_) : $_ } @{$attrData}[2 .. $lastValue];
        # Assemble the values into a table row.
        push @html, $cgi->Tr($cgi->td(\@columns));
    }
    # Format the table in the return variable. The header row comes first,
    # followed by one row per attribute tuple.
    my $retVal = $cgi->table({ border => 2 }, $cgi->Tr($cgi->th(['Object', 'Key', 'Values'])), @html);
    # Return it.
    return $retVal;
}
1874    
1875    
1876    =head2 Internal Utility Methods
1877    
1878    =head3 _KeyTable
1879    
1880        my $tableName = $ca->_KeyTable($keyName);
1881    
1882    Return the name of the table that contains the attribute values for the
1883    specified key.
1884    
1885    Most attribute values are stored in the default table (usually C<HasValueFor>).
1886    Some, however, are placed in private tables by themselves for performance reasons.
1887    
1888    =over 4
1889    
1890    =item keyName (optional)
1891    
1892    Name of the attribute key whose table name is desired. If not specified, the
1893    entire key/table hash is returned.
1894    
1895    =item RETURN
1896    
1897    Returns the name of the table containing the specified attribute key's values,
1898    or a reference to a hash that maps key names to table names.
1899    
1900    =back
1901    
1902    =cut
1903    
sub _KeyTable {
    # Get the parameters.
    my ($self, $keyName) = @_;
    # The key/table map is built once and cached in this object. It only
    # lists the keys whose values live somewhere other than the default
    # relationship.
    unless (exists $self->{keyTables}) {
        my @privateKeys = $self->GetAll(['AttributeKey'],
                                        "AttributeKey(relationship-name) <> ?",
                                        [$self->{defaultRel}],
                                        ['AttributeKey(id)', 'AttributeKey(relationship-name)']);
        my %tableFor;
        for my $keyData (@privateKeys) {
            my ($keyID, $relName) = @{$keyData};
            $tableFor{$keyID} = $relName;
        }
        $self->{keyTables} = \%tableFor;
    }
    my $tableMap = $self->{keyTables};
    # With no key name, the caller wants the whole map.
    return $tableMap unless $keyName;
    # A key that is absent from the map lives in the default table.
    return (exists $tableMap->{$keyName} ? $tableMap->{$keyName} : $self->{defaultRel});
}
1935    
1936    
1937    =head3 _QueryResults
1938    
1939        my @attributeList = $attrDB->_QueryResults($query, $table, @values);
1940    
1941    Match the results of a query against value criteria and return
1942    the results. This is an internal method that splits the values coming back
1943    and matches the sections against the specified section patterns. It serves
1944    as the back end to L</GetAttributes> and L</FindAttributes>.
1945    
1946    =over 4
1947    
1948    =item query
1949    
1950    A query object that will return the desired records.
1951    
1952    =item table
1953    
1954    Name of the value table for the query.
1955    
1956    =item values
1957    
1958    List of the desired attribute values, section by section. If C<undef>
1959    or an empty string is specified, all values in that section will match. A
1960    generic match can be requested by placing a percent sign (C<%>) at the end.
1961    In that case, all values that match up to and not including the percent sign
1962    will match. You may also specify a regular expression enclosed
1963    in slashes. All values that match the regular expression will be returned. For
1964    performance reasons, only values have this extra capability.
1965    
1966    =item RETURN
1967    
1968    Returns a list of tuples. The first element in the tuple is an object ID, the
1969    second is an attribute key, and the remaining elements are the sections of
1970    the attribute value. All of the tuples will match the criteria set forth in
1971    the parameter list.
1972    
1973    =back
1974    
1975    =cut
1976    
sub _QueryResults {
    # Get the parameters.
    my ($self, $query, $table, @values) = @_;
    # Declare the return value.
    my @retVal = ();
    # Get the number of value sections we have to match.
    my $sectionCount = scalar(@values);
    # Loop through the assignments found.
    while (my $row = $query->Fetch()) {
        # Get the current row's data.
        my ($id, $realKey, $subKey, $valueString) = $row->Values(["$table(to-link)",
                                                                  "$table(from-link)",
                                                                  "$table(subkey)",
                                                                  "$table(value)"
                                                                ]);
        # Form the key from the real key and the sub key.
        my $key = $self->JoinKey($realKey, $subKey);
        # Break the value into sections. The splitter is a literal delimiter,
        # so it is quoted to keep it from being interpreted as a pattern.
        my @sections = split /\Q$self->{splitter}\E/, $valueString;
        # Match each section against the incoming values. We'll assume we're
        # okay unless we learn otherwise.
        # NOTE(review): the POD says an undefined or empty pattern matches
        # every value, but such a pattern falls through to _CheckSQLPattern
        # below, which compares it exactly -- confirm that callers trim
        # those patterns before calling this method.
        my $matching = 1;
        for (my $i = 0; $i < $sectionCount && $matching; $i++) {
            # We need to check to see if this section is generic.
            my $value = $values[$i];
            Trace("Current value pattern is \"$value\".") if T(4);
            if ($value =~ m#^/(.+)/([a-z]*)$#) {
                Trace("Regular expression detected.") if T(4);
                # Here we have a regular expression match. The pattern comes
                # from the caller, so it must never be run through a string
                # eval: that would execute arbitrary Perl embedded after the
                # closing slash. Instead it is compiled as data with qr//.
                # Without "use re 'eval'", embedded code blocks in an
                # interpolated pattern are rejected at compile time.
                my ($pattern, $flagText) = ($1, $2);
                # Only the modifiers that can be applied inline are honored
                # (i, m, s, x); anything else (such as g or o) has no effect
                # on a simple yes/no match and is ignored.
                my %seen;
                my $flags = join('', grep { ! $seen{$_}++ } ($flagText =~ /[imsx]/g));
                my $prefix = ($flags ne '' ? "(?$flags)" : '');
                my $section = $sections[$i];
                # A pattern that fails to compile simply does not match.
                my $regex = eval { qr/$prefix$pattern/ };
                $matching = (defined $regex && $section =~ $regex);
            } else {
                # Here we have a normal match.
                Trace("SQL match used.") if T(4);
                $matching = _CheckSQLPattern($values[$i], $sections[$i]);
            }
        }
        # If we match, output this row to the return list.
        if ($matching) {
            push @retVal, [$id, $key, @sections];
        }
    }
    # Return the rows found.
    return @retVal;
}
2022    
2023    
2024    =head3 _LoadAttributeTable
2025    
2026        $attr->_LoadAttributeTable($tableName, $fileName, $stats, $mode);
2027    
2028    Load a file's data into an attribute table. This is an internal method
2029    provided for the convenience of L</LoadAttributesFrom>. It loads the
2030    specified file into the specified table and updates the statistics
2031    object.
2032    
2033    =over 4
2034    
2035    =item tableName
2036    
2037    Name of the table being loaded. This is usually C<HasValueFor>, but may
2038    be a different table for some specific attribute keys.
2039    
2040    =item fileName
2041    
2042    Name of the file containing a chunk of attribute data to load.
2043    
2044    =item stats
2045    
2046    Statistics object into which counts and times should be placed.
2047    
2048    =item mode
2049    
2050    Load mode for the file, usually C<low_priority>, C<concurrent>, or
2051    an empty string. The mode is used by some applications to control access
2052    to the table while it's being loaded. The default (empty string) is to lock the
2053    table until all the data's in place.
2054    
2055    =back
2056    
2057    =cut
2058    
sub _LoadAttributeTable {
    # Get the parameters.
    my ($self, $tableName, $fileName, $stats, $mode) = @_;
    # Start the clock for the load.
    my $startTime = time();
    # Tell the user what we're loading and how big it is.
    if (T(3)) {
        Trace("Loading attributes from $fileName: " . (-s $fileName) .
              " characters.");
    }
    # Load the table from the file. This is a partial load, and no analyze
    # is requested here: that is left until everything is complete.
    my $loadStats = $self->LoadTable($fileName, $tableName,
                                     mode => $mode, partial => 1);
    # Record how long the load took.
    my $elapsed = time() - $startTime;
    $stats->Add(insertTime => $elapsed);
    # Roll up the other statistics.
    $stats->Accumulate($loadStats);
}
2074    
2075    
2076    =head3 _GetAllTables
2077    
2078        my @tables = $ca->_GetAllTables();
2079    
2080    Return a list of the names of all the tables used to store attribute
2081    values.
2082    
2083    =cut
2084    
sub _GetAllTables {
    # Get the parameters.
    my ($self) = @_;
    # Get the hash that maps keys to their private tables. Tables in this
    # hash are never the default table, and each one appears only once,
    # because alternate tables are allocated on a per-key basis.
    my $privateTables = $self->_KeyTable();
    # The result is the default table followed by all the private ones.
    my @tables = ($self->{defaultRel}, values %{$privateTables});
    # Return the result.
    return @tables;
}
2098    
2099    
2100    =head3 _SplitKeyPattern
2101    
2102        my ($realKey, $subKey) = $ca->_SplitKeyPattern($keyChoice);
2103    
2104    Split a key pattern into the main part (the I<real key>) and a sub-part
2105    (the I<sub key>). This method differs from L</SplitKey> in that it treats
2106    the key as an SQL pattern instead of a raw string. Also, if there is no
2107    incoming sub-part, the sub-key will be undefined instead of an empty
2108    string.
2109    
2110    =over 4
2111    
2112    =item keyChoice
2113    
2114    SQL key pattern to be examined. This can either be a literal, an SQL pattern,
2115    a literal with an internal splitter code (usually C<::>) or an SQL pattern with
2116    an internal splitter. Note that the only SQL pattern we support is a percent
2117    sign (C<%>) at the end. This is the way we've declared things in the documentation,
2118    so users who try anything else will have problems.
2119    
2120    =item RETURN
2121    
2122    Returns a two-element list. The first element is the SQL pattern for the
2123    real key and the second is the SQL pattern for the sub-key. If the value
2124    for either one does not matter (e.g., the user wants a real key value of
2125    C<iedb> and doesn't care about the sub-key value), it will be undefined.
2126    
2127    =back
2128    
2129    =cut
2130    
sub _SplitKeyPattern {
    # Get the parameters.
    my ($self, $keyChoice) = @_;
    # Declare the return variables.
    my ($realKey, $subKey);
    # Look for a splitter in the input. The splitter is a literal delimiter,
    # so it is quoted to keep any regular expression metacharacters in it
    # from being interpreted as a pattern. The non-greedy first group means
    # we split on the first occurrence of the splitter.
    if ($keyChoice =~ /^(.*?)\Q$self->{splitter}\E(.*)/) {
        # We found one. This means we can treat both sides of the
        # splitter as known patterns.
        ($realKey, $subKey) = ($1, $2);
    } elsif ($keyChoice =~ /%$/) {
        # Here we have a generic pattern for the whole key. The pattern
        # is treated as the correct pattern for the real key, but the
        # sub-key is considered to be wild, so it is left undefined.
        $realKey = $keyChoice;
    } else {
        # Here we have a literal pattern for the whole key. The pattern
        # is treated as the correct pattern for the real key, and the
        # sub-key is required to be blank.
        $realKey = $keyChoice;
        $subKey = '';
    }
    # Return the results.
    return ($realKey, $subKey);
}
2156    
2157    
2158    =head3 _WherePart
2159    
2160        my ($sqlClause, $escapedValue) = _WherePart($tableName, $fieldName, $sqlPattern);
2161    
2162    Return the SQL clause and value for checking a field against the
2163    specified SQL pattern value. If the pattern is generic (ends in a C<%>),
2164    then a C<LIKE> expression is returned. Otherwise, an equality expression
2165    is returned. We take in information describing the field being checked,
2166    and the pattern we're checking against it. The output is a WHERE clause
2167    fragment for the comparison and a value to be used as a bound parameter
2168    value for the clause.
2169    
2170    =over 4
2171    
2172    =item tableName
2173    
2174    Name of the table containing the field we want checked by the clause.
2175    
2176    =item fieldName
2177    
2178    Name of the field to check in that table.
2179    
2180    =item sqlPattern
2181    
2182    Pattern to be compared against the field. If the last character is a percent sign
2183    (C<%>), it will be treated as a generic SQL pattern; otherwise, it will be treated
2184    as a literal.
2185    
2186    =item RETURN
2187    
Returns a two-element list. The first element will be an SQL comparison expression
and the second will be the value to be used as a bound parameter for that expression.
2191    
2192    =back
2193    
2194    =cut
2195    
sub _WherePart {
    # Get the parameters.
    my ($tableName, $fieldName, $sqlPattern) = @_;
    # Declare the return variables.
    my ($sqlClause, $escapedValue);
    # Copy the pattern into the return area. For an exact match it is used
    # as the bound parameter unchanged.
    $escapedValue = $sqlPattern;
    # Check the pattern. Is it generic or exact? A pattern is generic if it
    # ends in a percent sign; this includes a lone "%", which matches
    # everything (the same rule _CheckSQLPattern uses).
    if ($sqlPattern =~ /(.*)%$/) {
        # Yes, it is. We need a LIKE clause and we must escape the underscores,
        # percents, and backslashes in the prefix so that they are matched
        # literally. The backslash must be escaped too, since it is the escape
        # character itself. The final percent sign is then put back as the
        # one real wild card.
        $escapedValue = $1;
        $escapedValue =~ s/([%_\\])/\\$1/g;
        $escapedValue .= "%";
        $sqlClause = "$tableName($fieldName) LIKE ?";
    } else {
        # No, it isn't. We use an equality clause.
        $sqlClause = "$tableName($fieldName) = ?";
    }
    # Return the results.
    return ($sqlClause, $escapedValue);
}
2218    
2219    
2220    =head3 _CheckSQLPattern
2221    
2222        my $flag = _CheckSQLPattern($pattern, $value);
2223    
2224    Return TRUE if the specified SQL pattern matches the specified value,
2225    else FALSE. The pattern is not a true full-blown SQL LIKE pattern: the
2226    only wild-carding allowed is a percent sign (C<%>) at the end.
2227    
2228    =over 4
2229    
2230    =item pattern
2231    
2232    SQL pattern to match against a value.
2233    
2234    =item value
2235    
2236    Value to match against an SQL pattern.
2237    
2238    =item RETURN
2239    
2240    Returns TRUE if the pattern matches the value, else FALSE.
2241    
2242    =back
2243    
2244    =cut
2245    
sub _CheckSQLPattern {
    # Get the parameters.
    my ($pattern, $value) = @_;
    # A trailing percent sign makes the pattern generic: the value matches
    # if it begins with everything in front of the percent sign.
    if ($pattern =~ /(.*)%$/) {
        my $prefix = $1;
        return (index($value, $prefix) == 0);
    }
    # Otherwise the value must be identical to the pattern.
    return ($pattern eq $value);
}
2262    
2263  1;  1;

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.34

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3