[Bio] / Sprout / AttrDBRefresh.pl Repository:
ViewVC logotype

Annotation of /Sprout/AttrDBRefresh.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.23 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 AttrDBRefresh
4 :    
5 : parrello 1.5 This script performs useful function on the custom attributes database.
6 : parrello 1.1
7 :     The currently-supported command-line options are as follows.
8 :    
9 :     =over 4
10 :    
11 :     =item user
12 :    
13 :     Name suffix to be used for log files. If omitted, the PID is used.
14 :    
15 :     =item trace
16 :    
17 :     Numeric trace level. A higher trace level causes more messages to appear. The
18 : parrello 1.2 default trace level is C<3>.
19 : parrello 1.1
20 :     =item sql
21 :    
22 :     If specified, turns on tracing of SQL activity.
23 :    
24 :     =item background
25 :    
26 :     Save the standard and error output to files. The files will be created
27 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
28 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
29 :     B<user> option above.
30 :    
31 :     =item h
32 :    
33 :     Display this command's parameters and options.
34 :    
35 :     =item phone
36 :    
37 :     Phone number to message when the script is complete.
38 :    
39 : parrello 1.2 =item migrate
40 :    
41 : parrello 1.10 If specified, a comma-delimited list of attributes to be migrated from the old system to the
42 :     new one. The attributes will be erased before migration.
43 : parrello 1.2
44 : parrello 1.4 =item initializeAndClear
45 : parrello 1.2
46 : parrello 1.4 If specified, then the tables in the attribute database are dropped and re-created.
47 : parrello 1.2
48 : parrello 1.22 =item replace
49 : parrello 1.12
50 : parrello 1.22 If specified, existing keys will be erased before loading the attribute data. This
51 :     option only makes sense if C<load> is specified.
52 : parrello 1.12
53 : parrello 1.5 =item load
54 :    
55 :     If specified, the name of a file containing attribute data to be loaded into the
56 :     system. The file is presumed to be tab-delimited. The first column must be the
57 :     object ID, the second the attribute key name, and the remaining columns the
58 : parrello 1.22 attribute values. Existing attributes will be unchanged unless the C<replace>
59 :     option is specified.
60 : parrello 1.5
61 : parrello 1.7 =item loadKey
62 :    
63 :     If specified, the name of a tab-delimited file containing attribute key data. For each key,
64 :     there is a pair of lines. The first line contains the ID, data type, and description
65 :     of the key. The second line contains the marker C<#GROUPS> followed by zero or more
66 :     group names. The attribute will be connected to all the specified groups.
67 :    
68 : parrello 1.5 =item backup
69 :    
70 :     If specified, the name of a file into which all the attribute data should be
71 : parrello 1.7 dumped. The file itself will receive the attribute data in the format expected
72 :     by C<load>. A second file, consisting of the same file name with the suffix C<.key>,
73 :     will contain the attribute key data in the format expected by C<loadKey>.
74 : parrello 1.5
75 : parrello 1.10 =item compare
76 :    
77 :     If specified, the name of a file to contain a comparision report. The comparison report
78 :     contains a table of the attribute keys and the number of values of the key in the new
79 :     and old systems, along with a list of the attributes and values not in the new system.
80 :     The report is formatted as a web page.
81 :    
82 :     =item summary
83 :    
84 :     If specified, the name of a file to contain a summary report. The summary report
85 :     contains a table of the attribute keys and the number of values of each. The report
86 :     is formatted as a web page.
87 :    
88 :     =item trimSpaces
89 :    
90 :     If specified, the name of an attribute value backup file. The file will be processed to
91 :     remove excess spaces. The fixed file will have the same name as the incoming backup
92 :     file with the extension <.fixed>. This new file can then be reloaded using the
93 :     C<load> option.
94 :    
95 : parrello 1.13 =item showKeyDef
96 :    
97 :     If specified, the name of an attribute key. The key's descriptive data will be displayed.
98 :    
99 : parrello 1.14 =item mapSubkey
100 :    
101 :     Subkey mapping rule. Consists of a key name, followed by a substitution command enclosed in
102 :     slashes. For example, to remove the word C<ZINC> from the beginning of C<docking_results>
103 :     subkeys, you would code
104 :    
105 :     -mapSubkey=docking_results/^ZINC//
106 :    
107 : parrello 1.17 =item mapObjectID
108 :    
109 :     Object ID mapping rule. Consists of a key name, followed by a substitution command enclosed in
110 :     slashes. For example, to add the prefix C<fig|> to all the object IDs for the C<PRODOM> key,
111 :     you would code
112 :    
113 :     -mapObjectID=PRODOM/^/fig\|/
114 :    
115 : parrello 1.15 =item dockClean
116 :    
117 :     If specified, Predicted docking results will be removed from the attribute database.
118 :    
119 : parrello 1.19 =item resume
120 :    
121 : parrello 1.21 If specified, key-value pairs already in the database will not be reinserted.
122 :     Specify a number to start checking after the specified number of lines and
123 :     then admit everything after the first line not yet loaded. Specify C<careful>
124 :     to check every single line. Specify C<none> to ignore this option. The default
125 :     is C<none>. So, if you believe that a previous load failed somewhere after 50000
126 :     lines, a resume value of C<50000> would skip 50000 lines in the file, then
127 :     check each line after that until it finds one not already in the database. The
128 :     first such line found and all lines after that will be loaded. On the other
129 :     hand, if you have a file of 100000 records, and some have been loaded and some
130 :     not, you would use the word C<careful>, so that every line would be checked before
131 :     it is inserted. A resume of C<0> will start checking the first line of the
132 :     input file and then begin loading once it finds a line not in the database.
133 :    
134 :     =item chunkSize
135 :    
136 :     Number of lines to load in each burst. The default is 10,000. This option
137 :     is only used if C<load> is specified.
138 :    
139 :     =item mode
140 :    
141 :     C<concurrent> to use concurrent loading in MySQL or C<low_priority> to use
142 : parrello 1.23 low-priority loading in MySQL. If C<normal>, normal loading will be used. The
143 :     default is C<concurrent>. This option is only used if C<load> is specified.
144 : parrello 1.19
145 : parrello 1.1 =back
146 :    
147 :     =cut
148 :    
149 :     use strict;
150 :     use Tracer;
151 :     use Cwd;
152 :     use File::Copy;
153 :     use File::Path;
154 :     use CustomAttributes;
155 : parrello 1.5 use ERDBLoad;
156 : parrello 1.14 use Stats;
157 : parrello 1.10 use CGI;
158 : parrello 1.1
159 :     # Get the command-line options and parameters.
160 : parrello 1.21 my ($options, @parameters) = StandardSetup([qw(CustomAttributes ERDB DBKernel) ],
161 : parrello 1.1 {
162 : parrello 1.2 trace => [3, "trace level"],
163 : parrello 1.4 initializeAndClear => ["", "if specified, the tables of the attribute database will be re-created"],
164 : parrello 1.1 phone => ["", "phone number (international format) to call when load finishes"],
165 : parrello 1.5 load => ["", "file from which to load attribute data"],
166 : parrello 1.7 loadKey => ["", "file from which to load attribute key data"],
167 : parrello 1.6 backup => ["", "file to which attribute data should be dumped"],
168 : parrello 1.10 compare => ["", "name of a file into which a comparison report will be written"],
169 :     summary => ["", "name of a file into which a summary report will be written"],
170 :     trimSpaces => ["", "if specified, the name of a backup file, which will be processed to remove excess spaces"],
171 : parrello 1.22 replace => ["", "if specified, data will be erased before loading from the load file"],
172 : parrello 1.13 showKeyDef => ["", "if specified, the name of a key whose descriptive data is to be displayed"],
173 : parrello 1.14 mapSubkey => ["", "instructions for fixing subkey values"],
174 : parrello 1.17 mapObjectID => ["", "instructions for fixing object ID values"],
175 : parrello 1.15 dockClean => ["", "if specified, Predicted docking results will be removed from the database"],
176 : parrello 1.21 resume => ["", "if specified, key-value pairs already in the database will not be inserted when loading from the load file"],
177 : parrello 1.23 mode => ["concurrent", "MySQL load mode to use"],
178 : parrello 1.21 chunksize => ["", "number of attributes to load in each burst"]
179 : parrello 1.1 },
180 :     "",
181 :     @ARGV);
182 :     # Set a variable to contain return type information.
183 :     my $rtype;
184 : parrello 1.10 # Create a CGI object.
185 :     my $cgi = CGI->new();
186 : parrello 1.1 # Insure we catch errors.
187 :     eval {
188 : parrello 1.3 # Insure we don't use the new attribute system for accessing the old attributes.
189 :     $FIG_Config::attrOld = 1;
190 : parrello 1.4 # Get the attribute database.
191 : parrello 1.14 Trace("Connecting to local attribute database.") if T(2);
192 : parrello 1.4 my $ca = CustomAttributes->new();
193 :     # Process according to the options selected.
194 : parrello 1.7 if ($options->{backup}) {
195 :     # Back up the attributes to the specified file.
196 :     my $backupFileName = $options->{backup};
197 :     Trace("Backing up attribute data.") if T(2);
198 :     my $stats = $ca->BackupAllAttributes($backupFileName);
199 :     Trace("Attribute backup statistics:\n" . $stats->Show()) if T(2);
200 :     Trace("Backing up key data.") if T(2);
201 :     $stats = $ca->BackupKeys("$backupFileName.key");
202 :     Trace("Key backup statistics:\n" . $stats->Show()) if T(2);
203 : parrello 1.5 }
204 : parrello 1.4 if ($options->{initializeAndClear}) {
205 :     # Create the tables.
206 :     $ca->CreateTables();
207 :     Trace("Tables recreated.") if T(2);
208 : parrello 1.2 }
209 : parrello 1.10 if ($options->{trimSpaces}) {
210 :     # Here we need to remove unnecessary spaces from an attribute values backup
211 :     # file. First, we open the input backup file.
212 :     my $fileName = $options->{trimSpaces};
213 :     my $ih = Open(undef, "<$fileName");
214 :     # Now we open the proposed output file.
215 :     my $oh = Open(undef, ">$fileName.fixed");
216 : parrello 1.11 # Create a statistics object to track our progress.
217 :     my $stats = Stats->new('lines', 'trims');
218 : parrello 1.10 Trace("Cleaning $fileName and copying to $fileName.fixed.") if T(2);
219 :     # Loop through the input file.
220 :     while (! eof $ih) {
221 :     # Get the next record in the input file.
222 : parrello 1.11 $stats->Add(lines => 1);
223 : parrello 1.10 my ($id, $key, @values) = Tracer::GetLine($ih);
224 :     # Trim the values.
225 : parrello 1.11 for my $value (@values) {
226 :     if ($value =~ /(\S.+\S)\s+/) {
227 :     $value = $1;
228 :     $stats->Add(trims => 1);
229 :     }
230 :     }
231 : parrello 1.10 # Write the result to the output file.
232 :     Tracer::PutLine($oh, [$id, $key, @values]);
233 :     }
234 :     # Close the files.
235 :     close $ih;
236 :     close $oh;
237 : parrello 1.11 Trace("$fileName.fixed is now a cleaned backup.\n" . $stats->Show()) if T(2);
238 : parrello 1.10 }
239 : parrello 1.7 if ($options->{loadKey}) {
240 :     # We want to load the attribute data from the specified file, but
241 :     # first we need to verify that the file exists.
242 :     my $loadFileName = $options->{loadKey};
243 :     if (! -f $loadFileName) {
244 :     Confess("Cannot load keys: file \"$loadFileName\" is not found or not a file.");
245 :     } else {
246 :     Trace("Loading key data from $loadFileName.") if T(2);
247 :     my $stats = $ca->RestoreKeys($loadFileName);
248 :     Trace("Load statistics:\n" . $stats->Show()) if T(2);
249 :     }
250 :     }
251 : parrello 1.13 if ($options->{showKeyDef}) {
252 :     # We want to display the identified key's description. Get the key name.
253 :     my $name = $options->{showKeyDef};
254 :     # Look for keys with the same name.
255 :     my %keys = $ca->GetAttributeData('find', $options->{showKeyDef});
256 :     # See if we found the key.
257 :     if (! $keys{$name}) {
258 :     print "Key $name not found.\n";
259 :     } else {
260 :     print "Description for $name.\n\n";
261 :     print $keys{$name}->[1];
262 :     print "\n\n";
263 :     }
264 :     }
265 : parrello 1.5 if ($options->{load}) {
266 :     # We want to load the attribute data from the specified file, but
267 :     # first we need to verify that the file exists.
268 :     my $loadFileName = $options->{load};
269 :     if (! -f $loadFileName) {
270 :     Confess("Cannot load: file \"$loadFileName\" is not found or not a file.");
271 :     } else {
272 : parrello 1.19 # Set up options. We may need to specify the append and resume options, and
273 :     # we need to archive.
274 : parrello 1.12 my %loadOptions;
275 : parrello 1.22 if ($options->{replace}) {
276 :     $loadOptions{append} = 0;
277 :     } else {
278 : parrello 1.12 $loadOptions{append} = 1;
279 :     }
280 : parrello 1.19 if ($options->{resume}) {
281 :     $loadOptions{resume} = 1;
282 :     }
283 : parrello 1.21 if ($options->{mode}) {
284 :     $loadOptions{mode} = $options->{mode};
285 :     }
286 : parrello 1.19 # Insure we have the archive directory available.
287 :     my $archiveDirectory = "$FIG_Config::fig/AttribData";
288 :     Tracer::Insure($archiveDirectory, 0777);
289 :     # Create an archive file name from the current time and the PID.
290 :     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time());
291 :     $loadOptions{archive} = "$archiveDirectory/attrSave$$.$mon.$mday.$year.$hour.$min.$sec.log";
292 : parrello 1.12 # Now we can load.
293 : parrello 1.5 Trace("Loading attribute data from $loadFileName.") if T(2);
294 : parrello 1.12 my $stats = $ca->LoadAttributesFrom($loadFileName, %loadOptions);
295 : parrello 1.5 Trace("Load statistics:\n" . $stats->Show()) if T(2);
296 :     }
297 :     }
298 : parrello 1.10 if ($options->{summary}) {
299 :     # Here we have a summary report. The value of the option is the name of a file that is to
300 :     # contain an html-formatted report. We start by getting a complete list of the keys and
301 :     # the associated counts.
302 :     my $keyCounts = GetAllKeys($ca);
303 :     # Buffer the lines in the following list.
304 :     my @lines = ();
305 :     # Start the table.
306 :     push @lines, $cgi->start_table({border => 2});
307 :     push @lines, $cgi->Tr($cgi->th({align => 'left'}, 'Key Name'), $cgi->th({align => 'right'}, 'Values'));
308 :     # Loop through the key hash, building row data.
309 :     for my $key (sort keys %{$keyCounts}) {
310 :     push @lines, $cgi->Tr($cgi->td({align => 'left'}, $key), $cgi->td({align => 'right'}, $keyCounts->{$key}));
311 :     }
312 :     # Close off the table.
313 :     push @lines, $cgi->end_table();
314 :     # Output the page.
315 :     WritePage($options->{summary}, 'Attribute Key Summary', \@lines);
316 :     }
317 :     if ($options->{compare}) {
318 :     # Here we have a comparison report. The value of the option is the name of a file that is
319 :     # to contain an html-formatted report. We need access to the SEED database to pull this
320 :     # off.
321 : parrello 1.21 Trace("Connecting to FIG object.") if T(2);
322 :     require FIG;
323 :     my $fig = FIG->new();
324 : parrello 1.10 my $dbh = $fig->db_handle();
325 :     Trace("Retrieving key data.") if T(2);
326 :     # Get counts for all the keys in the old system.
327 :     my $oldKeys = $dbh->SQL("SELECT tag, COUNT(*) FROM attribute GROUP BY tag");
328 :     # Insure the query worked.
329 :     if (! defined($oldKeys)) {
330 :     Trace("Database error retrieving old system keys:" . $dbh->errstr) if T(0);
331 :     } else {
332 :     # Convert the key data into a hash.
333 :     my %oldKeyHash = map { $_->[0] => $_->[1] } @{$oldKeys};
334 :     # Get the counts for all the keys in the new system.
335 :     my $newKeyHashRef = GetNewKeyCounts($ca);
336 :     # We've got our data, so the next step is to start accumulating the lines of the web page.
337 :     Trace("Processing key table for new system.") if T(2);
338 :     my @lines = ();
339 :     # Start the first table.
340 :     push @lines, $cgi->h3("New System Keys");
341 :     push @lines, $cgi->start_table({border => 2});
342 :     push @lines, $cgi->Tr($cgi->th({align => 'left'}, 'Key Name'), $cgi->th({align => 'right'}, 'New Values'),
343 :     $cgi->th({align => 'right'}, 'Old Values'));
344 :     # Now we process the new keys. As we run through them, we'll delete matching keys from
345 :     # the old key hash. The remaining keys will be output as a missing-keys table.
346 :     for my $key (sort keys %{$newKeyHashRef}) {
347 :     # Get the new system count.
348 :     my $newCount = $newKeyHashRef->{$key};
349 :     # Default to a blank in the old system count column.
350 :     my $oldCount = '&nbsp;';
351 :     # Check to see if this key exists in the old system.
352 :     if (exists $oldKeyHash{$key}) {
353 :     # If it does, save its count and delete it from the old system hash.
354 :     $oldCount = $oldKeyHash{$key};
355 :     delete $oldKeyHash{$key};
356 :     }
357 :     # Output this table row.
358 :     push @lines, $cgi->Tr($cgi->td({align => 'left'}, $key), $cgi->td({align => 'right'}, $newCount),
359 :     $cgi->td({align => 'right'}, $oldCount));
360 :     }
361 :     # Close the table.
362 :     push @lines, $cgi->end_table();
363 :     # Now the remaining keys in the old key hash are missing from the new system. We create a new table
364 :     # to display them.
365 :     my @missingKeys = sort keys %oldKeyHash;
366 :     # Only do this if there's at least one missing key.
367 :     if (@missingKeys == 0) {
368 :     push @lines, $cgi->p("No missing keys found.");
369 :     } else {
370 :     Trace("Processing missing key table.") if T(2);
371 :     # Start the second table.
372 :     push @lines, $cgi->h3("Keys Missing from New System");
373 :     push @lines, $cgi->start_table({border => 2});
374 :     push @lines, $cgi->Tr($cgi->th({align => 'left'}, 'Key Name'), $cgi->td({align => 'right'}, 'Values'));
375 :     # Loop through the missing keys, writing them to the table.
376 :     for my $key (@missingKeys) {
377 :     push @lines, $cgi->Tr($cgi->td({align => 'left'}, $key), $cgi->td({align => 'right'}, $oldKeyHash{$key}));
378 :     }
379 :     # Close the table.
380 :     push @lines, $cgi->end_table();
381 :     }
382 :     # Write the web page.
383 :     WritePage($options->{compare}, 'Attribute System Comparison', \@lines);
384 :     }
385 :     }
386 : parrello 1.17 if ($options->{mapSubkey} || $options->{mapObjectID}) {
387 : parrello 1.14 # Parse out the main key.
388 : parrello 1.17 my $type = ($options->{mapSubkey} ? 'mapSubkey' : 'mapObjectID');
389 :     my $mapThing = $options->{$type};
390 :     my $field = ($options->{mapSubkey} ? 'subkey' : 'to-link');
391 :     if ($mapThing =~ m#([^/]+)(/.+)#) {
392 : parrello 1.14 my ($keyName, $pattern) = ($1, $2);
393 : parrello 1.17 Trace("Processing $type mapping for $keyName with pattern s$pattern.") if T(2);
394 : parrello 1.14 # Create a statistics object.
395 :     my $results = Stats->new();
396 : parrello 1.17 # Get all the field values.
397 :     my %things = map { $_ => 1 } $ca->GetFlat(['HasValueFor'], "HasValueFor(from-link) = ?", [$keyName],
398 :     "HasValueFor($field)");
399 :     my $totalThings = scalar keys %things;
400 :     Trace("$totalThings ${field}s found.") if T(2);
401 :     # Loop through the values, doing updates where necessary.
402 :     for my $thing (keys %things) {
403 : parrello 1.14 # Count this subkey.
404 : parrello 1.17 my $thingCount = $results->Add("${field}s" => 1);
405 : parrello 1.14 # Apply the substitution.
406 : parrello 1.17 my $newThing = $thing;
407 :     eval("\$newThing =~ s$pattern");
408 : parrello 1.14 # If the evaluation resulted in an error, stop immediately.
409 :     if ($@) {
410 :     Confess("Error in substitution pattern: $@");
411 : parrello 1.17 } elsif ($newThing ne $thing) {
412 : parrello 1.14 # Here the substitution worked and it changed the key value.
413 :     # We need to update the database.
414 :     $results->Add(updates => 1);
415 : parrello 1.17 my $count = $ca->UpdateField("HasValueFor($field)", $thing, $newThing,
416 : parrello 1.14 "HasValueFor(from-link) = ?", [$keyName]);
417 :     $results->Add(rowsChanged => $count);
418 :     }
419 :     # Trace our progress.
420 : parrello 1.17 if ($thingCount % 100 == 0) {
421 : parrello 1.18 Trace("$thingCount processed.") if T(3);
422 : parrello 1.14 }
423 :     }
424 :     # Display the statistics.
425 : parrello 1.17 Trace("Statistics from $field update:\n" . $results->Show()) if T(2);
426 : parrello 1.14 } else {
427 :     # Here the incoming parameter was in the wrong format. Mostly this means there
428 :     # was nothing before the slash or no slash was found.
429 : parrello 1.17 Confess("Invalid substitution syntax in map option.");
430 : parrello 1.14 }
431 :     }
432 : parrello 1.15 if ($options->{dockClean}) {
433 :     # Get the list of PDBs with results.
434 :     my @pdbList = sort map { $_->[0] } $ca->GetAttributes(undef, 'has_results');
435 :     # Loop through the PDB IDs.
436 :     for my $pdbID (@pdbList) {
437 :     Trace("Processing $pdbID.") if T(3);
438 :     # Loop until we run out of rows to delete.
439 :     my $thisCount = 1;
440 :     my $totalCount = 0;
441 :     while ($thisCount) {
442 :     # Delete a bunch of rows. To avoid a timeout, we limit the results.
443 : parrello 1.16 $thisCount = $ca->DeleteLike('HasValueFor', 'HasValueFor(to-link) = ? AND HasValueFor(value) LIKE ? LIMIT 10000',
444 : parrello 1.15 [$pdbID, '%Predicted']);
445 :     $totalCount += $thisCount;
446 :     Trace("$thisCount rows deleted in batch. $totalCount total deletions for pdb $pdbID.") if T(3);
447 :     }
448 :     }
449 :     }
450 : parrello 1.2 Trace("Processing complete.") if T(2);
451 : parrello 1.1 };
452 :     if ($@) {
453 :     Trace("Script failed with error: $@") if T(0);
454 :     $rtype = "error";
455 :     } else {
456 :     Trace("Script complete.") if T(2);
457 :     $rtype = "no error";
458 :     }
459 :     if ($options->{phone}) {
460 :     my $msgID = Tracer::SendSMS($options->{phone}, "RefreshAttrDB terminated with $rtype.");
461 :     if ($msgID) {
462 :     Trace("Phone message sent with ID $msgID.") if T(2);
463 :     } else {
464 :     Trace("Phone message not sent.") if T(2);
465 :     }
466 :     }
467 :    
468 : parrello 1.4 =head3 MigrateAttributes
469 :    
470 : parrello 1.20 my $stats = MigrateAttributes($ca, $fig, $preserve);
471 : parrello 1.4
472 :     Migrate all the attributes data from the specified FIG instance. This is a long, slow
473 :     method used to convert the old attribute data to the new system. Only attribute
474 :     keys that are already in the database will be loaded, and they will completely
475 :     replace the existing values for those keys. Therefore, it is very important that the
476 :     FIG instance not be connected to the attribute database.
477 :    
478 :     =over 4
479 :    
480 :     =item ca
481 :    
482 :     B<CustomAttributes> object used to access the attribute database.
483 :    
484 :     =item fig
485 :    
486 :     A FIG object that can be used to retrieve attributes for migration purposes.
487 :    
488 : parrello 1.6 =item preserve (optional)
489 :    
490 :     A comma-delimited list of attributes that are not to be migrated.
491 :    
492 : parrello 1.4 =item RETURN
493 :    
494 :     Returns a statistical object for the load process.
495 :    
496 :     =back
497 :    
498 :     =cut
499 :    
500 :     sub MigrateAttributes {
501 :     # Get the parameters.
502 : parrello 1.6 my ($ca, $fig, $preserve) = @_;
503 : parrello 1.4 # Create the return value.
504 : parrello 1.5 my $retVal = Stats->new('keysIn');
505 :     # Create a loader for the value table.
506 :     my $hasValueFor = ERDBLoad->new($ca, 'HasValueFor', $FIG_Config::temp);
507 :     # Create a hash for the target objects.
508 :     my %targetObjectHash = ();
509 : parrello 1.6 # Get a list of the attributes we're to preserve.
510 :     my %preserve = ();
511 :     if (defined $preserve) {
512 :     %preserve = map { $_ => 1 } split /\s*,\s*/, $preserve;
513 :     }
514 :     # Put the preserved keys into the load file.
515 :     for my $key (keys %preserve) {
516 :     Trace("Preserving key $key.") if T(3);
517 :     my @newValues = $ca->GetAttributes(undef, $key);
518 :     Trace(scalar(@newValues) . " values of $key will be preserved.");
519 :     # Put the values into the load file.
520 :     PutValue($hasValueFor, $ca, @newValues);
521 :     }
522 : parrello 1.4 # Get a list of all our attribute keys.
523 : parrello 1.6 my @allKeys = $ca->GetFlat(['AttributeKey'], "", [], 'AttributeKey(id)');
524 :     # Delete the preserved keys.
525 :     my @keys = grep { ! $preserve{$_} } @allKeys;
526 :     # Loop through the reset, building the load files.
527 : parrello 1.4 for my $key (@keys) {
528 :     Trace("Migrating key $key.") if T(3);
529 :     $retVal->Add(keysIn => 1);
530 :     # Get all the values of the specified key.
531 :     my @oldValues = $fig->get_attributes(undef, $key);
532 :     my $count = scalar(@oldValues);
533 :     Trace("$count values found for $key in source system.") if T(3);
534 : parrello 1.6 # Put the values into the load file.
535 :     PutValue($hasValueFor, $ca, @oldValues);
536 : parrello 1.4 }
537 : parrello 1.5 # Close and finish the loads to upload the data.
538 :     Trace("Closing value table.") if T(2);
539 :     my $hvfStats = $hasValueFor->FinishAndLoad();
540 :     Trace("Statistics from value table load:\n" . $hvfStats->Show()) if T(2);
541 :     # Merge the statistics.
542 :     $retVal->Accumulate($hvfStats);
543 : parrello 1.4 # Return the statistics object.
544 :     return $retVal;
545 :     }
546 :    
547 : parrello 1.6 =head3 PutValue
548 :    
549 : parrello 1.20 PutValue($hasValueFor, $ca, @values);
550 : parrello 1.6
551 :     Put the values from an attribute value list into a HasValueFor load file.
552 :    
553 :     =over 4
554 :    
555 :     =item hasValueFor
556 :    
557 :     Load object for the HasValueFor table.
558 :    
559 :     =item ca
560 :    
561 :     A CustomAttribute object. We get the splitter value from it.
562 :    
563 :     =item value
564 :    
565 :     A list of tuples, each consisting of an object ID, a key name, and one or more values.
566 :    
567 :     =back
568 :    
569 :     =cut
570 :    
571 :     sub PutValue {
572 :     # Get the parameters.
573 :     my ($hasValueFor, $ca, @values) = @_;
574 :     # Loop through the value rows.
575 :     for my $row (@values) {
576 :     # Get this row's data.
577 :     my ($id, $key, @values) = @{$row};
578 :     # Format the values.
579 :     my $valueString = join($ca->{splitter}, @values);
580 :     # Add the value.
581 :     $hasValueFor->Put($key, $id, $valueString);
582 :     }
583 :     }
584 :    
585 :     =head3 MigrateCollections
586 :    
587 : parrello 1.20 my $stats = MigrateCollections($ca, $fig);
588 : parrello 1.6
589 :     This method copies the collection data from the specified FIG object and stores it as values
590 :     of the C<collection> attribute in the specified custom attribute database.
591 :    
592 :     =over 4
593 :    
594 :     =item ca
595 :    
596 :     Custom attribute database into which the collections are to be stored.
597 :    
598 :     =item fig
599 :    
600 :     FIG object from which the collection attributes are to be harvested.
601 :    
602 :     =item RETURN
603 :    
604 :     Returns a statistics object with informatino about the migration.
605 :    
606 :     =back
607 :    
608 :     =cut
609 :    
610 :     sub MigrateCollections {
611 :     # Get the parameters.
612 :     my ($ca, $fig) = @_;
613 :     # Declare the return variable.
614 :     my $retVal = Stats->new();
615 :     # Get the collection names.
616 :     my @collections = qw(higher_plants eukaryotic_ps nonoxygenic_ps hundred_hundred functional_coupling_paper ecoli_essentiality_paper);
617 :     # Erase the current collection date.
618 :     $ca->EraseAttribute('collection');
619 :     # Loop through the collection attributes.
620 :     for my $cname (@collections) {
621 :     $retVal->Add(collection => 1);
622 :     # Get this attribute from the old system.
623 :     my @rows = $fig->get_attributes(undef, $cname);
624 :     # Loop through its values.
625 :     for my $row (@rows) {
626 :     $retVal->Add($cname => 1);
627 :     # Determine the object key.
628 :     my $objectID = ($row->[0] eq 'Subsystem' ? $row->[2] : $row->[0]);
629 :     $ca->AddAttribute($objectID, 'collection', $cname);
630 :     }
631 :     }
632 :     # Return the statistics.
633 :     return $retVal;
634 :     }
635 :    
636 : parrello 1.10 =head3 GetAllKeys
637 :    
638 : parrello 1.20 my @keys = GetAllKeys($ca);
639 : parrello 1.10
640 :     Return a sorted list of the attribute keys.
641 :    
642 :     =over 4
643 :    
644 :     =item ca
645 :    
646 :     CustomAttributes object used to access the database.
647 :    
648 :     =item RETURN
649 :    
650 :     Returns a sorted list of all the attribute keys.
651 :    
652 :     =back
653 :    
654 :     =cut
655 :    
656 :     sub GetAllKeys {
657 :     # Get the parameters.
658 :     my ($ca) = @_;
659 :     # Get the attribute data.
660 :     my %keyData = $ca->GetAttributeData('name', '');
661 :     # Sort the keys.
662 :     my @retVal = sort keys %keyData;
663 :     # Return the result.
664 :     return @retVal;
665 :     }
666 :    
667 :     =head3 OpenPage
668 :    
669 : parrello 1.20 my $fh = OpenPage($fileName, $title);
670 : parrello 1.10
671 :     Start writing an HTML page to a file and return the file handle.
672 :    
673 :     =over 4
674 :    
675 :     =item fileName
676 :    
677 :     Name of the file to which the page will be written.
678 :    
679 :     =item title
680 :    
681 :     Title for the page.
682 :    
683 :     =item RETURN
684 :    
685 :     Returns the file handle for writing the rest of the page.
686 :    
687 :     =back
688 :    
689 :     =cut
690 :    
691 :     sub OpenPage {
692 :     # Get the parameters.
693 :     my ($fileName, $title) = @_;
694 :     # Open the file.
695 :     my $retVal = Open(undef, ">$fileName");
696 :     # Write out the HTML headers.
697 :     print $retVal $cgi->start_html(-title => $title, -BGCOLOR => 'silver');
698 :     print $retVal "\n";
699 :     # Return the file handle.
700 :     return $retVal;
701 :     }
702 :    
703 :     =head3 GetNewKeyCounts
704 :    
705 : parrello 1.20 my %counts = GetNewKeyCounts($ca);
706 : parrello 1.10
707 :     Return a hash mapping attribute key names to counts.
708 :    
709 :     =over 4
710 :    
711 :     =item ca
712 :    
713 :     CustomAttributes object for accessing the attribute database.
714 :    
715 :     =item RETURN
716 :    
717 :     Returns a reference to a hash mapping each key name to a count of the key's values.
718 :    
719 :     =back
720 :    
721 :     =cut
722 :    
723 :     sub GetNewKeyCounts {
724 :     # Get the parameters.
725 :     my ($ca) = @_;
726 :     # Declare the return variable.
727 :     my $retVal = {};
728 :     # Get all of the keys.
729 :     my @keys = GetAllKeys($ca);
730 :     # Loop through the list, filling the hash.
731 :     for my $key (@keys) {
732 :     my $count = $ca->GetCount(['HasValueFor'], "HasValueFor(from-link) = ?", [$key]);
733 :     $retVal->{$key} = $count;
734 :     }
735 :     # Return the result.
736 :     return $retVal;
737 :     }
738 :    
739 :     =head3 WritePage
740 :    
741 : parrello 1.20 WritePage($fileName, $title, \@lines);
742 : parrello 1.10
743 :     Write the specified lines to the specified file as a web page. The lines are assumed to be raw
744 :     HTML body text. They will be preceded by a standard HTML header and followed by a standard
745 :     HTML footer.
746 :    
747 :     =over 4
748 :    
749 :     =item fileName
750 :    
751 :     Name of the output file.
752 :    
753 :     =item title
754 :    
755 :     Title for the web page.
756 :    
757 :     =item lines
758 :    
759 :     Reference to a list of lines of HTML.
760 :    
761 :     =back
762 :    
763 :     =cut
764 :    
765 :     sub WritePage {
766 :     # Get the parameters.
767 :     my ($fileName, $title, $lines) = @_;
768 :     # Open the file and write the header to it. The header includes everything up to and including
769 :     # the BODY tag.
770 :     Trace("Writing web page to $fileName.") if T(2);
771 :     my $oh = OpenPage($fileName, $title);
772 :     # Write the lines one at a time.
773 :     for my $line (@{$lines}) {
774 :     print $oh "$line\n";
775 :     }
776 :     # Write the HTML footer.
777 :     print $oh $cgi->end_html();
778 :     # Close the output file.
779 :     close $oh;
780 :     Trace("Web page created in $fileName.") if T(2);
781 :     }
782 :    
783 : parrello 1.20 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3