[Bio] / Sprout / AttrDBRefresh.pl Repository:
ViewVC logotype

Annotation of /Sprout/AttrDBRefresh.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 AttrDBRefresh
4 :    
5 : parrello 1.5 This script performs useful function on the custom attributes database.
6 : parrello 1.1
7 :     The currently-supported command-line options are as follows.
8 :    
9 :     =over 4
10 :    
11 :     =item user
12 :    
13 :     Name suffix to be used for log files. If omitted, the PID is used.
14 :    
15 :     =item trace
16 :    
17 :     Numeric trace level. A higher trace level causes more messages to appear. The
18 : parrello 1.2 default trace level is C<3>.
19 : parrello 1.1
20 :     =item sql
21 :    
22 :     If specified, turns on tracing of SQL activity.
23 :    
24 :     =item background
25 :    
26 :     Save the standard and error output to files. The files will be created
27 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
28 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
29 :     B<user> option above.
30 :    
31 :     =item h
32 :    
33 :     Display this command's parameters and options.
34 :    
35 :     =item phone
36 :    
37 :     Phone number to message when the script is complete.
38 :    
39 : parrello 1.2 =item migrate
40 :    
41 : parrello 1.10 If specified, a comma-delimited list of attributes to be migrated from the old system to the
42 :     new one. The attributes will be erased before migration.
43 : parrello 1.2
44 : parrello 1.4 =item initializeAndClear
45 : parrello 1.2
46 : parrello 1.4 If specified, then the tables in the attribute database are dropped and re-created.
47 : parrello 1.2
48 : parrello 1.12 =item appendLoad
49 :    
50 :     If specified, existing keys will not be erased before loading the attribute
51 :     data. This option only makes sense if C<load> is specified.
52 :    
53 : parrello 1.5 =item load
54 :    
55 :     If specified, the name of a file containing attribute data to be loaded into the
56 :     system. The file is presumed to be tab-delimited. The first column must be the
57 :     object ID, the second the attribute key name, and the remaining columns the
58 :     attribute values. All attribute keys mentioned in the file will be erased before
59 : parrello 1.12 loading unless the C<appendLoad> option is specified.
60 : parrello 1.5
61 : parrello 1.7 =item loadKey
62 :    
63 :     If specified, the name of a tab-delimited file containing attribute key data. For each key,
64 :     there is a pair of lines. The first line contains the ID, data type, and description
65 :     of the key. The second line contains the marker C<#GROUPS> followed by zero or more
66 :     group names. The attribute will be connected to all the specified groups.
67 :    
68 : parrello 1.5 =item backup
69 :    
70 :     If specified, the name of a file into which all the attribute data should be
71 : parrello 1.7 dumped. The file itself will receive the attribute data in the format expected
72 :     by C<load>. A second file, consisting of the same file name with the suffix C<.key>,
73 :     will contain the attribute key data in the format expected by C<loadKey>.
74 : parrello 1.5
75 : parrello 1.6 =item hh_copy
76 :    
77 :     If specified, the attributes used by the collections system will be converted from the
78 :     old system to the new one. In the old system, these attributes are assigned either to
79 :     a genome or to the word "Subsystem". If they are attached to the word "Subsystem", then
80 :     the value is the subsystem name; otherwise, the value is ignored. In the new attribute
81 :     system, the attribute key is always "collection" and the value is the collection name.
82 :     Rather than attaching attributes to the word "Subsystem", we attach them to the
83 :     subsystem itself.
84 :    
85 : parrello 1.10 =item compare
86 :    
87 :     If specified, the name of a file to contain a comparision report. The comparison report
88 :     contains a table of the attribute keys and the number of values of the key in the new
89 :     and old systems, along with a list of the attributes and values not in the new system.
90 :     The report is formatted as a web page.
91 :    
92 :     =item summary
93 :    
94 :     If specified, the name of a file to contain a summary report. The summary report
95 :     contains a table of the attribute keys and the number of values of each. The report
96 :     is formatted as a web page.
97 :    
98 :     =item trimSpaces
99 :    
100 :     If specified, the name of an attribute value backup file. The file will be processed to
101 :     remove excess spaces. The fixed file will have the same name as the incoming backup
102 :     file with the extension <.fixed>. This new file can then be reloaded using the
103 :     C<load> option.
104 :    
105 : parrello 1.13 =item showKeyDef
106 :    
107 :     If specified, the name of an attribute key. The key's descriptive data will be displayed.
108 :    
109 : parrello 1.14 =item mapSubkey
110 :    
111 :     Subkey mapping rule. Consists of a key name, followed by a substitution command enclosed in
112 :     slashes. For example, to remove the word C<ZINC> from the beginning of C<docking_results>
113 :     subkeys, you would code
114 :    
115 :     -mapSubkey=docking_results/^ZINC//
116 :    
117 : parrello 1.17 =item mapObjectID
118 :    
119 :     Object ID mapping rule. Consists of a key name, followed by a substitution command enclosed in
120 :     slashes. For example, to add the prefix C<fig|> to all the object IDs for the C<PRODOM> key,
121 :     you would code
122 :    
123 :     -mapObjectID=PRODOM/^/fig\|/
124 :    
125 : parrello 1.15 =item dockClean
126 :    
127 :     If specified, Predicted docking results will be removed from the attribute database.
128 :    
129 : parrello 1.19 =item resume
130 :    
131 :     If specified, attribute data already in the file will not be reloaded. This option only
132 :     makes sense if C<load> is specified.
133 :    
134 : parrello 1.1 =back
135 :    
136 :     =cut
137 :    
138 :     use strict;
139 :     use Tracer;
140 :     use DocUtils;
141 :     use TestUtils;
142 :     use Cwd;
143 :     use File::Copy;
144 :     use File::Path;
145 :     use CustomAttributes;
146 : parrello 1.5 use ERDBLoad;
147 : parrello 1.14 use Stats;
148 : parrello 1.2 use FIG;
149 : parrello 1.10 use CGI;
150 : parrello 1.1
151 :     # Get the command-line options and parameters.
152 : parrello 1.5 my ($options, @parameters) = StandardSetup([qw(CustomAttributes FIG) ],
153 : parrello 1.1 {
154 : parrello 1.2 trace => [3, "trace level"],
155 : parrello 1.4 initializeAndClear => ["", "if specified, the tables of the attribute database will be re-created"],
156 : parrello 1.2 migrate => ["", "if specified, attribute data will be migrated along with the object IDs"],
157 : parrello 1.1 phone => ["", "phone number (international format) to call when load finishes"],
158 : parrello 1.5 load => ["", "file from which to load attribute data"],
159 : parrello 1.7 loadKey => ["", "file from which to load attribute key data"],
160 : parrello 1.6 backup => ["", "file to which attribute data should be dumped"],
161 : parrello 1.8 hh_copy => ["", "if specified, the attributes used by the collections system will be converted"],
162 : parrello 1.10 compare => ["", "name of a file into which a comparison report will be written"],
163 :     summary => ["", "name of a file into which a summary report will be written"],
164 :     trimSpaces => ["", "if specified, the name of a backup file, which will be processed to remove excess spaces"],
165 : parrello 1.12 appendLoad => ["", "if specified, no data will be erased before loading from the load file"],
166 : parrello 1.13 showKeyDef => ["", "if specified, the name of a key whose descriptive data is to be displayed"],
167 : parrello 1.14 mapSubkey => ["", "instructions for fixing subkey values"],
168 : parrello 1.17 mapObjectID => ["", "instructions for fixing object ID values"],
169 : parrello 1.15 dockClean => ["", "if specified, Predicted docking results will be removed from the database"],
170 : parrello 1.19 resume => ["", "if specified, key-value pairs already in the database will not be inserted when loading from the load file"]
171 : parrello 1.1 },
172 :     "",
173 :     @ARGV);
174 :     # Set a variable to contain return type information.
175 :     my $rtype;
176 : parrello 1.10 # Create a CGI object.
177 :     my $cgi = CGI->new();
178 : parrello 1.1 # Insure we catch errors.
179 :     eval {
180 : parrello 1.3 # Insure we don't use the new attribute system for accessing the old attributes.
181 :     $FIG_Config::attrOld = 1;
182 : parrello 1.2 # Get the FIG object.
183 : parrello 1.14 Trace("Connecting to legacy attribute database via FIG object.") if T(2);
184 : parrello 1.2 my $fig = FIG->new();
185 : parrello 1.4 # Get the attribute database.
186 : parrello 1.14 Trace("Connecting to local attribute database.") if T(2);
187 : parrello 1.4 my $ca = CustomAttributes->new();
188 :     # Process according to the options selected.
189 : parrello 1.7 if ($options->{backup}) {
190 :     # Back up the attributes to the specified file.
191 :     my $backupFileName = $options->{backup};
192 :     Trace("Backing up attribute data.") if T(2);
193 :     my $stats = $ca->BackupAllAttributes($backupFileName);
194 :     Trace("Attribute backup statistics:\n" . $stats->Show()) if T(2);
195 :     Trace("Backing up key data.") if T(2);
196 :     $stats = $ca->BackupKeys("$backupFileName.key");
197 :     Trace("Key backup statistics:\n" . $stats->Show()) if T(2);
198 : parrello 1.5 }
199 : parrello 1.4 if ($options->{initializeAndClear}) {
200 :     # Create the tables.
201 :     $ca->CreateTables();
202 :     Trace("Tables recreated.") if T(2);
203 : parrello 1.2 }
204 :     if ($options->{migrate}) {
205 : parrello 1.10 # Migrate data from the old system.
206 :     Trace("Migrating attributes: $options->{migrate}.") if T(2);
207 :     # Create a statistics object to track our progress.
208 :     my $stats = Stats->new('keys', 'values');
209 :     # Loop through the attributes specified.
210 :     my @keys = split /\s*,\s*/, $options->{migrate};
211 :     for my $key (@keys) {
212 :     # Look for the key in the new system.
213 :     if (! $ca->Exists('AttributeKey', $key)) {
214 :     # If it's not found, we have an error.
215 :     Trace("Attribute key $key not found in new system.") if T(0);
216 :     $stats->AddMessage("$key not found.");
217 :     $stats->Add(badKeys => 1);
218 :     } else {
219 :     # Denote we're processing another key.
220 :     $stats->Add(keys => 1);
221 :     # Get the values from the old system.
222 :     Trace("Retrieving values for key $key.") if T(3);
223 :     my @valueList = $fig->get_attributes(undef, $key);
224 :     Trace("Erasing existing values for key $key.") if T(3);
225 :     # Erase the values in the new attribute system.
226 :     $ca->EraseAttribute($key);
227 :     Trace("Moving values for $key into new system.") if T(3);
228 :     # Loop through the old system values, adding them to the new system.
229 :     for my $valueRow (@valueList) {
230 :     my ($id, $key, @values) = @{$valueRow};
231 :     $ca->AddAttribute($id, $key, @values);
232 :     }
233 :     # Record what we did.
234 :     my $count = scalar(@valueList);
235 :     Trace("$count values migrated for $key.") if T(3);
236 :     $stats->Add(values => $count);
237 :     }
238 :     }
239 : parrello 1.5 Trace("Migration statistics:\n" . $stats->Show()) if T(2);
240 :     }
241 : parrello 1.6 if ($options->{hh_copy}) {
242 :     # Migrate the collections keys to the new system.
243 :     Trace("Migrating collections data.") if T(2);
244 :     my $stats = MigrateCollections($ca, $fig);
245 :     Trace("Collection statistics:\n" . $stats->Show()) if T(2);
246 :     }
247 : parrello 1.10 if ($options->{trimSpaces}) {
248 :     # Here we need to remove unnecessary spaces from an attribute values backup
249 :     # file. First, we open the input backup file.
250 :     my $fileName = $options->{trimSpaces};
251 :     my $ih = Open(undef, "<$fileName");
252 :     # Now we open the proposed output file.
253 :     my $oh = Open(undef, ">$fileName.fixed");
254 : parrello 1.11 # Create a statistics object to track our progress.
255 :     my $stats = Stats->new('lines', 'trims');
256 : parrello 1.10 Trace("Cleaning $fileName and copying to $fileName.fixed.") if T(2);
257 :     # Loop through the input file.
258 :     while (! eof $ih) {
259 :     # Get the next record in the input file.
260 : parrello 1.11 $stats->Add(lines => 1);
261 : parrello 1.10 my ($id, $key, @values) = Tracer::GetLine($ih);
262 :     # Trim the values.
263 : parrello 1.11 for my $value (@values) {
264 :     if ($value =~ /(\S.+\S)\s+/) {
265 :     $value = $1;
266 :     $stats->Add(trims => 1);
267 :     }
268 :     }
269 : parrello 1.10 # Write the result to the output file.
270 :     Tracer::PutLine($oh, [$id, $key, @values]);
271 :     }
272 :     # Close the files.
273 :     close $ih;
274 :     close $oh;
275 : parrello 1.11 Trace("$fileName.fixed is now a cleaned backup.\n" . $stats->Show()) if T(2);
276 : parrello 1.10 }
277 : parrello 1.7 if ($options->{loadKey}) {
278 :     # We want to load the attribute data from the specified file, but
279 :     # first we need to verify that the file exists.
280 :     my $loadFileName = $options->{loadKey};
281 :     if (! -f $loadFileName) {
282 :     Confess("Cannot load keys: file \"$loadFileName\" is not found or not a file.");
283 :     } else {
284 :     Trace("Loading key data from $loadFileName.") if T(2);
285 :     my $stats = $ca->RestoreKeys($loadFileName);
286 :     Trace("Load statistics:\n" . $stats->Show()) if T(2);
287 :     }
288 :     }
289 : parrello 1.13 if ($options->{showKeyDef}) {
290 :     # We want to display the identified key's description. Get the key name.
291 :     my $name = $options->{showKeyDef};
292 :     # Look for keys with the same name.
293 :     my %keys = $ca->GetAttributeData('find', $options->{showKeyDef});
294 :     # See if we found the key.
295 :     if (! $keys{$name}) {
296 :     print "Key $name not found.\n";
297 :     } else {
298 :     print "Description for $name.\n\n";
299 :     print $keys{$name}->[1];
300 :     print "\n\n";
301 :     }
302 :     }
303 : parrello 1.5 if ($options->{load}) {
304 :     # We want to load the attribute data from the specified file, but
305 :     # first we need to verify that the file exists.
306 :     my $loadFileName = $options->{load};
307 :     if (! -f $loadFileName) {
308 :     Confess("Cannot load: file \"$loadFileName\" is not found or not a file.");
309 :     } else {
310 : parrello 1.19 # Set up options. We may need to specify the append and resume options, and
311 :     # we need to archive.
312 : parrello 1.12 my %loadOptions;
313 :     if ($options->{appendLoad}) {
314 :     $loadOptions{append} = 1;
315 :     }
316 : parrello 1.19 if ($options->{resume}) {
317 :     $loadOptions{resume} = 1;
318 :     }
319 :     # Insure we have the archive directory available.
320 :     my $archiveDirectory = "$FIG_Config::fig/AttribData";
321 :     Tracer::Insure($archiveDirectory, 0777);
322 :     # Create an archive file name from the current time and the PID.
323 :     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time());
324 :     $loadOptions{archive} = "$archiveDirectory/attrSave$$.$mon.$mday.$year.$hour.$min.$sec.log";
325 : parrello 1.12 # Now we can load.
326 : parrello 1.5 Trace("Loading attribute data from $loadFileName.") if T(2);
327 : parrello 1.12 my $stats = $ca->LoadAttributesFrom($loadFileName, %loadOptions);
328 : parrello 1.5 Trace("Load statistics:\n" . $stats->Show()) if T(2);
329 :     }
330 :     }
331 : parrello 1.10 if ($options->{summary}) {
332 :     # Here we have a summary report. The value of the option is the name of a file that is to
333 :     # contain an html-formatted report. We start by getting a complete list of the keys and
334 :     # the associated counts.
335 :     my $keyCounts = GetAllKeys($ca);
336 :     # Buffer the lines in the following list.
337 :     my @lines = ();
338 :     # Start the table.
339 :     push @lines, $cgi->start_table({border => 2});
340 :     push @lines, $cgi->Tr($cgi->th({align => 'left'}, 'Key Name'), $cgi->th({align => 'right'}, 'Values'));
341 :     # Loop through the key hash, building row data.
342 :     for my $key (sort keys %{$keyCounts}) {
343 :     push @lines, $cgi->Tr($cgi->td({align => 'left'}, $key), $cgi->td({align => 'right'}, $keyCounts->{$key}));
344 :     }
345 :     # Close off the table.
346 :     push @lines, $cgi->end_table();
347 :     # Output the page.
348 :     WritePage($options->{summary}, 'Attribute Key Summary', \@lines);
349 :     }
350 :     if ($options->{compare}) {
351 :     # Here we have a comparison report. The value of the option is the name of a file that is
352 :     # to contain an html-formatted report. We need access to the SEED database to pull this
353 :     # off.
354 :     my $dbh = $fig->db_handle();
355 :     Trace("Retrieving key data.") if T(2);
356 :     # Get counts for all the keys in the old system.
357 :     my $oldKeys = $dbh->SQL("SELECT tag, COUNT(*) FROM attribute GROUP BY tag");
358 :     # Insure the query worked.
359 :     if (! defined($oldKeys)) {
360 :     Trace("Database error retrieving old system keys:" . $dbh->errstr) if T(0);
361 :     } else {
362 :     # Convert the key data into a hash.
363 :     my %oldKeyHash = map { $_->[0] => $_->[1] } @{$oldKeys};
364 :     # Get the counts for all the keys in the new system.
365 :     my $newKeyHashRef = GetNewKeyCounts($ca);
366 :     # We've got our data, so the next step is to start accumulating the lines of the web page.
367 :     Trace("Processing key table for new system.") if T(2);
368 :     my @lines = ();
369 :     # Start the first table.
370 :     push @lines, $cgi->h3("New System Keys");
371 :     push @lines, $cgi->start_table({border => 2});
372 :     push @lines, $cgi->Tr($cgi->th({align => 'left'}, 'Key Name'), $cgi->th({align => 'right'}, 'New Values'),
373 :     $cgi->th({align => 'right'}, 'Old Values'));
374 :     # Now we process the new keys. As we run through them, we'll delete matching keys from
375 :     # the old key hash. The remaining keys will be output as a missing-keys table.
376 :     for my $key (sort keys %{$newKeyHashRef}) {
377 :     # Get the new system count.
378 :     my $newCount = $newKeyHashRef->{$key};
379 :     # Default to a blank in the old system count column.
380 :     my $oldCount = '&nbsp;';
381 :     # Check to see if this key exists in the old system.
382 :     if (exists $oldKeyHash{$key}) {
383 :     # If it does, save its count and delete it from the old system hash.
384 :     $oldCount = $oldKeyHash{$key};
385 :     delete $oldKeyHash{$key};
386 :     }
387 :     # Output this table row.
388 :     push @lines, $cgi->Tr($cgi->td({align => 'left'}, $key), $cgi->td({align => 'right'}, $newCount),
389 :     $cgi->td({align => 'right'}, $oldCount));
390 :     }
391 :     # Close the table.
392 :     push @lines, $cgi->end_table();
393 :     # Now the remaining keys in the old key hash are missing from the new system. We create a new table
394 :     # to display them.
395 :     my @missingKeys = sort keys %oldKeyHash;
396 :     # Only do this if there's at least one missing key.
397 :     if (@missingKeys == 0) {
398 :     push @lines, $cgi->p("No missing keys found.");
399 :     } else {
400 :     Trace("Processing missing key table.") if T(2);
401 :     # Start the second table.
402 :     push @lines, $cgi->h3("Keys Missing from New System");
403 :     push @lines, $cgi->start_table({border => 2});
404 :     push @lines, $cgi->Tr($cgi->th({align => 'left'}, 'Key Name'), $cgi->td({align => 'right'}, 'Values'));
405 :     # Loop through the missing keys, writing them to the table.
406 :     for my $key (@missingKeys) {
407 :     push @lines, $cgi->Tr($cgi->td({align => 'left'}, $key), $cgi->td({align => 'right'}, $oldKeyHash{$key}));
408 :     }
409 :     # Close the table.
410 :     push @lines, $cgi->end_table();
411 :     }
412 :     # Write the web page.
413 :     WritePage($options->{compare}, 'Attribute System Comparison', \@lines);
414 :     }
415 :     }
416 : parrello 1.17 if ($options->{mapSubkey} || $options->{mapObjectID}) {
417 : parrello 1.14 # Parse out the main key.
418 : parrello 1.17 my $type = ($options->{mapSubkey} ? 'mapSubkey' : 'mapObjectID');
419 :     my $mapThing = $options->{$type};
420 :     my $field = ($options->{mapSubkey} ? 'subkey' : 'to-link');
421 :     if ($mapThing =~ m#([^/]+)(/.+)#) {
422 : parrello 1.14 my ($keyName, $pattern) = ($1, $2);
423 : parrello 1.17 Trace("Processing $type mapping for $keyName with pattern s$pattern.") if T(2);
424 : parrello 1.14 # Create a statistics object.
425 :     my $results = Stats->new();
426 : parrello 1.17 # Get all the field values.
427 :     my %things = map { $_ => 1 } $ca->GetFlat(['HasValueFor'], "HasValueFor(from-link) = ?", [$keyName],
428 :     "HasValueFor($field)");
429 :     my $totalThings = scalar keys %things;
430 :     Trace("$totalThings ${field}s found.") if T(2);
431 :     # Loop through the values, doing updates where necessary.
432 :     for my $thing (keys %things) {
433 : parrello 1.14 # Count this subkey.
434 : parrello 1.17 my $thingCount = $results->Add("${field}s" => 1);
435 : parrello 1.14 # Apply the substitution.
436 : parrello 1.17 my $newThing = $thing;
437 :     eval("\$newThing =~ s$pattern");
438 : parrello 1.14 # If the evaluation resulted in an error, stop immediately.
439 :     if ($@) {
440 :     Confess("Error in substitution pattern: $@");
441 : parrello 1.17 } elsif ($newThing ne $thing) {
442 : parrello 1.14 # Here the substitution worked and it changed the key value.
443 :     # We need to update the database.
444 :     $results->Add(updates => 1);
445 : parrello 1.17 my $count = $ca->UpdateField("HasValueFor($field)", $thing, $newThing,
446 : parrello 1.14 "HasValueFor(from-link) = ?", [$keyName]);
447 :     $results->Add(rowsChanged => $count);
448 :     }
449 :     # Trace our progress.
450 : parrello 1.17 if ($thingCount % 100 == 0) {
451 : parrello 1.18 Trace("$thingCount processed.") if T(3);
452 : parrello 1.14 }
453 :     }
454 :     # Display the statistics.
455 : parrello 1.17 Trace("Statistics from $field update:\n" . $results->Show()) if T(2);
456 : parrello 1.14 } else {
457 :     # Here the incoming parameter was in the wrong format. Mostly this means there
458 :     # was nothing before the slash or no slash was found.
459 : parrello 1.17 Confess("Invalid substitution syntax in map option.");
460 : parrello 1.14 }
461 :     }
462 : parrello 1.15 if ($options->{dockClean}) {
463 :     # Get the list of PDBs with results.
464 :     my @pdbList = sort map { $_->[0] } $ca->GetAttributes(undef, 'has_results');
465 :     # Loop through the PDB IDs.
466 :     for my $pdbID (@pdbList) {
467 :     Trace("Processing $pdbID.") if T(3);
468 :     # Loop until we run out of rows to delete.
469 :     my $thisCount = 1;
470 :     my $totalCount = 0;
471 :     while ($thisCount) {
472 :     # Delete a bunch of rows. To avoid a timeout, we limit the results.
473 : parrello 1.16 $thisCount = $ca->DeleteLike('HasValueFor', 'HasValueFor(to-link) = ? AND HasValueFor(value) LIKE ? LIMIT 10000',
474 : parrello 1.15 [$pdbID, '%Predicted']);
475 :     $totalCount += $thisCount;
476 :     Trace("$thisCount rows deleted in batch. $totalCount total deletions for pdb $pdbID.") if T(3);
477 :     }
478 :     }
479 :     }
480 : parrello 1.2 Trace("Processing complete.") if T(2);
481 : parrello 1.1 };
482 :     if ($@) {
483 :     Trace("Script failed with error: $@") if T(0);
484 :     $rtype = "error";
485 :     } else {
486 :     Trace("Script complete.") if T(2);
487 :     $rtype = "no error";
488 :     }
489 :     if ($options->{phone}) {
490 :     my $msgID = Tracer::SendSMS($options->{phone}, "RefreshAttrDB terminated with $rtype.");
491 :     if ($msgID) {
492 :     Trace("Phone message sent with ID $msgID.") if T(2);
493 :     } else {
494 :     Trace("Phone message not sent.") if T(2);
495 :     }
496 :     }
497 :    
498 : parrello 1.4 =head3 MigrateAttributes
499 :    
500 : parrello 1.20 my $stats = MigrateAttributes($ca, $fig, $preserve);
501 : parrello 1.4
502 :     Migrate all the attributes data from the specified FIG instance. This is a long, slow
503 :     method used to convert the old attribute data to the new system. Only attribute
504 :     keys that are already in the database will be loaded, and they will completely
505 :     replace the existing values for those keys. Therefore, it is very important that the
506 :     FIG instance not be connected to the attribute database.
507 :    
508 :     =over 4
509 :    
510 :     =item ca
511 :    
512 :     B<CustomAttributes> object used to access the attribute database.
513 :    
514 :     =item fig
515 :    
516 :     A FIG object that can be used to retrieve attributes for migration purposes.
517 :    
518 : parrello 1.6 =item preserve (optional)
519 :    
520 :     A comma-delimited list of attributes that are not to be migrated.
521 :    
522 : parrello 1.4 =item RETURN
523 :    
524 :     Returns a statistical object for the load process.
525 :    
526 :     =back
527 :    
528 :     =cut
529 :    
530 :     sub MigrateAttributes {
531 :     # Get the parameters.
532 : parrello 1.6 my ($ca, $fig, $preserve) = @_;
533 : parrello 1.4 # Create the return value.
534 : parrello 1.5 my $retVal = Stats->new('keysIn');
535 :     # Create a loader for the value table.
536 :     my $hasValueFor = ERDBLoad->new($ca, 'HasValueFor', $FIG_Config::temp);
537 :     # Create a hash for the target objects.
538 :     my %targetObjectHash = ();
539 : parrello 1.6 # Get a list of the attributes we're to preserve.
540 :     my %preserve = ();
541 :     if (defined $preserve) {
542 :     %preserve = map { $_ => 1 } split /\s*,\s*/, $preserve;
543 :     }
544 :     # Put the preserved keys into the load file.
545 :     for my $key (keys %preserve) {
546 :     Trace("Preserving key $key.") if T(3);
547 :     my @newValues = $ca->GetAttributes(undef, $key);
548 :     Trace(scalar(@newValues) . " values of $key will be preserved.");
549 :     # Put the values into the load file.
550 :     PutValue($hasValueFor, $ca, @newValues);
551 :     }
552 : parrello 1.4 # Get a list of all our attribute keys.
553 : parrello 1.6 my @allKeys = $ca->GetFlat(['AttributeKey'], "", [], 'AttributeKey(id)');
554 :     # Delete the preserved keys.
555 :     my @keys = grep { ! $preserve{$_} } @allKeys;
556 :     # Loop through the reset, building the load files.
557 : parrello 1.4 for my $key (@keys) {
558 :     Trace("Migrating key $key.") if T(3);
559 :     $retVal->Add(keysIn => 1);
560 :     # Get all the values of the specified key.
561 :     my @oldValues = $fig->get_attributes(undef, $key);
562 :     my $count = scalar(@oldValues);
563 :     Trace("$count values found for $key in source system.") if T(3);
564 : parrello 1.6 # Put the values into the load file.
565 :     PutValue($hasValueFor, $ca, @oldValues);
566 : parrello 1.4 }
567 : parrello 1.5 # Close and finish the loads to upload the data.
568 :     Trace("Closing value table.") if T(2);
569 :     my $hvfStats = $hasValueFor->FinishAndLoad();
570 :     Trace("Statistics from value table load:\n" . $hvfStats->Show()) if T(2);
571 :     # Merge the statistics.
572 :     $retVal->Accumulate($hvfStats);
573 : parrello 1.4 # Return the statistics object.
574 :     return $retVal;
575 :     }
576 :    
577 : parrello 1.6 =head3 PutValue
578 :    
579 : parrello 1.20 PutValue($hasValueFor, $ca, @values);
580 : parrello 1.6
581 :     Put the values from an attribute value list into a HasValueFor load file.
582 :    
583 :     =over 4
584 :    
585 :     =item hasValueFor
586 :    
587 :     Load object for the HasValueFor table.
588 :    
589 :     =item ca
590 :    
591 :     A CustomAttribute object. We get the splitter value from it.
592 :    
593 :     =item value
594 :    
595 :     A list of tuples, each consisting of an object ID, a key name, and one or more values.
596 :    
597 :     =back
598 :    
599 :     =cut
600 :    
601 :     sub PutValue {
602 :     # Get the parameters.
603 :     my ($hasValueFor, $ca, @values) = @_;
604 :     # Loop through the value rows.
605 :     for my $row (@values) {
606 :     # Get this row's data.
607 :     my ($id, $key, @values) = @{$row};
608 :     # Format the values.
609 :     my $valueString = join($ca->{splitter}, @values);
610 :     # Add the value.
611 :     $hasValueFor->Put($key, $id, $valueString);
612 :     }
613 :     }
614 :    
615 :     =head3 MigrateCollections
616 :    
617 : parrello 1.20 my $stats = MigrateCollections($ca, $fig);
618 : parrello 1.6
619 :     This method copies the collection data from the specified FIG object and stores it as values
620 :     of the C<collection> attribute in the specified custom attribute database.
621 :    
622 :     =over 4
623 :    
624 :     =item ca
625 :    
626 :     Custom attribute database into which the collections are to be stored.
627 :    
628 :     =item fig
629 :    
630 :     FIG object from which the collection attributes are to be harvested.
631 :    
632 :     =item RETURN
633 :    
634 :     Returns a statistics object with informatino about the migration.
635 :    
636 :     =back
637 :    
638 :     =cut
639 :    
640 :     sub MigrateCollections {
641 :     # Get the parameters.
642 :     my ($ca, $fig) = @_;
643 :     # Declare the return variable.
644 :     my $retVal = Stats->new();
645 :     # Get the collection names.
646 :     my @collections = qw(higher_plants eukaryotic_ps nonoxygenic_ps hundred_hundred functional_coupling_paper ecoli_essentiality_paper);
647 :     # Erase the current collection date.
648 :     $ca->EraseAttribute('collection');
649 :     # Loop through the collection attributes.
650 :     for my $cname (@collections) {
651 :     $retVal->Add(collection => 1);
652 :     # Get this attribute from the old system.
653 :     my @rows = $fig->get_attributes(undef, $cname);
654 :     # Loop through its values.
655 :     for my $row (@rows) {
656 :     $retVal->Add($cname => 1);
657 :     # Determine the object key.
658 :     my $objectID = ($row->[0] eq 'Subsystem' ? $row->[2] : $row->[0]);
659 :     $ca->AddAttribute($objectID, 'collection', $cname);
660 :     }
661 :     }
662 :     # Return the statistics.
663 :     return $retVal;
664 :     }
665 :    
666 : parrello 1.10 =head3 GetAllKeys
667 :    
668 : parrello 1.20 my @keys = GetAllKeys($ca);
669 : parrello 1.10
670 :     Return a sorted list of the attribute keys.
671 :    
672 :     =over 4
673 :    
674 :     =item ca
675 :    
676 :     CustomAttributes object used to access the database.
677 :    
678 :     =item RETURN
679 :    
680 :     Returns a sorted list of all the attribute keys.
681 :    
682 :     =back
683 :    
684 :     =cut
685 :    
686 :     sub GetAllKeys {
687 :     # Get the parameters.
688 :     my ($ca) = @_;
689 :     # Get the attribute data.
690 :     my %keyData = $ca->GetAttributeData('name', '');
691 :     # Sort the keys.
692 :     my @retVal = sort keys %keyData;
693 :     # Return the result.
694 :     return @retVal;
695 :     }
696 :    
697 :     =head3 OpenPage
698 :    
699 : parrello 1.20 my $fh = OpenPage($fileName, $title);
700 : parrello 1.10
701 :     Start writing an HTML page to a file and return the file handle.
702 :    
703 :     =over 4
704 :    
705 :     =item fileName
706 :    
707 :     Name of the file to which the page will be written.
708 :    
709 :     =item title
710 :    
711 :     Title for the page.
712 :    
713 :     =item RETURN
714 :    
715 :     Returns the file handle for writing the rest of the page.
716 :    
717 :     =back
718 :    
719 :     =cut
720 :    
721 :     sub OpenPage {
722 :     # Get the parameters.
723 :     my ($fileName, $title) = @_;
724 :     # Open the file.
725 :     my $retVal = Open(undef, ">$fileName");
726 :     # Write out the HTML headers.
727 :     print $retVal $cgi->start_html(-title => $title, -BGCOLOR => 'silver');
728 :     print $retVal "\n";
729 :     # Return the file handle.
730 :     return $retVal;
731 :     }
732 :    
733 :     =head3 GetNewKeyCounts
734 :    
735 : parrello 1.20 my %counts = GetNewKeyCounts($ca);
736 : parrello 1.10
737 :     Return a hash mapping attribute key names to counts.
738 :    
739 :     =over 4
740 :    
741 :     =item ca
742 :    
743 :     CustomAttributes object for accessing the attribute database.
744 :    
745 :     =item RETURN
746 :    
747 :     Returns a reference to a hash mapping each key name to a count of the key's values.
748 :    
749 :     =back
750 :    
751 :     =cut
752 :    
753 :     sub GetNewKeyCounts {
754 :     # Get the parameters.
755 :     my ($ca) = @_;
756 :     # Declare the return variable.
757 :     my $retVal = {};
758 :     # Get all of the keys.
759 :     my @keys = GetAllKeys($ca);
760 :     # Loop through the list, filling the hash.
761 :     for my $key (@keys) {
762 :     my $count = $ca->GetCount(['HasValueFor'], "HasValueFor(from-link) = ?", [$key]);
763 :     $retVal->{$key} = $count;
764 :     }
765 :     # Return the result.
766 :     return $retVal;
767 :     }
768 :    
769 :     =head3 WritePage
770 :    
771 : parrello 1.20 WritePage($fileName, $title, \@lines);
772 : parrello 1.10
773 :     Write the specified lines to the specified file as a web page. The lines are assumed to be raw
774 :     HTML body text. They will be preceded by a standard HTML header and followed by a standard
775 :     HTML footer.
776 :    
777 :     =over 4
778 :    
779 :     =item fileName
780 :    
781 :     Name of the output file.
782 :    
783 :     =item title
784 :    
785 :     Title for the web page.
786 :    
787 :     =item lines
788 :    
789 :     Reference to a list of lines of HTML.
790 :    
791 :     =back
792 :    
793 :     =cut
794 :    
795 :     sub WritePage {
796 :     # Get the parameters.
797 :     my ($fileName, $title, $lines) = @_;
798 :     # Open the file and write the header to it. The header includes everything up to and including
799 :     # the BODY tag.
800 :     Trace("Writing web page to $fileName.") if T(2);
801 :     my $oh = OpenPage($fileName, $title);
802 :     # Write the lines one at a time.
803 :     for my $line (@{$lines}) {
804 :     print $oh "$line\n";
805 :     }
806 :     # Write the HTML footer.
807 :     print $oh $cgi->end_html();
808 :     # Close the output file.
809 :     close $oh;
810 :     Trace("Web page created in $fileName.") if T(2);
811 :     }
812 :    
813 : parrello 1.20 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3