[Bio] / Sprout / AttrDBRefresh.pl Repository:
ViewVC logotype

Annotation of /Sprout/AttrDBRefresh.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 AttrDBRefresh
4 :    
5 : parrello 1.5 This script performs useful functions on the custom attributes database.
6 : parrello 1.1
7 :     The currently-supported command-line options are as follows.
8 :    
9 :     =over 4
10 :    
11 :     =item user
12 :    
13 :     Name suffix to be used for log files. If omitted, the PID is used.
14 :    
15 :     =item trace
16 :    
17 :     Numeric trace level. A higher trace level causes more messages to appear. The
18 : parrello 1.2 default trace level is C<3>.
19 : parrello 1.1
20 :     =item sql
21 :    
22 :     If specified, turns on tracing of SQL activity.
23 :    
24 :     =item background
25 :    
26 :     Save the standard and error output to files. The files will be created
27 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
28 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
29 :     B<user> option above.
30 :    
31 :     =item h
32 :    
33 :     Display this command's parameters and options.
34 :    
35 :     =item phone
36 :    
37 :     Phone number to message when the script is complete.
38 :    
39 : parrello 1.2 =item migrate
40 :    
41 : parrello 1.6 If specified, the attributes will be migrated from the old FIG system to the
42 :     new attribute system.
43 :    
44 :     =item preserve
45 :    
46 :     If specified, a comma-delimited list of attributes that should not be migrated.
47 :     The values of these attributes in the new system will be preserved.
48 : parrello 1.2
49 : parrello 1.4 =item initializeAndClear
50 : parrello 1.2
51 : parrello 1.4 If specified, then the tables in the attribute database are dropped and re-created.
52 : parrello 1.2
53 : parrello 1.5 =item load
54 :    
55 :     If specified, the name of a file containing attribute data to be loaded into the
56 :     system. The file is presumed to be tab-delimited. The first column must be the
57 :     object ID, the second the attribute key name, and the remaining columns the
58 :     attribute values. All attribute keys mentioned in the file will be erased before
59 :     loading.
60 :    
61 : parrello 1.7 =item loadKey
62 :    
63 :     If specified, the name of a tab-delimited file containing attribute key data. For each key,
64 :     there is a pair of lines. The first line contains the ID, data type, and description
65 :     of the key. The second line contains the marker C<#GROUPS> followed by zero or more
66 :     group names. The attribute will be connected to all the specified groups.
67 :    
68 : parrello 1.5 =item backup
69 :    
70 :     If specified, the name of a file into which all the attribute data should be
71 : parrello 1.7 dumped. The file itself will receive the attribute data in the format expected
72 :     by C<load>. A second file, consisting of the same file name with the suffix C<.key>,
73 :     will contain the attribute key data in the format expected by C<loadKey>.
74 : parrello 1.5
75 : parrello 1.6 =item hh_copy
76 :    
77 :     If specified, the attributes used by the collections system will be converted from the
78 :     old system to the new one. In the old system, these attributes are assigned either to
79 :     a genome or to the word "Subsystem". If they are attached to the word "Subsystem", then
80 :     the value is the subsystem name; otherwise, the value is ignored. In the new attribute
81 :     system, the attribute key is always "collection" and the value is the collection name.
82 :     Rather than attaching attributes to the word "Subsystem", we attach them to the
83 :     subsystem itself.
84 :    
85 : parrello 1.8 =item re_index
86 :    
87 :     If specified, the text search index will be rebuilt.
88 :    
89 : parrello 1.1 =back
90 :    
91 :     =cut
92 :    
93 :     use strict;
94 :     use Tracer;
95 :     use DocUtils;
96 :     use TestUtils;
97 :     use Cwd;
98 :     use File::Copy;
99 :     use File::Path;
100 :     use CustomAttributes;
101 : parrello 1.5 use ERDBLoad;
102 : parrello 1.2 use FIG;
103 : parrello 1.1
104 :     # Get the command-line options and parameters.
105 : parrello 1.5 my ($options, @parameters) = StandardSetup([qw(CustomAttributes FIG) ],
106 : parrello 1.1 {
107 : parrello 1.2 trace => [3, "trace level"],
108 : parrello 1.4 initializeAndClear => ["", "if specified, the tables of the attribute database will be re-created"],
109 : parrello 1.2 migrate => ["", "if specified, attribute data will be migrated along with the object IDs"],
110 : parrello 1.6 preserve => ["", "comma-delimited list of attributes to be preserved during migration"],
111 : parrello 1.1 phone => ["", "phone number (international format) to call when load finishes"],
112 : parrello 1.5 load => ["", "file from which to load attribute data"],
113 : parrello 1.7 loadKey => ["", "file from which to load attribute key data"],
114 : parrello 1.6 backup => ["", "file to which attribute data should be dumped"],
115 : parrello 1.8 hh_copy => ["", "if specified, the attributes used by the collections system will be converted"],
116 :     re_index => ["", "if specified, the text search index will be rebuilt"]
117 : parrello 1.1 },
118 :     "",
119 :     @ARGV);
120 :     # This will hold the completion status ("error" or "no error") used in the phone message below.
121 :     my $rtype;
122 :     # Ensure we catch errors, so that the completion status can still be reported afterward.
123 :     eval {
124 : parrello 1.3 # Ensure we don't use the new attribute system for accessing the old attributes.
125 :     $FIG_Config::attrOld = 1;
126 : parrello 1.2 # Get the FIG object.
127 :     my $fig = FIG->new();
128 : parrello 1.4 # Get the attribute database.
129 :     Trace("Connecting to attribute database.") if T(2);
130 :     my $ca = CustomAttributes->new();
131 :     # Process the selected options in the order below; a requested backup is taken before any other step.
132 : parrello 1.7 if ($options->{backup}) {
133 :     # Back up the attributes to the specified file, and the keys to the same name with a ".key" suffix.
134 :     my $backupFileName = $options->{backup};
135 :     Trace("Backing up attribute data.") if T(2);
136 :     my $stats = $ca->BackupAllAttributes($backupFileName);
137 :     Trace("Attribute backup statistics:\n" . $stats->Show()) if T(2);
138 :     Trace("Backing up key data.") if T(2);
139 :     $stats = $ca->BackupKeys("$backupFileName.key");
140 :     Trace("Key backup statistics:\n" . $stats->Show()) if T(2);
141 : parrello 1.5 }
142 : parrello 1.4 if ($options->{initializeAndClear}) {
143 :     # Create the tables.
144 :     $ca->CreateTables();
145 :     Trace("Tables recreated.") if T(2);
146 : parrello 1.2 }
147 :     if ($options->{migrate}) {
148 : parrello 1.4 # Migrate the data, skipping any keys named in the preserve option.
149 : parrello 1.2 Trace("Migrating attribute data.") if T(2);
150 : parrello 1.6 my $stats = MigrateAttributes($ca, $fig, $options->{preserve});
151 : parrello 1.5 Trace("Migration statistics:\n" . $stats->Show()) if T(2);
152 :     }
153 : parrello 1.6 if ($options->{hh_copy}) {
154 :     # Migrate the collections keys to the new system.
155 :     Trace("Migrating collections data.") if T(2);
156 :     my $stats = MigrateCollections($ca, $fig);
157 :     Trace("Collection statistics:\n" . $stats->Show()) if T(2);
158 :     }
159 : parrello 1.7 if ($options->{loadKey}) {
160 :     # We want to load the attribute key data from the specified file, but
161 :     # first we need to verify that the file exists.
162 :     my $loadFileName = $options->{loadKey};
163 :     if (! -f $loadFileName) {
164 :     Confess("Cannot load keys: file \"$loadFileName\" is not found or not a file.");
165 :     } else {
166 :     Trace("Loading key data from $loadFileName.") if T(2);
167 :     my $stats = $ca->RestoreKeys($loadFileName);
168 :     Trace("Load statistics:\n" . $stats->Show()) if T(2);
169 :     }
170 :     }
171 : parrello 1.5 if ($options->{load}) {
172 :     # We want to load the attribute data from the specified file, but
173 :     # first we need to verify that the file exists.
174 :     my $loadFileName = $options->{load};
175 :     if (! -f $loadFileName) {
176 :     Confess("Cannot load: file \"$loadFileName\" is not found or not a file.");
177 :     } else {
178 :     Trace("Loading attribute data from $loadFileName.") if T(2);
179 :     my $stats = $ca->LoadAttributesFrom($loadFileName);
180 :     Trace("Load statistics:\n" . $stats->Show()) if T(2);
181 :     }
182 :     }
183 : parrello 1.8 if ($options->{re_index}) {
184 :     # We want to rebuild the text search index on the value table.
185 :     $ca->CreateSearchIndex('HasValueFor');
186 :     Trace("Search index created on \"HasValueFor\" table.") if T(2);
187 :     }
188 : parrello 1.2 Trace("Processing complete.") if T(2);
189 : parrello 1.1 };
190 :     if ($@) {
191 :     Trace("Script failed with error: $@") if T(0);
192 :     $rtype = "error";
193 :     } else {
194 :     Trace("Script complete.") if T(2);
195 :     $rtype = "no error";
196 :     }
197 :     if ($options->{phone}) {
198 :     my $msgID = Tracer::SendSMS($options->{phone}, "RefreshAttrDB terminated with $rtype.");
199 :     if ($msgID) {
200 :     Trace("Phone message sent with ID $msgID.") if T(2);
201 :     } else {
202 :     Trace("Phone message not sent.") if T(2);
203 :     }
204 :     }
205 :    
206 : parrello 1.4 =head3 MigrateAttributes
207 :    
208 : parrello 1.6 C<< my $stats = MigrateAttributes($ca, $fig, $preserve); >>
209 : parrello 1.4
210 :     Migrate all the attribute data from the specified FIG instance. This is a long, slow
211 :     method used to convert the old attribute data to the new system. Only attribute
212 :     keys that are already in the database will be loaded, and they will completely
213 :     replace the existing values for those keys. Therefore, it is very important that the
214 :     FIG instance not be connected to the attribute database.
215 :    
216 :     =over 4
217 :    
218 :     =item ca
219 :    
220 :     B<CustomAttributes> object used to access the attribute database.
221 :    
222 :     =item fig
223 :    
224 :     A FIG object that can be used to retrieve attributes for migration purposes.
225 :    
226 : parrello 1.6 =item preserve (optional)
227 :    
228 :     A comma-delimited list of attributes that are not to be migrated.
229 :    
230 : parrello 1.4 =item RETURN
231 :    
232 :     Returns a statistical object for the load process.
233 :    
234 :     =back
235 :    
236 :     =cut
237 :    
238 :     sub MigrateAttributes {
239 :     # Get the parameters.
240 : parrello 1.6 my ($ca, $fig, $preserve) = @_;
241 : parrello 1.4 # Create the return value; it counts the keys migrated and later absorbs the load statistics.
242 : parrello 1.5 my $retVal = Stats->new('keysIn');
243 :     # Create a loader for the value table.
244 :     my $hasValueFor = ERDBLoad->new($ca, 'HasValueFor', $FIG_Config::temp);
245 :     # Create a hash for the target objects.
246 :     my %targetObjectHash = ();
247 : parrello 1.6 # Get a list of the attributes we're to preserve, as a hash keyed by attribute name.
248 :     my %preserve = ();
249 :     if (defined $preserve) {
250 :     %preserve = map { $_ => 1 } split /\s*,\s*/, $preserve;
251 :     }
252 :     # Put the preserved keys' current values (read from the new system) into the load file.
253 :     for my $key (keys %preserve) {
254 :     Trace("Preserving key $key.") if T(3);
255 :     my @newValues = $ca->GetAttributes(undef, $key);
256 :     Trace(scalar(@newValues) . " values of $key will be preserved.") if T(3);
257 :     # Put the values into the load file.
258 :     PutValue($hasValueFor, $ca, @newValues);
259 :     }
260 : parrello 1.4 # Get a list of all our attribute keys.
261 : parrello 1.6 my @allKeys = $ca->GetFlat(['AttributeKey'], "", [], 'AttributeKey(id)');
262 :     # Remove the preserved keys from the list of keys to migrate.
263 :     my @keys = grep { ! $preserve{$_} } @allKeys;
264 :     # Loop through the rest, building the load files.
265 : parrello 1.4 for my $key (@keys) {
266 :     Trace("Migrating key $key.") if T(3);
267 :     $retVal->Add(keysIn => 1);
268 :     # Get all the values of the specified key from the old (FIG) system.
269 :     my @oldValues = $fig->get_attributes(undef, $key);
270 :     my $count = scalar(@oldValues);
271 :     Trace("$count values found for $key in source system.") if T(3);
272 : parrello 1.6 # Put the values into the load file.
273 :     PutValue($hasValueFor, $ca, @oldValues);
274 : parrello 1.4 }
275 : parrello 1.5 # Close and finish the loads to upload the data.
276 :     Trace("Closing value table.") if T(2);
277 :     my $hvfStats = $hasValueFor->FinishAndLoad();
278 :     Trace("Statistics from value table load:\n" . $hvfStats->Show()) if T(2);
279 :     # Merge the statistics.
280 :     $retVal->Accumulate($hvfStats);
281 : parrello 1.4 # Return the statistics object.
282 :     return $retVal;
283 :     }
284 :    
285 : parrello 1.6 =head3 PutValue
286 :    
287 :     C<< PutValue($hasValueFor, $ca, @values); >>
288 :    
289 :     Put the values from an attribute value list into a HasValueFor load file.
290 :    
291 :     =over 4
292 :    
293 :     =item hasValueFor
294 :    
295 :     Load object for the HasValueFor table.
296 :    
297 :     =item ca
298 :    
299 :     A CustomAttribute object. We get the splitter value from it.
300 :    
301 :     =item values
302 :    
303 :     A list of tuples, each consisting of an object ID, a key name, and one or more values.
304 :    
305 :     =back
306 :    
307 :     =cut
308 :    
309 :     sub PutValue {
310 :     # Get the parameters: the HasValueFor loader, the attribute database object, and the value rows.
311 :     my ($hasValueFor, $ca, @values) = @_;
312 :     # Loop through the value rows; each row is an array reference.
313 :     for my $row (@values) {
314 :     # Unpack this row. NOTE(review): this inner @values shadows the parameter list of the same name.
315 :     my ($id, $key, @values) = @{$row};
316 :     # Join the row's values into a single string using the database object's splitter field.
317 :     my $valueString = join($ca->{splitter}, @values);
318 :     # Write the record to the loader with the key first, then the object ID, then the value string.
319 :     $hasValueFor->Put($key, $id, $valueString);
320 :     }
321 :     }
322 :    
323 :     =head3 MigrateCollections
324 :    
325 :     C<< my $stats = MigrateCollections($ca, $fig); >>
326 :    
327 :     This method copies the collection data from the specified FIG object and stores it as values
328 :     of the C<collection> attribute in the specified custom attribute database.
329 :    
330 :     =over 4
331 :    
332 :     =item ca
333 :    
334 :     Custom attribute database into which the collections are to be stored.
335 :    
336 :     =item fig
337 :    
338 :     FIG object from which the collection attributes are to be harvested.
339 :    
340 :     =item RETURN
341 :    
342 :     Returns a statistics object with information about the migration.
343 :    
344 :     =back
345 :    
346 :     =cut
347 :    
348 :     sub MigrateCollections {
349 :     # Get the parameters.
350 :     my ($ca, $fig) = @_;
351 :     # Declare the return variable.
352 :     my $retVal = Stats->new();
353 :     # Get the collection names; each one is an attribute key in the old system.
354 :     my @collections = qw(higher_plants eukaryotic_ps nonoxygenic_ps hundred_hundred functional_coupling_paper ecoli_essentiality_paper);
355 :     # Erase the current collection data.
356 :     $ca->EraseAttribute('collection');
357 :     # Loop through the collection attributes.
358 :     for my $cname (@collections) {
359 :     $retVal->Add(collection => 1);
360 :     # Get this attribute from the old system.
361 :     my @rows = $fig->get_attributes(undef, $cname);
362 :     # Loop through its values, counting one per row under the collection's own name.
363 :     for my $row (@rows) {
364 :     $retVal->Add($cname => 1);
365 :     # Determine the object key: rows attached to the word 'Subsystem' use element 2 of the row; all others use element 0.
366 :     my $objectID = ($row->[0] eq 'Subsystem' ? $row->[2] : $row->[0]);
367 :     $ca->AddAttribute($objectID, 'collection', $cname);
368 :     }
369 :     }
370 :     # Return the statistics.
371 :     return $retVal;
372 :     }
373 :    
374 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3