[Bio] / Sprout / AttrDBRefresh.pl Repository:
ViewVC logotype

Annotation of /Sprout/AttrDBRefresh.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 AttrDBRefresh
4 :    
5 : parrello 1.5 This script performs useful functions on the custom attributes database.
6 : parrello 1.1
7 :     The currently-supported command-line options are as follows.
8 :    
9 :     =over 4
10 :    
11 :     =item user
12 :    
13 :     Name suffix to be used for log files. If omitted, the PID is used.
14 :    
15 :     =item trace
16 :    
17 :     Numeric trace level. A higher trace level causes more messages to appear. The
18 : parrello 1.2 default trace level is C<3>.
19 : parrello 1.1
20 :     =item sql
21 :    
22 :     If specified, turns on tracing of SQL activity.
23 :    
24 :     =item background
25 :    
26 :     Save the standard and error output to files. The files will be created
27 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
28 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
29 :     B<user> option above.
30 :    
31 :     =item h
32 :    
33 :     Display this command's parameters and options.
34 :    
35 :     =item phone
36 :    
37 :     Phone number to message when the script is complete.
38 :    
39 : parrello 1.2 =item migrate
40 :    
41 : parrello 1.6 If specified, the attributes will be migrated from the old FIG system to the
42 :     new attribute system.
43 :    
44 :     =item preserve
45 :    
46 :     If specified, a comma-delimited list of attributes that should not be migrated.
47 :     The values of these attributes in the new system will be preserved.
48 : parrello 1.2
49 : parrello 1.4 =item initializeAndClear
50 : parrello 1.2
51 : parrello 1.4 If specified, then the tables in the attribute database are dropped and re-created.
52 : parrello 1.2
53 : parrello 1.5 =item load
54 :    
55 :     If specified, the name of a file containing attribute data to be loaded into the
56 :     system. The file is presumed to be tab-delimited. The first column must be the
57 :     object ID, the second the attribute key name, and the remaining columns the
58 :     attribute values. All attribute keys mentioned in the file will be erased before
59 :     loading.
60 :    
61 : parrello 1.7 =item loadKey
62 :    
63 :     If specified, the name of a tab-delimited file containing attribute key data. For each key,
64 :     there is a pair of lines. The first line contains the ID, data type, and description
65 :     of the key. The second line contains the marker C<#GROUPS> followed by zero or more
66 :     group names. The attribute will be connected to all the specified groups.
67 :    
68 : parrello 1.5 =item backup
69 :    
70 :     If specified, the name of a file into which all the attribute data should be
71 : parrello 1.7 dumped. The file itself will receive the attribute data in the format expected
72 :     by C<load>. A second file, consisting of the same file name with the suffix C<.key>,
73 :     will contain the attribute key data in the format expected by C<loadKey>.
74 : parrello 1.5
75 : parrello 1.6 =item hh_copy
76 :    
77 :     If specified, the attributes used by the collections system will be converted from the
78 :     old system to the new one. In the old system, these attributes are assigned either to
79 :     a genome or to the word "Subsystem". If they are attached to the word "Subsystem", then
80 :     the value is the subsystem name; otherwise, the value is ignored. In the new attribute
81 :     system, the attribute key is always "collection" and the value is the collection name.
82 :     Rather than attaching attributes to the word "Subsystem", we attach them to the
83 :     subsystem itself.
84 :    
85 : parrello 1.1 =back
86 :    
87 :     =cut
88 :    
89 :     use strict;
90 :     use Tracer;
91 :     use DocUtils;
92 :     use TestUtils;
93 :     use Cwd;
94 :     use File::Copy;
95 :     use File::Path;
96 :     use CustomAttributes;
97 : parrello 1.5 use ERDBLoad;
98 : parrello 1.2 use FIG;
99 : parrello 1.1
# Main script body.
#
# Parses the command line, connects to the FIG and custom attribute
# databases, and then runs each requested operation in this fixed order:
# backup, initializeAndClear, migrate, hh_copy, loadKey, load. All of the
# work happens inside an eval so that a failure is traced and can be
# reported by phone message instead of killing the script outright.

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(CustomAttributes FIG) ],
    {
        trace => [3, "trace level"],
        initializeAndClear => ["", "if specified, the tables of the attribute database will be re-created"],
        migrate => ["", "if specified, attribute data will be migrated along with the object IDs"],
        preserve => ["", "comma-delimited list of attributes to be preserved during migration"],
        phone => ["", "phone number (international format) to call when load finishes"],
        load => ["", "file from which to load attribute data"],
        loadKey => ["", "file from which to load attribute key data"],
        backup => ["", "file to which attribute data should be dumped"],
        hh_copy => ["", "if specified, the attributes used by the collections system will be converted"]
    },
    "",
    @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Ensure we catch errors.
eval {
    # Ensure we don't use the new attribute system for accessing the old attributes.
    $FIG_Config::attrOld = 1;
    # Get the FIG object.
    my $fig = FIG->new();
    # Get the attribute database.
    Trace("Connecting to attribute database.") if T(2);
    my $ca = CustomAttributes->new();
    # Process according to the options selected.
    if ($options->{backup}) {
        # Back up the attributes to the specified file.
        my $backupFileName = $options->{backup};
        Trace("Backing up attribute data.") if T(2);
        my $stats = $ca->BackupAllAttributes($backupFileName);
        Trace("Attribute backup statistics:\n" . $stats->Show()) if T(2);
        # The key definitions go to a companion file with a ".key" suffix.
        Trace("Backing up key data.") if T(2);
        $stats = $ca->BackupKeys("$backupFileName.key");
        Trace("Key backup statistics:\n" . $stats->Show()) if T(2);
    }
    if ($options->{initializeAndClear}) {
        # Create the tables.
        $ca->CreateTables();
        Trace("Tables recreated.") if T(2);
    }
    if ($options->{migrate}) {
        # Migrate the data.
        Trace("Migrating attribute data.") if T(2);
        my $stats = MigrateAttributes($ca, $fig, $options->{preserve});
        Trace("Migration statistics:\n" . $stats->Show()) if T(2);
    }
    if ($options->{hh_copy}) {
        # Migrate the collections keys to the new system.
        Trace("Migrating collections data.") if T(2);
        my $stats = MigrateCollections($ca, $fig);
        Trace("Collection statistics:\n" . $stats->Show()) if T(2);
    }
    if ($options->{loadKey}) {
        # We want to load the attribute key data from the specified file, but
        # first we need to verify that the file exists.
        my $loadFileName = $options->{loadKey};
        if (! -f $loadFileName) {
            Confess("Cannot load keys: file \"$loadFileName\" is not found or not a file.");
        } else {
            Trace("Loading key data from $loadFileName.") if T(2);
            my $stats = $ca->RestoreKeys($loadFileName);
            Trace("Load statistics:\n" . $stats->Show()) if T(2);
        }
    }
    if ($options->{load}) {
        # We want to load the attribute data from the specified file, but
        # first we need to verify that the file exists.
        my $loadFileName = $options->{load};
        if (! -f $loadFileName) {
            Confess("Cannot load: file \"$loadFileName\" is not found or not a file.");
        } else {
            Trace("Loading attribute data from $loadFileName.") if T(2);
            my $stats = $ca->LoadAttributesFrom($loadFileName);
            Trace("Load statistics:\n" . $stats->Show()) if T(2);
        }
    }
    Trace("Processing complete.") if T(2);
};
# Copy the error text at once. $@ is a global, and the T()/Trace() calls
# below could reset it (for example by running an eval of their own)
# before the failure message has been built.
my $error = $@;
if ($error) {
    Trace("Script failed with error: $error") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Optionally notify the operator by phone message.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "RefreshAttrDB terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}
195 :    
=head3 MigrateAttributes

C<< my $stats = MigrateAttributes($ca, $fig, $preserve); >>

Migrate all the attributes data from the specified FIG instance. This is a long, slow
method used to convert the old attribute data to the new system. Only attribute
keys that are already in the database will be loaded, and they will completely
replace the existing values for those keys. Therefore, it is very important that the
FIG instance not be connected to the attribute database.

The values of the preserved keys are read back from the attribute database and
written to the same load file, so they are still present after the load.

=over 4

=item ca

B<CustomAttributes> object used to access the attribute database.

=item fig

A FIG object that can be used to retrieve attributes for migration purposes.

=item preserve (optional)

A comma-delimited list of attributes that are not to be migrated. White space
around the names is ignored, and empty names (as produced by a leading or doubled
comma) are discarded.

=item RETURN

Returns a statistical object for the load process.

=back

=cut

sub MigrateAttributes {
    # Get the parameters.
    my ($ca, $fig, $preserve) = @_;
    # Create the return value.
    my $retVal = Stats->new('keysIn');
    # Create a loader for the value table.
    my $hasValueFor = ERDBLoad->new($ca, 'HasValueFor', $FIG_Config::temp);
    # Get a list of the attributes we're to preserve.
    my %preserve = ();
    if (defined $preserve) {
        # Strip the outer white space first; the split pattern only absorbs
        # white space that is adjacent to a comma.
        (my $nameList = $preserve) =~ s/^\s+|\s+$//g;
        # Drop empty names so that a stray comma can never turn into a
        # request for the attribute key "".
        %preserve = map { $_ => 1 } grep { length($_) } split /\s*,\s*/, $nameList;
    }
    # Put the preserved keys into the load file.
    for my $key (keys %preserve) {
        Trace("Preserving key $key.") if T(3);
        my @newValues = $ca->GetAttributes(undef, $key);
        Trace(scalar(@newValues) . " values of $key will be preserved.") if T(3);
        # Put the values into the load file.
        PutValue($hasValueFor, $ca, @newValues);
    }
    # Get a list of all our attribute keys.
    my @allKeys = $ca->GetFlat(['AttributeKey'], "", [], 'AttributeKey(id)');
    # Delete the preserved keys.
    my @keys = grep { ! $preserve{$_} } @allKeys;
    # Loop through the rest, building the load files.
    for my $key (@keys) {
        Trace("Migrating key $key.") if T(3);
        $retVal->Add(keysIn => 1);
        # Get all the values of the specified key.
        my @oldValues = $fig->get_attributes(undef, $key);
        my $count = scalar(@oldValues);
        Trace("$count values found for $key in source system.") if T(3);
        # Put the values into the load file.
        PutValue($hasValueFor, $ca, @oldValues);
    }
    # Close and finish the loads to upload the data.
    Trace("Closing value table.") if T(2);
    my $hvfStats = $hasValueFor->FinishAndLoad();
    Trace("Statistics from value table load:\n" . $hvfStats->Show()) if T(2);
    # Merge the statistics.
    $retVal->Accumulate($hvfStats);
    # Return the statistics object.
    return $retVal;
}
274 :    
=head3 PutValue

C<< PutValue($hasValueFor, $ca, @values); >>

Put the values from an attribute value list into a HasValueFor load file.
One record is written per tuple, in the field order key, object ID, value
string. Calling this with no tuples writes nothing.

=over 4

=item hasValueFor

Load object for the HasValueFor table.

=item ca

A CustomAttribute object. We get the splitter value from it.

=item values

A list of tuples, each consisting of an object ID, a key name, and one or more values.
An undefined value is written as an empty string.

=back

=cut

sub PutValue {
    # Get the parameters.
    my ($hasValueFor, $ca, @values) = @_;
    # Loop through the value rows.
    for my $row (@values) {
        # Get this row's data. The per-row list has its own name so that it
        # does not hide the @values parameter.
        my ($id, $key, @rowValues) = @{$row};
        # Format the values. An undefined value joins as an empty string
        # either way; mapping it explicitly avoids an "uninitialized"
        # warning when warnings are on.
        my $valueString = join($ca->{splitter},
                               map { defined $_ ? $_ : '' } @rowValues);
        # Add the value. The key comes before the object ID in the record.
        $hasValueFor->Put($key, $id, $valueString);
    }
    return;
}
312 :    
=head3 MigrateCollections

C<< my $stats = MigrateCollections($ca, $fig); >>

Copy the collection data out of the specified FIG object and store it as values
of the C<collection> attribute in the specified custom attribute database. Any
existing C<collection> values are erased first.

=over 4

=item ca

Custom attribute database into which the collections are to be stored.

=item fig

FIG object from which the collection attributes are to be harvested.

=item RETURN

Returns a statistics object with information about the migration: the counter
C<collection> is incremented once per collection name, and a counter named after
each collection is incremented once per value found for it.

=back

=cut

sub MigrateCollections {
    my ($ca, $fig) = @_;
    # Statistics object handed back to the caller.
    my $stats = Stats->new();
    # Start from a clean slate: erase the current collection data.
    $ca->EraseAttribute('collection');
    # Each of these names is an attribute key in the old system.
    foreach my $collection (qw(higher_plants eukaryotic_ps nonoxygenic_ps hundred_hundred functional_coupling_paper ecoli_essentiality_paper)) {
        $stats->Add(collection => 1);
        # Pull every tuple for this key from the old system.
        foreach my $tuple ($fig->get_attributes(undef, $collection)) {
            $stats->Add($collection => 1);
            # A tuple is (object ID, key, value). The key slot is not needed.
            my ($owner, undef, $value) = @{$tuple};
            # When the old attribute hangs off the word "Subsystem", the
            # value is used as the object ID; otherwise the owner is.
            my $target;
            if ($owner eq 'Subsystem') {
                $target = $value;
            }
            else {
                $target = $owner;
            }
            $ca->AddAttribute($target, 'collection', $collection);
        }
    }
    return $stats;
}
363 :    
364 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3