[Bio] / Sprout / ERDBLoadGroup.pm Repository:
ViewVC logotype

Annotation of /Sprout/ERDBLoadGroup.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package ERDBLoadGroup;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use Stats;
26 :     use Time::HiRes qw(time);
27 :     use ERDBGenerate;
28 :    
29 :     =head1 ERDB Database Load Group Object
30 :    
31 :     The process of loading an ERDB database can be a simple matter of creating some
32 :     sequential files from other sequential files, or it can be a complex web of
33 :     connected sub-processes involving multiple groups of tables being loaded in
34 :     parallel by multiple worker processes. The ERDB Database Load Group object
35 :     provides housekeeping functions to simplify the management of the more complex
36 :     load tasks.
37 :    
38 :     When discussing an ERDB database load, there are two similar concepts we use to
39 :     break the load into pieces: I<sections> and I<groups>. A I<section> is a
40 :     partition of the data that can be processed in isolation from other sections. A
41 :     I<group> is a set of tables that should be loaded at the same time. An ERDB load
42 :     group is a request to generate load files for one or more sections of the data
43 :     targeting a single group of tables.
44 :    
45 :     A certain amount of bookkeeping is required in order to handle parallelism. For
46 :     each table, a separate output file is generated for each section. If a section
47 :     does not complete successfully, then its load file is deleted and the section
48 :     must be loaded again. Because each section has its own load file, only the
49 :     particular sections that fail need to be reloaded.
50 :    
51 :     Individual load groups should subclass this object, providing a virtual override
52 :     for the L</Generate> method.
53 :    
54 :     The subclass name should consist of the group name followed by noise in capital
55 :     case. So, for example, the subclass name for a group named C<Feature> would be
56 :     C<FeatureSproutLoader> or C<FeatureAttributeLoader> or something similar. The
57 :     group name should only be letters, and only the first letter should be capitalized.
58 :     This allows the load script to be case-insensitive with regard to incoming group
59 :     names.
60 :    
61 :     Any working or status files generated by a subclass should have a prefix of C<dt>-something.
62 :     This will ensure they are deleted by the C<clear> option of [[ERDBGeneratorPl]].
63 :    
64 :     The fields in this object are as follows.
65 :    
66 :     =over 4
67 :    
68 :     =item db
69 :    
70 :     [[ErdbPm]] object for accessing the target database
71 :    
72 :     =item directory
73 :    
74 :     Directory into which the load files should be placed.
75 :    
76 :     =item group
77 :    
78 :     name of this load group
79 :    
80 : parrello 1.5 =item label
81 :    
82 :     name of this worker process
83 :    
84 : parrello 1.1 =item lastKey
85 :    
86 :     ID of the last major object processed
87 :    
88 :     =item loaders
89 :    
90 :     hash mapping the names of the group's tables to [[ERDBGeneratePm]] objects
91 :    
92 :     =item stats
93 :    
94 :     statistics object that can be used to track the progress of the load
95 :    
96 :     =item section
97 :    
98 :     name of this data section
99 :    
100 :     =item source
101 :    
102 :     object used to access the data from which the load files are to be generated
103 :    
104 :     =item tables
105 :    
106 :     reference to a list of the names of the tables in this group
107 :    
108 :     =item options
109 :    
110 :     hash containing the options originally passed in to the constructor
111 :    
112 :     =back
113 :    
114 :     =cut
115 :    
116 :     =head3 new
117 :    
118 : parrello 1.5 my $edbl = ERDBLoadGroup->new($db, $options, @tables);
119 : parrello 1.1
120 :     Construct a new ERDBLoadGroup object. The following parameters are expected:
121 :    
122 :     =over 4
123 :    
124 :     =item db
125 :    
126 :     The [[ErdbPm]] object for the database being loaded.
127 :    
128 :     =item options
129 :    
130 :     Reference to a hash of options. At the current time, no options are needed
131 :     by this object, but they may be important to subclass objects.
132 :    
133 :     =item tables
134 :    
135 :     A list of the names for the tables in this load group.
136 :    
137 :     =back
138 :    
139 :     =cut
140 :    
sub new {
    # Constructor. Takes the class name, the database object, an options
    # hash reference, and the names of the tables belonging to the group.
    my ($class, $db, $options, @tables) = @_;
    # Every load group carries its own statistics object.
    my $stats = Stats->new();
    # The group name is the leading capital-case word of the class name
    # (e.g. "Feature" for "FeatureSproutLoader"). If the class name does
    # not start that way, the whole class name is used.
    my $group = $class;
    if ($class =~ /^([A-Z][a-z]+)/) {
        $group = $1;
    }
    # The load files go into the database's load directory, which must
    # already exist.
    my $directory = $db->LoadDirectory();
    if (! -d $directory) {
        Confess("Load directory \"$directory\" not found or invalid.");
    }
    # The worker label comes from the options, falling back to the
    # process ID.
    my $label = $options->{label} || $$;
    # Assemble the object. The loaders are created and the section is
    # assigned later, when ProcessSection is called. The source object
    # is fetched on demand.
    my %fields = (
        db        => $db,
        directory => $directory,
        group     => $group,
        stats     => $stats,
        source    => undef,
        label     => $label,
        lastKey   => undef,
        loaders   => {},
        tables    => \@tables,
        section   => undef,
        options   => $options,
    );
    return bless \%fields, $class;
}
173 :    
174 :     =head2 Subclass Methods
175 :    
176 :     =head3 Put
177 :    
178 :     $edbl->Put($table, %fields);
179 :    
180 :     Place a table record in a load file. This method is the workhorse of the
181 :     file generation phase of a load.
182 :    
183 :     =over 4
184 :    
185 :     =item table
186 :    
187 :     Name of the table being loaded.
188 :    
189 :     =item fields
190 :    
191 :     Hash of field names to field values for the fields in the table.
192 :    
193 :     =back
194 :    
195 :     =cut
196 :    
sub Put {
    # Write one record (a hash of field names to values) to the load
    # file of the named table and update the group statistics.
    my ($self, $table, %fields) = @_;
    # Only tables that have a loader in this group may be written to.
    my $loader = $self->{loaders}->{$table};
    unless (defined $loader) {
        Confess("Table $table not found in load group $self->{group}.");
    }
    # Hand the record to the table's loader, which reports the number
    # of bytes it wrote.
    my $bytes = $loader->Put(%fields);
    # No bytes written means the record was discarded.
    unless ($bytes) {
        return $self->Add("$table-discards" => 1);
    }
    # Otherwise count both the record and its size.
    $self->Add("$table-records" => 1);
    return $self->Add("$table-bytes" => $bytes);
}
215 :    
216 : parrello 1.4 =head3 PutE
217 :    
218 :     $edbl->PutE($table => $id, %fields);
219 :    
220 :     Place an entity-based table record in a load file. The first field
221 :     specified after the table name is the ID.
222 :    
223 :     =over 4
224 :    
225 :     =item table
226 :    
227 :     Name of the relevant table.
228 :    
229 :     =item id
230 :    
231 :     ID of the relevant entity.
232 :    
233 :     =item fields
234 :    
235 :     Hash mapping field names to values.
236 :    
237 :     =back
238 :    
239 :     =cut
240 :    
sub PutE {
    # Write an entity record: the value after the table name is the
    # entity ID, and the rest are ordinary field name/value pairs.
    my ($self, $table, $id, %fields) = @_;
    # An entity record is a normal record with the ID in the "id" field.
    my @record = (id => $id, %fields);
    $self->Put($table, @record);
    # Keep a tally of entity puts.
    return $self->Add(putE => 1);
}
249 :    
250 :     =head3 PutR
251 :    
252 :     $edbl->PutR($table => $from, $to, %fields);
253 :    
254 :     Place a relationship record in a load file. The first two fields
255 :     specified after the table name are the from-link and the to-link,
256 :     respectively.
257 :    
258 :     =over 4
259 :    
260 :     =item table
261 :    
262 :     Name of the relevant relationship.
263 :    
264 :     =item from
265 :    
266 :     ID of the from-entity.
267 :    
268 :     =item to
269 :    
270 :     ID of the to-entity.
271 :    
272 :     =item fields
273 :    
274 :     Hash mapping field names to field values.
275 :    
276 :     =back
277 :    
278 :     =cut
279 :    
sub PutR {
    # Write a relationship record: the two values after the table name
    # are the from-link and the to-link, and the rest are ordinary
    # field name/value pairs.
    my ($self, $table, $from, $to, %fields) = @_;
    # A relationship record is a normal record with the two link fields
    # placed ahead of the caller's fields.
    my @record = ('from-link' => $from, 'to-link' => $to, %fields);
    $self->Put($table, @record);
    # Keep a tally of relationship puts.
    return $self->Add(putR => 1);
}
288 :    
289 :    
290 : parrello 1.1 =head3 Add
291 :    
292 :     $edbl->Add($statName => $count);
293 :    
294 :     Add the specified count to the named statistical counter. The statistical
295 :     counts are kept in an internal statistics object whose contents are
296 :     displayed when the group is finished.
297 :    
298 :     =over 4
299 :    
300 :     =item statName
301 :    
302 :     Name of the statistic to increment.
303 :    
304 :     =item count
305 :    
306 :     Value by which to increment it.
307 :    
308 :     =back
309 :    
310 :     =cut
311 :    
sub Add {
    # Add a count to the named statistic in this group's statistics
    # object and return whatever the statistics object reports back
    # (Track in this file treats that value as the new total).
    my ($self, $statName, $count) = @_;
    # Callers may leave the count off; treat that as an increment of
    # one rather than relying on the statistics object to supply a
    # default.
    $count = 1 if ! defined $count;
    # Update the statistic and pass the result back to the caller.
    return $self->{stats}->Add($statName => $count);
}
318 :    
319 : parrello 1.4 =head3 AddWarning
320 :    
321 :     $edbl->AddWarning($errorType => $message);
322 :    
323 :     Record a warning. Warnings indicate possible errors in the incoming data.
324 :     The first warning of a specified type is added as a message to the load
325 :     statistic. All warnings are also traced at level 3.
326 :    
327 :     =over 4
328 :    
329 :     =item errorType
330 :    
331 :     Type of error indicated by the warning. This is used as the label when the
332 :     warning is counted in the statistics object.
333 :    
334 :     =item message
335 :    
336 :     Message describing the reason for the warning.
337 :    
338 :     =back
339 :    
340 :     =cut
341 :    
sub AddWarning {
    # Record a data warning of the given type. Every warning is counted
    # under its type and traced at level 3. The first warning of each
    # type is also kept as a message in the statistics object.
    my ($self, $errorType, $message) = @_;
    # Count the warning, stating the increment explicitly.
    my $count = $self->Add($errorType => 1);
    # Is this the first one of this type?
    if (defined $count && $count == 1) {
        # Yes. Keep the descriptive text for the final report, labelled
        # with its type so the counter and the message can be matched.
        $self->{stats}->AddMessage("$errorType: $message");
    }
    # All warnings, including the first, are traced.
    Trace("Data warning: $message") if T(3);
    return;
}
356 :    
357 : parrello 1.1 =head3 Track
358 :    
359 :     $edbl->Track($statName => $key, $period);
360 :    
361 :     Save the specified key as the one currently in progress. If an error
362 :     occurs, the key value will appear in the output log. The named statistic
363 :     will also be incremented, and if the count is an even multiple of the stated
364 :     period, a trace message will be output at level 3.
365 :    
366 :     Most load groups have a primary object type that drives the main loop. When
367 :     something goes wrong, we want to know the ID of the offending object. When
368 :     things go right, we want to know how far we've progressed toward completion.
369 :     This method can be used to record each occurrence of a primary object, and
370 :     provide a log of the progress or our current position in times of stress.
371 :    
372 :     =over 4
373 :    
374 :     =item statName
375 :    
376 :     Name of the statistic to be incremented. This should be a plural noun
377 : parrello 1.5 describing the object whose key is coming in.
378 : parrello 1.1
379 :     =item key
380 :    
381 :     Key value to be displayed if something goes wrong.
382 :    
383 :     =item period (optional)
384 :    
385 :     If specified, should be the number of objects to be counted between each
386 :     level-3 trace message.
387 :    
388 :     =back
389 :    
390 :     =cut
391 :    
sub Track {
    # Note the key of the object now being processed, count it under
    # the named statistic, and optionally emit a periodic progress
    # message.
    my ($self, $statName, $key, $period) = @_;
    # Remember the key so it can be reported if an error occurs.
    $self->{lastKey} = $key;
    # Count the object. The statistics object hands back the new total.
    my $total = $self->{stats}->Add($statName => 1);
    # A progress message is due only when a period was given, tracing
    # is active at level 3, and the total is a multiple of the period.
    my $reportDue = $period && T(3) && ($total % $period == 0);
    if ($reportDue) {
        MemTrace("$total $statName processed by $self->{label} for $self->{group} group.");
    }
}
405 :    
406 :     =head3 section
407 :    
408 :     my $sectionID = $edbl->section();
409 :    
410 :     Return the ID of the current section.
411 :    
412 :     =cut
413 :    
sub section {
    # Accessor: hand back the ID of the section currently assigned to
    # this load group (undefined until ProcessSection assigns one).
    my $self = shift;
    my $sectionID = $self->{section};
    return $sectionID;
}
420 :    
421 :     =head3 source
422 :    
423 :     my $sourceObject = $edbl->source();
424 :    
425 :     Return the source object used to get the data needed for creating
426 :     the load files.
427 :    
428 :     =cut
429 :    
sub source {
    # Accessor for the source object from which the load data is read.
    my $self = shift;
    # The source object is fetched from the database object on first
    # use, and again whenever the cached one has been released.
    unless (defined $self->{source}) {
        $self->{source} = $self->{db}->GetSourceObject();
    }
    return $self->{source};
}
440 :    
441 :     =head3 db
442 :    
443 :     my $erdbObject = $edbl->db();
444 :    
445 :     Return the database object for the target database.
446 :    
447 :     =cut
448 :    
sub db {
    # Accessor: hand back the database object of the target database.
    my $self = shift;
    my $erdbObject = $self->{db};
    return $erdbObject;
}
455 :    
456 :     =head2 Internal Methods
457 :    
458 :     =head3 ProcessSection
459 :    
460 :     my $flag = $edbl->ProcessSection($section);
461 :    
462 :     Generate the load file for a particular data section. This method calls
463 :     the virtual method L</Generate> to actually put the data into the load
464 :     files, and is responsible for assigning the section and finalizing the
465 :     load files if the load is successful.
466 :    
467 :     =over 4
468 :    
469 :     =item section
470 :    
471 :     ID of the section to load.
472 :    
473 :     =item RETURN
474 :    
475 :     Returns TRUE if successful, FALSE if an error prevented loading the section.
476 :    
477 :     =back
478 :    
479 :     =cut
480 :    
sub ProcessSection {
    # Generate the load files of this group's tables for one section.
    # Returns TRUE if the section was loaded or legitimately skipped in
    # resume mode, FALSE if an error prevented loading it.
    my ($self, $section) = @_;
    # Declare the return variable. We'll set it to 1 if we succeed.
    my $retVal = 0;
    # Save the section ID.
    $self->{section} = $section;
    # Get the database object.
    my $db = $self->db();
    # Get the list of tables for this group.
    my @tables = @{$self->{tables}};
    # Should we skip this section?
    if ($self->SkipIndicated($section, \@tables)) {
        Trace("Resume mode: section $section skipped for group $self->{group}.") if T(3);
        $self->Add("section-skips" => 1);
        # Skipping is not a failure: nothing prevented the load.
        $retVal = 1;
    } else {
        # Not skipping. Start a timer and protect ourselves from errors.
        my $startTime = time();
        eval {
            # Get the loader hash.
            my $loaderHash = $self->{loaders};
            # Initialize the loaders for the necessary tables.
            for my $table (@tables) {
                # Get this table's loader, creating it on first use.
                my $loader = $loaderHash->{$table};
                if (! defined $loader) {
                    $loader = ERDBGenerate->new($db, $self->{directory}, $table, $self->{stats});
                    # Save it for future use.
                    $loaderHash->{$table} = $loader;
                    # Count it.
                    $self->Add(tables => 1);
                }
                $loader->Start($section);
            }
            # Generate the data to put in the newly-created load files.
            $self->Generate();
            # Release our hold on the source object. This allows the
            # database object to decide whether or not we need a new one.
            delete $self->{source};
            # Clean up the database object.
            $db->Cleanup();
        };
        # Capture the error text at once, before any later call that
        # uses eval can overwrite $@.
        my $error = $@;
        # Did it work?
        if ($error) {
            # No, so emit an error message and abort all the loaders.
            $self->{stats}->AddMessage("Error loading section $section: $error");
            if (defined $self->{lastKey}) {
                $self->{stats}->AddMessage("Error occurred while processing \"$self->{lastKey}\".");
            }
            $self->Add("section-errors" => 1);
            for my $loader (values %{$self->{loaders}}) {
                $loader->Abort();
            }
        } else {
            # Yes! Finish all the loaders.
            for my $loader (values %{$self->{loaders}}) {
                $loader->Finish();
            }
            # Update the load count.
            $self->Add("section-loads" => 1);
            # Denote success.
            $retVal = 1;
        }
        # Update the timer, whether or not the load worked.
        $self->Add(duration => (time() - $startTime));
    }
    # Return the success indicator.
    return $retVal;
}
546 :    
547 :     =head3 DisplayStats
548 :    
549 :     my $text = $edbl->DisplayStats();
550 :    
551 :     Display the statistics for this load group.
552 :    
553 :     =cut
554 :    
sub DisplayStats {
    # Hand back the display form of this group's statistics, as
    # produced by the statistics object's Show method.
    my $self = shift;
    my $stats = $self->{stats};
    return $stats->Show();
}
561 :    
562 :     =head3 GetGroupHash
563 :    
564 :     my $groupHash = ERDBLoadGroup::GetGroupHash($erdb);
565 :    
566 :     Return a hash that maps each load group in the specified database to its
567 :     constituent tables. This is useful when checking for problems with a load
568 :     or performing finishing tasks.
569 :    
570 :     =over 4
571 :    
572 :     =item erdb
573 :    
574 :     [[ErdbPm]] database whose load information is desired.
575 :    
576 :     =item RETURN
577 :    
578 :     Returns a reference to a hash that maps each group name to a list of
579 :     table names.
580 :    
581 :     =back
582 :    
583 :     =cut
584 :    
sub GetGroupHash {
    # Build a hash that maps each load group of the given database to a
    # list reference holding that group's table names.
    my ($erdb) = @_;
    # Pair each group name, in the order the database lists them, with
    # a fresh list of its tables.
    my %tablesOf = map { $_ => [ GetTables($erdb, $_) ] } $erdb->LoadGroupList();
    # The caller gets a reference to the hash.
    return \%tablesOf;
}
598 :    
599 : parrello 1.3 =head3 GetTables
600 :    
601 :     my @tables = ERDBLoadGroup::GetTables($erdb, $group);
602 :    
603 :     Return the list of tables belonging to the specified load group.
604 :    
605 :     =over 4
606 :    
607 :     =item erdb
608 :    
609 :     [[ErdbPm]] database whose load information is desired.
610 :    
611 :     =item group
612 :    
613 :     Name of relevant group.
614 :    
615 :     =item RETURN
616 :    
617 :     Returns a list of the tables loaded by the specified group.
618 :    
619 :     =back
620 :    
621 :     =cut
622 :    
sub GetTables {
    # List the tables that belong to the named load group of the given
    # database.
    my ($erdb, $group) = @_;
    # Ask the database for a loader object for the group (no section,
    # empty options) and read the table list out of it.
    my $tableList = $erdb->Loader($group, undef, {})->{tables};
    # Hand back the names themselves rather than the loader's own list.
    return @{$tableList};
}
633 :    
634 :    
635 : parrello 1.1 =head3 ComputeGroups
636 :    
637 : parrello 1.2 my @groupList = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
638 : parrello 1.1
639 : parrello 1.2 Compute the actual list of groups determined by the incoming group list.
640 : parrello 1.1
641 :     =over 4
642 :    
643 :     =item erdb
644 :    
645 :     [[ErdbPm]] object for the database being loaded.
646 :    
647 :     =item groups
648 :    
649 : parrello 1.2 Reference to a list of group names specified on the command line. A plus sign
650 :     (C<+>) has special meaning.
651 : parrello 1.1
652 :     =item RETURN
653 :    
654 :     Returns the actual list of groups to be processed by the calling command. The
655 :     names will have been normalized to capital case.
656 :    
657 :     =back
658 :    
659 :     =cut
660 :    
sub ComputeGroups {
    # Turn the group names given on the command line into the actual
    # list of groups to process. A plus sign ("+") stands for every
    # group after the previously named one in the standard order.
    # Names are matched without regard to case and are returned in
    # capital case. An unknown name is a fatal error.
    my ($erdb, $groups) = @_;
    # Get the complete group list in standard order.
    my @allGroups = $erdb->LoadGroupList();
    # Create a hash for validation purposes. This maps each valid group
    # name to its position in the standard order.
    my %allGroupHash;
    @allGroupHash{@allGroups} = (0 .. $#allGroups);
    # This is the index of the last-processed group in the standard
    # order. We start it before the first group in the list.
    my $lastI = -1;
    # The listed groups will be put in here.
    my @retVal;
    # Process the group list.
    for my $group (@$groups) {
        if ($group eq '+') {
            # Push in everything after the previous group processed.
            # We end at the last position, so a second "+" after this
            # one adds nothing to the result list.
            my $firstI = $lastI + 1;
            $lastI = $#allGroups;
            push @retVal, @allGroups[$firstI..$lastI];
        } else {
            # Prefer the name exactly as given. If that is not a known
            # group, fall back to its capital-case form, so "feature"
            # and "FEATURE" both select "Feature".
            my $name = (exists $allGroupHash{$group} ? $group : ucfirst lc $group);
            if (exists $allGroupHash{$name}) {
                # Here we have a valid group name. Push it into the list.
                push @retVal, $name;
                # Remember its location in case there's a plus sign.
                $lastI = $allGroupHash{$name};
            } else {
                # This is an error.
                Confess("Invalid load group name $group.");
            }
        }
    }
    # Normalize the group names and return them.
    @retVal = map { ucfirst $_ } @retVal;
    Trace("Final group list is " . join(" ", @retVal) . ".") if T(2);
    return @retVal;
}
703 :    
704 : parrello 1.2 =head3 KillFileName
705 :    
706 :     my $fileName = ERDBLoadGroup::KillFileName($erdb, $directory);
707 :    
708 :     Compute the kill file name for the specified database in the specified
709 :     directory. When the [[ERDBGeneratorPl]] script sees the kill file, it will
710 :     terminate itself at the end of the current section.
711 :    
712 :     =over 4
713 :    
714 :     =item erdb
715 :    
716 :     Database
717 :    
718 :     =item directory (optional)
719 :    
720 :     Load directory for the database.
721 :    
722 :     =item RETURN
723 :    
724 :     Returns the specified database's kill file name. If a directory is specified,
725 :     it is prefixed to the name with an intervening slash.
726 :    
727 :    
728 :     =back
729 :    
730 :     =cut
731 :    
sub KillFileName {
    # Work out the name of the kill file for a database, optionally
    # located in the given load directory.
    my ($erdb, $directory) = @_;
    # The base name is "kill_" followed by the lower-cased class name
    # of the database object. The load-file naming routine then builds
    # the name of a sectionless "control" file from it.
    my $baseName = 'kill_' . lc(ref $erdb);
    return ERDBGenerate::CreateFileName($baseName, undef, 'control', $directory);
}
742 :    
743 : parrello 1.5 =head3 SkipIndicated
744 :    
745 :     my $flag = $edbl->SkipIndicated($section, \@tables);
746 :    
747 :     Return FALSE if the current group should be run for the current section.
748 :     If the C<resume> option is not set, this method always returns FALSE;
749 :     otherwise, it will look at the files currently in the load directory and
750 :     if enough of them are present, it will return TRUE, indicating there's
751 :     no point in generating data for the indicated tables with respect to the
752 :     current section. In other words, it will return TRUE if, for every table,
753 :     there is either a load file for that table or a load file for the
754 :     specified section of that table.
755 :    
756 :     =over 4
757 :    
758 :     =item section
759 :    
760 :     ID of the relevant section.
761 :    
762 :     =item tables
763 :    
764 :     List of tables to check.
765 :    
766 :     =item RETURN
767 :    
768 :     Returns TRUE if load files are already generated for the specified section, else FALSE.
769 :    
770 :     =back
771 :    
772 :     =cut
773 :    
sub SkipIndicated {
    # Decide whether generation can be skipped for the given section
    # and tables. The answer starts out as the value of the "resume"
    # option and becomes 0 if a table lacks a load file.
    my ($self, $section, $tables) = @_;
    # Without the resume option this is false and no files are checked.
    my $skip = $self->{options}->{resume};
    for my $table (@$tables) {
        # One missing table settles the matter, so stop looking.
        last unless $skip;
        # A table is covered if it has either a file for the whole
        # table or a file for this particular section.
        my $tableFile = ERDBGenerate::CreateFileName($table, undef, data => $self->{directory});
        my $sectionFile = ERDBGenerate::CreateFileName($table, $section, data => $self->{directory});
        unless (-f $tableFile || -f $sectionFile) {
            # Neither file exists, so this section must be regenerated.
            $skip = 0;
            Trace("Section $section not found for $table in $self->{group}. Regeneration required.") if T(3);
        }
    }
    return $skip;
}
794 :    
795 : parrello 1.2
796 : parrello 1.1 =head2 Virtual Methods
797 :    
798 :     =head3 Generate
799 :    
800 :     $edbl->Generate();
801 :    
802 :     Generate the data for this load group with respect to the current
803 :     section. This method must be overridden by the subclass and should call
804 :     the L</Put> method to put data into the tables.
805 :    
806 :     =cut
807 :    
sub Generate {
    # Placeholder for the method each load group subclass must supply.
    # Reaching this version means the subclass did not override it, so
    # the only thing to do is complain.
    my $complaint = "Pure virtual method Generate called.";
    Confess($complaint);
}
811 :    
812 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3