[Bio] / Sprout / ERDBLoadGroup.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoadGroup.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Tue Sep 30 15:23:55 2008 UTC revision 1.6, Thu Apr 2 01:37:07 2009 UTC
# Line 77  Line 77 
77    
78  name of this load group  name of this load group
79    
80    =item label
81    
82    name of this worker process
83    
84  =item lastKey  =item lastKey
85    
86  ID of the last major object processed  ID of the last major object processed
# Line 111  Line 115 
115    
116  =head3 new  =head3 new
117    
118      my $edbl = ERDBLoadGroup->new($source, $db, $directory, $options, @tables);      my $edbl = ERDBLoadGroup->new($db, $directory, $options, @tables);
119    
120  Construct a new ERDBLoadGroup object. The following parameters are expected:  Construct a new ERDBLoadGroup object. The following parameters are expected:
121    
122  =over 4  =over 4
123    
 =item source  
   
 The object to be used by the subclass to access the source data.  
   
124  =item db  =item db
125    
126  The [[ErdbPm]] object for the database being loaded.  The [[ErdbPm]] object for the database being loaded.
127    
 =item directory  
   
 Name of the directory to contain the load files.  
   
128  =item options  =item options
129    
130  Reference to a hash of options. At the current time, no options are needed  Reference to a hash of options. At the current time, no options are needed
# Line 140  Line 136 
136    
137  =back  =back
138    
 This constructor is deliberately kept lightweight in order to insure that  
 L</GetGroupHash> is high-performance. For this reason, the [[ERDBGeneratePm]]  
 objects in the loaders hash are not created until L</ProcessSection>.  
   
139  =cut  =cut
140    
141  sub new {  sub new {
142      # Get the parameters.      # Get the parameters.
143      my ($class, $source, $db, $directory, $options, @tables) = @_;      my ($class, $db, $options, @tables) = @_;
144      # Create a statistics object      # Create a statistics object
145      my $stats = Stats->new();      my $stats = Stats->new();
146      # Compute the group name from the class name. It is the first word in      # Compute the group name from the class name. It is the first word in
147      # a name that is presumably capital case.      # a name that is presumably capital case.
148      my $group = ($class =~ /^([A-Z][a-z]+)/ ? $1 : $class);      my $group = ($class =~ /^([A-Z][a-z]+)/ ? $1 : $class);
149      # Validate the directory.      # Get the directory.
150        my $directory = $db->LoadDirectory();
151      Confess("Load directory \"$directory\" not found or invalid.") if ! -d $directory;      Confess("Load directory \"$directory\" not found or invalid.") if ! -d $directory;
152      # Create the ERDBLoadGroup object. Note that so far we don't have any loaders      # Create the ERDBLoadGroup object. Note that so far we don't have any loaders
153      # defined and the section has not yet been assigned. The "ProcessSection"      # defined and the section has not yet been assigned. The "ProcessSection"
# Line 165  Line 158 
158                      directory => $directory,                      directory => $directory,
159                      group => $group,                      group => $group,
160                      stats => $stats,                      stats => $stats,
161                      source => $source,                      source => undef,
162                        label => ($options->{label} || $$),
163                      lastKey => undef,                      lastKey => undef,
164                      loaders => {},                      loaders => {},
165                      tables => \@tables,                      tables => \@tables,
# Line 219  Line 213 
213      }      }
214  }  }
215    
216    =head3 PutE
217    
218        $edbl->PutE($table => $id, %fields);
219    
220    Place an entity-based table record in a load file. The first field
221    specified after the table name is the ID.
222    
223    =over 4
224    
225    =item table
226    
227    Name of the relevant table.
228    
229    =item id
230    
231    ID of the relevant entity.
232    
233    =item fields
234    
235    Hash mapping field names to values.
236    
237    =back
238    
239    =cut
240    
241    sub PutE {
242        # Get the parameters.
243        my ($self, $table, $id, %fields) = @_;
244        # Put the record.
245        $self->Put($table, id => $id, %fields);
246        # Record that we've done a putE.
247        $self->Add(putE => 1);
248    }
249    
250    =head3 PutR
251    
252        $edbl->PutR($table => $from, $to, %fields);
253    
254    Place a relationship record in a load file. The first two fields
255    specified after the table name are the from-link and the to-link,
256    respectively.
257    
258    =over 4
259    
260    =item table
261    
262    Name of the relevant relationship.
263    
264    =item from
265    
266    ID of the from-entity.
267    
268    =item to
269    
270    ID of the to-entity.
271    
272    =item fields
273    
274    Hash mapping field names to field values.
275    
276    =back
277    
278    =cut
279    
280    sub PutR {
281        # Get the parameters.
282        my ($self, $table, $from, $to, %fields) = @_;
283        # Put the record.
284        $self->Put($table, 'from-link' => $from, 'to-link' => $to, %fields);
285        # Record that we've done a PutR.
286        $self->Add(putR => 1);
287    }
288    
289    
290  =head3 Add  =head3 Add
291    
292      $edbl->Add($statName => $count);      $edbl->Add($statName => $count);
# Line 248  Line 316 
316      $self->{stats}->Add($statName => $count);      $self->{stats}->Add($statName => $count);
317  }  }
318    
319    =head3 AddWarning
320    
321        $edbl->AddWarning($errorType => $message);
322    
323    Record a warning. Warnings indicate possible errors in the incoming data.
324    The first warning of a specified type is added as a message to the load
325    statistic. All warnings are also traced at level 3.
326    
327    =over 4
328    
329    =item errorType
330    
331    Type of error indicated by the warning. This is used as the label when the
332    warning is counted in the statistics object.
333    
334    =item message
335    
336    Message describing the reason for the warning.
337    
338    =back
339    
340    =cut
341    
342    sub AddWarning {
343        # Get the parameters.
344        my ($self, $errorType, $message) = @_;
345        # Count the warning.
346        my $count = $self->Add($errorType);
347        # Is this the first one of this type?
348        if ($count == 1) {
349            # Yes, add it to the messages for the end.
350            $self->{stats}->AddMessage($errorType);
351        } else {
352            # No, just trace it.
353            Trace("Data warning: $message") if T(3);
354        }
355    }
356    
357  =head3 Track  =head3 Track
358    
359      $edbl->Track($statName => $key, $period);      $edbl->Track($statName => $key, $period);
# Line 268  Line 374 
374  =item statName  =item statName
375    
376  Name of the statistic to be incremented. This should be a plural noun  Name of the statistic to be incremented. This should be a plural noun
377  describing the object whose kep is coming in.  describing the object whose key is coming in.
378    
379  =item key  =item key
380    
# Line 293  Line 399 
399      # Do we need to output a progress message?      # Do we need to output a progress message?
400      if ($period && T(3) && ($newValue % $period == 0)) {      if ($period && T(3) && ($newValue % $period == 0)) {
401          # Yes.          # Yes.
402          Trace("$newValue $statName processed for $self->{group} group.");          MemTrace("$newValue $statName processed by $self->{label} for $self->{group} group.");
403      }      }
404  }  }
405    
# Line 324  Line 430 
430  sub source {  sub source {
431      # Get the parameters.      # Get the parameters.
432      my ($self) = @_;      my ($self) = @_;
433        # If we do not have a source object, retrieve it.
434        if (! defined $self->{source}) {
435            $self->{source} = $self->{db}->GetSourceObject();
436        }
437      # Return the result.      # Return the result.
438      return $self->{source};      return $self->{source};
439  }  }
# Line 376  Line 486 
486      $self->{section} = $section;      $self->{section} = $section;
487      # Get the database object.      # Get the database object.
488      my $db = $self->db();      my $db = $self->db();
     # Start a timer and protect ourselves from errors.  
     my $startTime = time();  
     eval {  
489          # Get the list of tables for this group.          # Get the list of tables for this group.
490          my @tables = @{$self->{tables}};          my @tables = @{$self->{tables}};
491        # Should we skip this section?
492        if ($self->SkipIndicated($section, \@tables)) {
493            Trace("Resume mode: section $section skipped for group $self->{group}.") if T(3);
494            $self->Add("section-skips" => 1);
495        } else {
496            # Not skipping. Start a timer and protect ourselves from errors.
497            my $startTime = time();
498            eval {
499          # Get the loader hash.          # Get the loader hash.
500          my $loaderHash = $self->{loaders};          my $loaderHash = $self->{loaders};
501          # Initialize the loaders for the necessary tables.          # Initialize the loaders for the necessary tables.
# Line 389  Line 504 
504              my $loader = $loaderHash->{$table};              my $loader = $loaderHash->{$table};
505              # If it doesn't exist yet, create it.              # If it doesn't exist yet, create it.
506              if (! defined $loader) {              if (! defined $loader) {
507                  $loader = ERDBGenerate->new($db, $self->{directory}, $table);                      $loader = ERDBGenerate->new($db, $self->{directory}, $table, $self->{stats});
508                  # Save it for future use.                  # Save it for future use.
509                  $loaderHash->{$table} = $loader;                  $loaderHash->{$table} = $loader;
510                  # Count it.                  # Count it.
# Line 398  Line 513 
513              $loader->Start($section);              $loader->Start($section);
514          }          }
515          # Generate the data to put in the newly-created load files.          # Generate the data to put in the newly-created load files.
         Trace("Calling generator.") if T(3);  
516          $self->Generate();          $self->Generate();
517                # Release our hold on the source object. This allows the database object to
518                # decide whether or not we need a new one.
519                delete $self->{source};
520                # Clean up the database object.
521                $db->Cleanup();
522      };      };
523      # Did it work?      # Did it work?
524      if ($@) {      if ($@) {
# Line 417  Line 536 
536          for my $loader (values %{$self->{loaders}}) {          for my $loader (values %{$self->{loaders}}) {
537              $loader->Finish();              $loader->Finish();
538          }          }
539          # Update the load count and the timer.              # Update the load count.
540          $self->Add("section-loads" => 1);          $self->Add("section-loads" => 1);
541            }
542            # Update the timer.
543          $self->Add(duration => (time() - $startTime));          $self->Add(duration => (time() - $startTime));
544      }      }
545  }  }
# Line 468  Line 589 
589      my $retVal = {};      my $retVal = {};
590      # Loop through the list of load groups.      # Loop through the list of load groups.
591      for my $group ($erdb->LoadGroupList()) {      for my $group ($erdb->LoadGroupList()) {
         # Get a loader for this group.  
         my $loader = $erdb->Loader($group, {});  
592          # Stash the loader's tables in the output hash.          # Stash the loader's tables in the output hash.
593          $retVal->{$group} = $loader->{tables};          $retVal->{$group} = [ GetTables($erdb, $group) ];
594      }      }
595      # Return the result.      # Return the result.
596      return $retVal;      return $retVal;
597  }  }
598    
599  =head3 ComputeGroups  =head3 GetTables
600    
601      my @groupList = ERDBLoadGroup::ComputeGroups($erdb, $options, \@groups);      my @tables = ERDBLoadGroup::GetTables($group);
602    
603  Compute the actual list of groups determined by the incoming options and  Return the list of tables belonging to the specified load group.
 group list. If the list is an asterisk (C<*>), this method returns a list  
 of all the groups. If the options include C<resume>, this method returns  
 the first specified group and all the groups after it in the standard  
 ordering.  
604    
605  =over 4  =over 4
606    
607  =item erdb  =item erdb
608    
609  [[ErdbPm]] object for the database being loaded.  Return the list of tables for the specified load group.
610    
611  =item options  =item group
612    
613    Name of relevant group.
614    
615    =item RETURN
616    
617    Returns a list of a tables loaded by the specified group.
618    
619    =back
620    
621    =cut
622    
623    sub GetTables {
624        # Get the parameters.
625        my ($erdb, $group) = @_;
626        # Create a loader for the specified group.
627        my $loader = $erdb->Loader($group, undef, {});
628        # Extract the list of tables.
629        my @retVal = @{$loader->{tables}};
630        # Return the result.
631        return @retVal;
632    }
633    
634    
635    =head3 ComputeGroups
636    
637        my @groupList = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
638    
639  Reference to a hash of command-line options for the command that started  Compute the actual list of groups determined by the incoming group list.
640  this load operation.  
641    =over 4
642    
643    =item erdb
644    
645    [[ErdbPm]] object for the database being loaded.
646    
647  =item groups  =item groups
648    
649  Reference to a list of group names specified on the command line.  Reference to a list of group names specified on the command line. A plus sign
650    (C<+>) has special meaning.
651    
652  =item RETURN  =item RETURN
653    
# Line 513  Line 660 
660    
661  sub ComputeGroups {  sub ComputeGroups {
662      # Get the parameters.      # Get the parameters.
663      my ($erdb, $options, $groups) = @_;      my ($erdb, $groups) = @_;
664      # Declare the return variable.      # Get the complete group list in standard order.
665        my @allGroups = $erdb->LoadGroupList();
666        # Create a hash for validation purposes. This will map each valid group
667        # name to its position in the standard order.
668        my %allGroupHash;
669        for (my $i = 0; $i <= $#allGroups; $i++) {
670            $allGroupHash{$allGroups[$i]} = $i;
671        }
672        # This variable will be the index of the last-processed group in
673        # the standard order. We start it before the first group in the list.
674        my $lastI = -1;
675        # The listed groups will be put in here.
676      my @retVal;      my @retVal;
677      # Check the group list.      # Process the group list.
678      if ($groups->[0] eq '*') {      for my $group (@$groups) {
679          # Load all groups.          # Process this group.
680          @retVal = $erdb->LoadGroupList();          if ($group eq '+') {
681      } elsif ($options->{resume}) {              # Here we have a plus sign. Push in everything after the previous
682          # Load all groups after and including the specified one.              # group processed. Note that we'll be ending at the last position.
683          my $starter = $groups->[0];              # A second "+" after this one will generate no entries in the result
         @retVal = $erdb->LoadGroupList();  
         shift @retVal until (! @retVal) || $retVal[0] eq $starter;  
         # If we didn't find the specified group, it's an error.  
         Confess("Invalid group name \"$starter\" in parameter list.") if (! @retVal);  
     } else {  
         # Here the groups are all on the command line. Stuff them in the return  
684          # list.          # list.
685          @retVal = @{$groups};              my $firstI = $lastI + 1;
686          # Verify that they're all valid.              $lastI = $#allGroups;
687          my %checker = map { $_ => 1 } $erdb->LoadGroupList();              push @retVal, @allGroups[$firstI..$lastI];
688          for my $group (@retVal) {          } elsif (exists $allGroupHash{$group}) {
689              Confess("Invalid group name \"$group\" in parameter list.")              # Here we have a valid group name. Push it into the list.
690                  if ! $checker{$group};              push @retVal, $group;
691                # Remember its location in case there's a plus sign.
692                $lastI = $allGroupHash{$group};
693            } else {
694                # This is an error.
695                Confess("Invalid load group name $group.");
696          }          }
697      }      }
698      # Normalize the group names and return them.      # Normalize the group names and return them.
699      return map { ucfirst $_ } @retVal;      @retVal = map { ucfirst $_ } @retVal;
700        Trace("Final group list is " . join(" ", @retVal) . ".") if T(2);
701        return @retVal;
702    }
703    
704    =head3 KillFileName
705    
706        my $fileName = ERDBLoadGroup::KillFileName($erdb, $directory);
707    
708    Compute the kill file name for the specified database in the specified
709    directory. When the [[ERDBGeneratorPl]] script sees the kill file, it will
710    terminate itself at the end of the current section.
711    
712    =over 4
713    
714    =item erdb
715    
716    Database
717    
718    =item directory (optional)
719    
720    Load directory for the database.
721    
722    =item RETURN
723    
724    Returns the specified database's kill file name. If a directory is specified,
725    it is prefixed to the name with an intervening slash.
726    
727    
728    =back
729    
730    =cut
731    
732    sub KillFileName {
733        # Get the parameters.
734        my ($erdb, $directory) = @_;
735        # Compute the kill file name. We start with the database name in
736        # lower case, then prefix it with "kill_";
737        my $dbName = lc ref $erdb;
738        my $retVal = ERDBGenerate::CreateFileName("kill_$dbName", undef, 'control', $directory);
739        # Return the result.
740        return $retVal;
741    }
742    
743    =head3 SkipIndicated
744    
745        my $flag = $edbl->SkipIndicated($section, \@tables);
746    
747    Return FALSE if the current group should be run for the current section.
748    If the C<resume> option is not set, this method always returns FALSE;
749    otherwise, it will look at the files currently in the load directory and
750    if enough of them are present, it will return TRUE, indicating there's
751    no point in generating data for the indicated tables with respect to the
752    current section. In other words, it will return TRUE if, for every table,
753    there is either a load file for that table or a load file for the
754    specified section of that table.
755    
756    =over 4
757    
758    =item section
759    
760    ID of the relevant section.
761    
762    =item tables
763    
764    List of tables to check.
765    
766    =item RETURN
767    
768    Returns TRUE if load files are already generated for the specified section, else FALSE.
769    
770    =back
771    
772    =cut
773    
774    sub SkipIndicated {
775        # Get the parameters.
776        my ($self, $section, $tables) = @_;
777        # Declare the return variable. It's FALSE if there's no resume parameter.
778        my $retVal = $self->{options}->{resume};
779        # Loop through the table names while $retval is TRUE.
780        for my $table (@$tables) { last if ! $retVal;
781            # Compute the file names.
782            my @files = map { ERDBGenerate::CreateFileName($table, $_, data => $self->{directory}) }
783                (undef, $section);
784            # If neither is present, we can't skip. So, if the grep below returns an empty
785            # list, we set $retVal FALSE, which stops the loop.
786            if (scalar(grep { -f $_ } @files) == 0) {
787                $retVal = 0;
788                Trace("Section $section not found for $table in $self->{group}. Regeneration required.") if T(3);
789            }
790  }  }
791        # Return the result.
792        return $retVal;
793    }
794    
795    
796  =head2 Virtual Methods  =head2 Virtual Methods
797    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3