[Bio] / Sprout / ERDBGenerator.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBGenerator.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Thu Oct 9 17:23:54 2008 UTC revision 1.5, Mon Jan 19 21:46:21 2009 UTC
# Line 98  Line 98 
98  If specified, then this process will terminate after the specified number of  If specified, then this process will terminate after the specified number of
99  section load errors; otherwise, the process will keep going after a section  section load errors; otherwise, the process will keep going after a section
100  error. A value of C<0> means the process will ignore all errors. A value of  error. A value of C<0> means the process will ignore all errors. A value of
101  C<1> means it will stop after the first error.  C<1> means it will stop after the first error. The default is C<1>.
102    
103  =item phone  =item phone
104    
# Line 133  Line 133 
133    
134  Name of this process, for display during tracing.  Name of this process, for display during tracing.
135    
136    =item resume
137    
138    If specified, load files that already exist will not be regenerated.
139    
140  =item workers  =item workers
141    
142  If C<0>, then this is considered to be a worker process and the sections in the  If C<0>, then this is considered to be a worker process and the sections in the
# Line 141  Line 145 
145  then the appropriate number of worker processes are generated and the sections  then the appropriate number of worker processes are generated and the sections
146  are assigned to them in a round-robin fashion.  are assigned to them in a round-robin fashion.
147    
148    =item DBD
149    
150    Name of the DBD file. If specified, the DBD must be in the main FIG directory
151    (specified in C<$FIG_Config::fig>). This option allows the use of an alternate
152    DBD during load, so that access to the database by other processes is not
153    compromised.
154    
155  =back  =back
156    
157  =cut  =cut
# Line 151  Line 162 
162                                                clear => ["", "if specified, the entire load directory will be cleared"],                                                clear => ["", "if specified, the entire load directory will be cleared"],
163                                                clean => ["", "if specified, temporary files in the load directory will be deleted"],                                                clean => ["", "if specified, temporary files in the load directory will be deleted"],
164                                                clearGroups => ["", "if specified, pre-exising load files from the groups processed will be deleted"],                                                clearGroups => ["", "if specified, pre-exising load files from the groups processed will be deleted"],
165                                                maxErrors => ["0", "if non-zero, the maximum allowed number of section failures"],                                                maxErrors => ["1", "if non-zero, the maximum allowed number of section failures"],
166                                                phone => ["", "phone number (international format) to call when load finishes"],                                                phone => ["", "phone number (international format) to call when load finishes"],
167                                                trace => ["2", "tracing level"],                                                trace => ["3", "tracing level"],
168                                                workers => ["1", "number of worker processes"],                                                workers => ["1", "number of worker processes"],
169                                                label => ["Main", "name of this process"],                                                label => ["Main", "name of this process"],
170                                                resume => ["", "if specified, the specified group and all groups that normally come after it will be loaded"],                                                resume => ["", "if specified, only groups and sections that do not already have load files will be processed"],
171                                                sections => ["*", "name of a file in the database's load directory containing a list of sections to process"],                                                sections => ["*", "name of a file in the database's load directory containing a list of sections to process"],
172                                                  DBD => ["", "if specified, the name of a DBD file in the FIG directory"],
173                                             },                                             },
174                                             "<database> <group1> <group2> ...",                                             "<database> <group1> <group2> ...",
175                                             @ARGV);                                             @ARGV);
176  # This is a list of the options that are for manager scripts only.  # This is a list of the options that are for manager scripts only.
177  my @managerOptions = qw(clear clean clearGroups resume sections);  my @managerOptions = qw(clear clean clearGroups sections);
178  # We're doing heavy pipe stuff, so we need to throw an error on a broken-pipe signal.  # We're doing heavy pipe stuff, so we need to throw an error on a broken-pipe signal.
179  local $SIG{PIPE} = sub { Confess("Broken pipe.") };  local $SIG{PIPE} = sub { Confess("Broken pipe.") };
180  # Ensure we catch errors.  # Ensure we catch errors.
181  eval {  eval {
182      # Get the parameters.      # Get the parameters.
183      my ($database, @groups) = @parameters;      my ($database, @groups) = @parameters;
184        # Check for an alternate DBD.
185        my $altDBD = ($options->{DBD} ? "$FIG_Config::fig/$options->{DBD}" : undef);
186      # Connect to the database and get its load directory.      # Connect to the database and get its load directory.
187      my $erdb = ERDB::GetDatabase($database);      my $erdb = ERDB::GetDatabase($database, $altDBD);
188      my $directory = $erdb->LoadDirectory();      my $directory = $erdb->LoadDirectory();
189      # Fix the group list.      # Fix the group list.
190      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
     # Get the source object.  
     my $source = $erdb->GetSourceObject();  
191      # Are we a worker or a manager?      # Are we a worker or a manager?
192      if ($options->{workers} == 0) {      if ($options->{workers} == 0) {
193          # Yes, we're a worker.          # Yes, we're a worker.
194          Trace("Worker process $$ started.") if T(2);          Trace("Worker process $options->{label} started.") if T(2);
195          LoadFromInput(\*STDIN, $erdb, $source, \@realGroups, $options);          LoadFromInput(\*STDIN, $erdb, \@realGroups, $options);
196      } else {      } else {
197          # Here we're a manager. If the user wants us to clear the directory,          # Here we're a manager. If the user wants us to clear the directory,
198          # do that first.          # do that first.
# Line 191  Line 203 
203              my @files = ERDBGenerate::GetLoadFiles($directory);              my @files = ERDBGenerate::GetLoadFiles($directory);
204              # It's worth noting if we didn't find any.              # It's worth noting if we didn't find any.
205              if (! @files) {              if (! @files) {
206                  Tracer("Load directory is already clear.") if T(2);                  Trace("Load directory is already clear.") if T(2);
207              } else {              } else {
208                  # Delete the files we found.                  # Delete the files we found.
209                  for my $file (@files) {                  for my $file (@files) {
210                      unlink "$directory/$file";                      unlink "$directory/$file";
211                      $deleteCount++;                      $deleteCount++;
212                  }                  }
213                  Tracer("$deleteCount files deleted from load directory during Clear.") if T(2);                  Trace("$deleteCount files deleted from load directory during Clear.") if T(2);
214              }              }
215          } elsif ($options->{clearGroups}) {          } elsif ($options->{clearGroups}) {
216              # Here the user only wants to clear the load files for the specified              # Here the user only wants to clear the load files for the specified
# Line 237  Line 249 
249              # No, so we must create one.              # No, so we must create one.
250              $sectionFile = "$directory/Sections$$.txt";              $sectionFile = "$directory/Sections$$.txt";
251              Open(\*SECTIONS, ">$sectionFile");              Open(\*SECTIONS, ">$sectionFile");
252              for my $section ($erdb->SectionList($source)) {              for my $section ($erdb->SectionList()) {
253                  print SECTIONS "$section\n";                  print SECTIONS "$section\n";
254              }              }
255              close SECTIONS;              close SECTIONS;
# Line 263  Line 275 
275          if ($numWorkers == 1) {          if ($numWorkers == 1) {
276              # We're sequential, so we do all the work ourselves.              # We're sequential, so we do all the work ourselves.
277              Trace("Sequential load started.") if T(2);              Trace("Sequential load started.") if T(2);
278              LoadFromInput($ih, $erdb, $source, \@realGroups, \%workerOptions);              LoadFromInput($ih, $erdb, \@realGroups, \%workerOptions);
279          } else {          } else {
280              # Here we need to create the workers. The following arrays will contain,              # Here we need to create the workers. The following array will contain
281              # respectively, a list of open file handles and a list of worker labels.              # a descriptor for each worker.
             # Each one will correspond to a single worker. Writing to the handle in  
             # the first sends a section to the worker named in the second.  
282              my @workers = ();              my @workers = ();
283              my @labels = ();              # Compute the positional parameters to use for the workers.
             # Compute the positional parameters to use for the worker.  
284              my $commandParms = join(" ", $database, @realGroups);              my $commandParms = join(" ", $database, @realGroups);
285              my $command = $0;              my $command = $0;
286              # Create the workers.              # Create the workers.
# Line 279  Line 288 
288                  my $label = "$options->{label}$i";                  my $label = "$options->{label}$i";
289                  $workerOptions{label} = $label;                  $workerOptions{label} = $label;
290                  my $commandOptions = Tracer::UnparseOptions(\%workerOptions);                  my $commandOptions = Tracer::UnparseOptions(\%workerOptions);
291                  my $oh = Open(undef, "| $command $commandOptions $commandParms");                  my $inFile = "$FIG_Config::temp/Pipe-$label.tbl";
292                  select $oh; $| = 1; select STDOUT;                  my $oh = Open(undef, ">$inFile");
293                  push @workers, $oh;                  my $command = "$command $commandOptions $commandParms <$inFile >null &";
294                  push @labels, $label;                  push @workers, { handle => $oh, label => $label, command => $command };
295              }              }
296              # Now we assign sections to the workers.              # Now we assign sections to the workers.
297              my $w = 0;              my $w = 0;
298              while (! eof $ih) {              while (! eof $ih) {
299                  # Get the name of the next section.                  # Get the name of the next section.
300                  my $line = <$ih>;                  my $line = <$ih>;
301                  # Get the next worker in rotation.                  # Get the output handle for the next worker in rotation.
302                  my $worker = $workers[$w];                  my $wh = $workers[$w]->{handle};
303                  # Send this section to it.                  # Send this section to it.
304                  print $worker $line;                  print $wh $line;
305                  Trace(Tracer::Strip($line) . " sent to $labels[$w]") if T(3);                  Trace(Tracer::Strip($line) . " sent to $workers[$w]->{label}") if T(3);
306                  # Position on the next worker.                  # Position on the next worker.
307                  $w = ($w + 1) % $numWorkers;                  $w = ($w + 1) % $numWorkers;
308              }              }
309              # All done, wait for the workers to finish.              # All done, close the files.
310              for my $worker (@workers) {              for my $worker (@workers) {
311                  close $worker;                  close $worker->{handle};
312                }
313                # Now start the workers.
314                for my $worker (@workers) {
315                    my $cmd = $worker->{command};
316                    Trace("Starting: $cmd") if T(3);
317                    system($worker->{command});
318              }              }
319          }          }
320          Trace("Load manager completed.") if T(2);          Trace("Load manager completed.") if T(2);
# Line 337  Line 352 
352    
353  Database object containing information about the tables being loaded.  Database object containing information about the tables being loaded.
354    
 =item source  
   
 Source object used to access the data from which the load file is created.  
   
355  =item groups  =item groups
356    
357  Reference to a list of the names for the load groups to process.  Reference to a list of the names for the load groups to process.
# Line 355  Line 366 
366    
367  sub LoadFromInput {  sub LoadFromInput {
368      # Get the parameters.      # Get the parameters.
369      my ($ih, $erdb, $source, $groups, $options) = @_;      my ($ih, $erdb, $groups, $options) = @_;
     # Create a load object for each group.  
     my %loaders = map { $_ => $erdb->Loader($_, $source, $options) } @{$groups};  
370      # We'll count our errors in here.      # We'll count our errors in here.
371      my $errorCount = 0;      my $errorCount = 0;
372      my $maxErrors = $options->{maxErrors};      my $maxErrors = $options->{maxErrors};
373      # Compute the kill file name.      # Compute the kill file name.
374      my $killFileName = ERDBLoadGroup::KillFileName($erdb, $erdb->LoadDirectory());      my $killFileName = ERDBLoadGroup::KillFileName($erdb, $erdb->LoadDirectory());
375      my $killed = 0;      my $killed = 0;
376      # Loop through the sections.      # Slurp in the sections.
377      while (! $killed && ! eof $ih) {      my @sections = ();
378          # Get this section ID.      while (! eof $ih) {
379          my ($section) = Tracer::GetLine($ih);          push @sections, Tracer::GetLine($ih);
380          # Process it for each load group.      }
381        # Loop through the groups.
382          for my $group (@$groups) {          for my $group (@$groups) {
383            # Create a loader for this group.
384            my $loader = $erdb->Loader($group, $options);
385            # Loop through the sections.
386            for my $section (@sections) {
387              # Only proceed if we haven't been killed.              # Only proceed if we haven't been killed.
388              if (! $killed) {              if (! $killed) {
389                  # Check for a kill file.                  # Check for a kill file.
# Line 379  Line 393 
393                      $killed = 1;                      $killed = 1;
394                  } else {                  } else {
395                      # No kill file, so we process the section.                      # No kill file, so we process the section.
396                      Trace("Processing section $section for group $group in $options->{label}($$).") if T(3);                      Trace("Processing section $section for group $group in $options->{label}.") if T(3);
397                      my $ok = $loaders{$group}->ProcessSection($section);                      my $ok = $loader->ProcessSection($section);
398                      # Check to see if we've exceeded the maximum error count. We only care                      # Check to see if we've exceeded the maximum error count. We only care
399                      # if maxErrors is nonzero.                      # if maxErrors is nonzero.
400                      if (! $ok && $maxErrors && ++$errorCount >= $maxErrors) {                      if (! $ok && $maxErrors && ++$errorCount >= $maxErrors) {
# Line 390  Line 404 
404                  }                  }
405              }              }
406          }          }
407            Trace("Statistics for $group in $options->{label}:\n" . $loader->DisplayStats()) if T(2);
408      }      }
409      # Now we display the statistics for each group.      Trace("Processing finished for worker $options->{label}.") if T(2);
     for my $group (@$groups) {  
         Trace("Statistics for group $group\n" . $loaders{$group}->DisplayStats()) if T(2);  
     }  
     Trace("Processing finished for worker $$.") if T(2);  
410  }  }
411    
412  1;  1;

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.5

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3