[Bio] / Sprout / ERDBLoader.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoader.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Wed Oct 1 03:07:08 2008 UTC revision 1.4, Thu Oct 9 17:23:54 2008 UTC
# Line 69  Line 69 
69  Specifies the tracing level. The higher the tracing level, the more messages  Specifies the tracing level. The higher the tracing level, the more messages
70  will appear in the trace log. Use E to specify emergency tracing.  will appear in the trace log. Use E to specify emergency tracing.
71    
 =item resume  
   
 If specified, then the group list must contain a single group. The specified  
 group and all groups after it in the group list will be processed.  
   
72  =item user  =item user
73    
74  Name suffix to be used for log files. If omitted, the PID is used.  Name suffix to be used for log files. If omitted, the PID is used.
# Line 93  Line 88 
88    
89  Display this command's parameters and options.  Display this command's parameters and options.
90    
91    =item keepSections
92    
93    If specified, section files (the fragments of data load files created by
94    [[ERDBGeneratorPl]]) will not be deleted after they are collated.
95    
96    =item sanityCheck
97    
98    If specified, no tables will be loaded. Instead, the first I<N> records from the
99    assembled load files will be displayed so that the file contents can be
100    visually matched against the column names.
101    
102  =item warn  =item warn
103    
104  Create an event in the RSS feed when an error occurs.  Create an event in the RSS feed when an error occurs.
# Line 108  Line 114 
114  # Get the command-line options and parameters.  # Get the command-line options and parameters.
115  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],
116                                             {                                             {
117                                                  sanityCheck => ["", "don't load, trace contents of first N load file records instead"],
118                                                trace => ["", "tracing level"],                                                trace => ["", "tracing level"],
119                                                resume => ["", "if specified, the specified group and all groups that normally come after it will be processed"],                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],
120                                                phone => ["", "phone number (international format) to call when load finishes"]                                                phone => ["", "phone number (international format) to call when load finishes"]
121                                             },                                             },
122                                             "<database> <group1> <group2> ...",                                             "<database> <group1> <group2> ...",
# Line 123  Line 130 
130      # Connect to the database.      # Connect to the database.
131      my $erdb = ERDB::GetDatabase($database);      my $erdb = ERDB::GetDatabase($database);
132      # Fix the group list.      # Fix the group list.
133      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, $options, \@groups);      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
134      # Get the source object and load directory for this database.      # Get the source object and load directory for this database.
135      my $source = $erdb->GetSourceObject();      my $source = $erdb->GetSourceObject();
136      my $directory = $erdb->LoadDirectory();      my $directory = $erdb->LoadDirectory();
# Line 131  Line 138 
138      my @sectionList = $erdb->SectionList($source);      my @sectionList = $erdb->SectionList($source);
139      # Create a statistics object to track our progress.      # Create a statistics object to track our progress.
140      my $stats = Stats->new();      my $stats = Stats->new();
141      # Get the hash of group names to table names.      # Find out if we're doing a sanity check.
142      my $groupHash = ERDBLoadGroup::GetGroupHash($erdb);      my $sanityCheck = $options->{sanityCheck} || "";
143      # Start a timer.      # Start a timer.
144      my $totalStart = time();      my $totalStart = time();
145      # Loop through the groups.      # Loop through the groups.
146      for my $group (@realGroups) {      for my $group (@realGroups) {
147          # Get the list of tables for this group.          # Get the list of tables for this group.
148          my $tableList = $groupHash->{$group};          my @tableList = ERDBLoadGroup::GetTables($erdb, $group);
149          # We need to insure there is a data file for every table. If we fail to find one,          # We need to insure there is a data file for every table. If we fail to find one,
150          # we set the following error flag, which prevents us from loading the database.          # we set the following error flag, which prevents us from loading the database.
151          my $missingTable = 0;          my $missingTable = 0;
152          # Loop through the tables in this group.          # Loop through the tables in this group.
153          for my $table (@$tableList) {          for my $table (@tableList) {
154                Trace("Processing table $table for assembly.") if T(2);
155                # Get the section file names.
156                my @sectionFiles =
157                    map { ERDBGenerate::CreateFileName($table, $_, 'data', $directory) } @sectionList;
158              # Get the data file name.              # Get the data file name.
159              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
160              # Do we have it?              # Do we have it?
161              if (-f $dataFile) {              if (-f $dataFile) {
162                  # Yes. This is good news.                  # Yes. This is good news.
163                  $stats->Add('tables-found' => 1);                  $stats->Add('tables-found' => 1);
164                    Trace("Table file found for $table.") if T(3);
165              } else {              } else {
166                  # No, we must build it. Verify that we have all the sections.                  # No, we must build it. Verify that we have all the sections.
                 my @sectionFiles =  
                     map { ERDBGenerate::CreateFileName($table, $_, 'data', $directory) } @sectionList;  
167                  my @missingFiles = grep { ! -f $_ } @sectionFiles;                  my @missingFiles = grep { ! -f $_ } @sectionFiles;
168                  # Tell the user about all the missing files.                  # Tell the user about all the missing files.
169                  for my $missingFile (@missingFiles) {                  for my $missingFile (@missingFiles) {
# Line 163  Line 173 
173                  # Did we find everything?                  # Did we find everything?
174                  if (scalar @missingFiles) {                  if (scalar @missingFiles) {
175                      # No! Denote that we have a missing table.                      # No! Denote that we have a missing table.
176                      $missingTable = 1;                      $missingTable++;
177                      $stats->Add('tables-skipped' => 1);                      $stats->Add('tables-skipped' => 1);
178                  } else {                  } else {
179                      # Yes! Try to assemble the sections into a data file.                      # Yes! Try to assemble the sections into a data file.
180                      my $sortStart = time();                      my $sortStart = time();
181                      my $sortCommand = $erdb->SortNeeded($table);                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";
182                      my $oh = Open(undef, "| $sortCommand >$dataFile");                      Trace("Sort command: $sortCommand") if T(3);
183                        my $oh = Open(undef, "| $sortCommand");
184                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
185                          Trace("Collating $sectionFile.") if T(4);                          Trace("Collating $sectionFile.") if T(3);
186                          $stats->Add('sections-loaded' => 1);                          $stats->Add("$table-sections" => 1);
187                          for my $line (Tracer::GetFile($sectionFile)) {                          for my $line (Tracer::GetFile($sectionFile)) {
188                              print $oh "$line\n";                              print $oh "$line\n";
189                              $stats->Add('lines-collated' => 1);                              $stats->Add("$table-collations" => 1);
190                          }                          }
191                      }                      }
192                      # Finish the sort step.                      # Finish the sort step.
193                        Trace("Finishing collate for $table.") if T(3);
194                      close $oh;                      close $oh;
195                      $stats->Add('tables-collated' => 1);                      $stats->Add('tables-collated' => 1);
196                      # Now that we've collated the section files, we can delete them                      $stats->Add('collate-time' => time() - $sortStart);
197                      # to make room in the data directory.                  }
198                }
199                # Now that we know we have a full data file, we can delete the
200                # section files to make room in the data directory. The user can
201                # turn this behavior off with the keepSections option.
202                if (! $options->{keepSections}) {
203                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
204                        if (-e $sectionFile) {
205                          unlink $sectionFile;                          unlink $sectionFile;
206                          $stats->Add('files-deleted' => 1);                          $stats->Add('files-deleted' => 1);
207                      }                      }
                     $stats->Add('collate-time' => time() - $sortStart);  
208                  }                  }
209                    Trace("Section files for $table deleted.") if T(3);
210              }              }
211          }          }
212          # Were any tables missing?          # Were any tables missing?
213          if ($missingTable) {          if ($missingTable) {
214              # Yes, skip this group.              # Yes, skip this group.
215              $stats->Add('groups-skipped' => 1);              $stats->Add('groups-skipped' => 1);
216                Trace("Skipping $group group: $missingTable missing tables.") if T(3);
217          } else {          } else {
218              # No! Load this group into the database.              # No! Process this group's files.
219                if ($sanityCheck eq "") {
220                    Trace("Loading group $group into database.") if T(2);
221                } else {
222                    Trace("Sanity check for group $group.") if T(2);
223                }
224              my $loadStart = time();              my $loadStart = time();
225              for my $table (@$tableList) {              for my $table (@tableList) {
226                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
227                    # Do we want a real load or a sanity check?
228                    if ($sanityCheck eq "") {
229                        # Real load.
230                  my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);                  my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);
231                  $stats->Accumulate($newStats);                  $stats->Accumulate($newStats);
232                        Trace("$fileName loaded into $table.") if T(3);
233                    } elsif ($sanityCheck > 0) {
234                        # Here we want a sanity check. Note that if the check value is 0,
235                        # we don't bother. The user just wants to suppress the load step.
236                        CheckLoadFile($erdb, $table, $fileName, $sanityCheck);
237                    }
238              }              }
239              $stats->Add("groups-loaded" => 1);              $stats->Add("groups-loaded" => 1);
240              $stats->Add('load-time' => 1);              $stats->Add('load-time' => 1);
# Line 225  Line 258 
258      }      }
259  }  }
260    
=head3 CheckLoadFile

    CheckLoadFile($erdb, $table, $fileName, $count);

Read the first few records of a load file and trace the contents at level
2. This allows the user to visually compare the load file contents with
the database definition.

One multi-line trace message is produced per field of the relation. Each
message starts with the field's name and type, followed by one line per
record that was read. A value missing from a record is shown as C<< <null> >>,
an empty string as C<< <empty> >>, and any value longer than 40 characters
is cut off and followed by a count of the characters omitted.

If the file contains no records, nothing is traced at level 2 and the
relation is not looked up.

=over 4

=item erdb

[[ErdbPm]] object describing the database.

=item table

Name of the table to check. If at least one record was read and the table
cannot be found in the database, a fatal error occurs.

=item fileName

Name of the load file to check.

=item count

Maximum number of records to read from the start of the file.

=back

=cut

sub CheckLoadFile {
    # Get the parameters.
    my ($erdb, $table, $fileName, $count) = @_;
    # Longest field value we are willing to trace in full.
    my $maxWidth = 40;
    # Open the file for input.
    my $ih = Open(undef, "<$fileName");
    # Slurp the first N records. Each record is stored as a list reference
    # (presumably one element per column -- confirm against Tracer::GetLine).
    my @records;
    while (! eof $ih && scalar(@records) < $count) {
        push @records, [ Tracer::GetLine($ih) ];
    }
    # We have everything we need from the file, so release the handle now
    # instead of holding it open while the trace messages are built.
    close $ih;
    my $found = scalar(@records);
    Trace("$found records for $table found in sanity check.") if T(3);
    # With no data at all there is nothing to display.
    return if ! $found;
    # Get the table's descriptor. We use this to determine the field names.
    my $relationData = $erdb->FindRelation($table);
    Confess("Relation $table not found in database.") if (! defined $relationData);
    my @fields = @{$relationData->{Fields}};
    # Loop through the fields. We generate one message per field.
    for my $i (0 .. $#fields) {
        # Get this field's information.
        my $fieldInfo = $fields[$i];
        my $type = $fieldInfo->{type};
        # This is going to be a multi-line trace message. We start with the
        # field name and type.
        my @lines = ("Values for $fieldInfo->{name}, type $type:\n");
        # Loop through the records. We generate one line of data per record.
        for my $j (0 .. $found - 1) {
            # Get the field value. It is undefined if the record has fewer
            # columns than the relation has fields.
            my $field = $records[$j]->[$i];
            # Compute the record label.
            my $line = "Record $j";
            # Special cases are flagged with "=" so they cannot be confused
            # with a literal value, which is introduced by ":".
            if (! defined $field) {
                $line .= "= <null>";
            } elsif ($field eq '') {
                $line .= "= <empty>";
            } else {
                # Make sure we don't trace something ungodly.
                my $excess = (length $field) - $maxWidth;
                if ($excess > 0) {
                    $field = substr($field, 0, $maxWidth) . " >> + $excess characters";
                }
                $line .= ": $field";
            }
            # Save this line. We indent a little for readability.
            push @lines, "   $line";
        }
        # Trace this field.
        Trace(join("\n", @lines)) if T(2);
    }
    return;
}
343    
344    
345  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.4

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3