[Bio] / Sprout / ERDBLoader.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoader.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.3, Thu Oct 2 16:32:42 2008 UTC revision 1.4, Thu Oct 9 17:23:54 2008 UTC
# Line 93  Line 93 
93  If specified, section files (the fragments of data load files created by  If specified, section files (the fragments of data load files created by
94  [[ERDBGeneratorPl]]), will not be deleted after they are collated.  [[ERDBGeneratorPl]]), will not be deleted after they are collated.
95    
96    =item sanityCheck
97    
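98    If specified, no tables will be loaded. Instead, the first I<N> records from each
99    assembled load file will be traced, where I<N> is the value of this option, so that the file
100    contents can be visually matched against the column names. A value of C<0> suppresses the load step without tracing any records.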
101    
102  =item warn  =item warn
103    
104  Create an event in the RSS feed when an error occurs.  Create an event in the RSS feed when an error occurs.
# Line 108  Line 114 
114  # Get the command-line options and parameters.  # Get the command-line options and parameters.
115  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],
116                                             {                                             {
117                                                  sanityCheck => ["", "don't load, trace contents of first N load file records instead"],
118                                                trace => ["", "tracing level"],                                                trace => ["", "tracing level"],
119                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],
120                                                phone => ["", "phone number (international format) to call when load finishes"]                                                phone => ["", "phone number (international format) to call when load finishes"]
# Line 124  Line 131 
131      my $erdb = ERDB::GetDatabase($database);      my $erdb = ERDB::GetDatabase($database);
132      # Fix the group list.      # Fix the group list.
133      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
     Trace("Real groups are: " . join(" ", @realGroups)) if T(3);  
134      # Get the source object and load directory for this database.      # Get the source object and load directory for this database.
135      my $source = $erdb->GetSourceObject();      my $source = $erdb->GetSourceObject();
136      my $directory = $erdb->LoadDirectory();      my $directory = $erdb->LoadDirectory();
# Line 132  Line 138 
138      my @sectionList = $erdb->SectionList($source);      my @sectionList = $erdb->SectionList($source);
139      # Create a statistics object to track our progress.      # Create a statistics object to track our progress.
140      my $stats = Stats->new();      my $stats = Stats->new();
141        # Find out if we're doing a sanity check.
142        my $sanityCheck = $options->{sanityCheck} || "";
143      # Start a timer.      # Start a timer.
144      my $totalStart = time();      my $totalStart = time();
145      # Loop through the groups.      # Loop through the groups.
# Line 143  Line 151 
151          my $missingTable = 0;          my $missingTable = 0;
152          # Loop through the tables in this group.          # Loop through the tables in this group.
153          for my $table (@tableList) {          for my $table (@tableList) {
154              Trace("Processing table $table.") if T(2);              Trace("Processing table $table for assembly.") if T(2);
155                # Get the section file names.
156                my @sectionFiles =
157                    map { ERDBGenerate::CreateFileName($table, $_, 'data', $directory) } @sectionList;
158              # Get the data file name.              # Get the data file name.
159              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
160              # Do we have it?              # Do we have it?
161              if (-f $dataFile) {              if (-f $dataFile) {
162                  # Yes. This is good news.                  # Yes. This is good news.
163                  $stats->Add('tables-found' => 1);                  $stats->Add('tables-found' => 1);
164                    Trace("Table file found for $table.") if T(3);
165              } else {              } else {
166                  # No, we must build it. Verify that we have all the sections.                  # No, we must build it. Verify that we have all the sections.
                 my @sectionFiles =  
                     map { ERDBGenerate::CreateFileName($table, $_, 'data', $directory) } @sectionList;  
167                  my @missingFiles = grep { ! -f $_ } @sectionFiles;                  my @missingFiles = grep { ! -f $_ } @sectionFiles;
168                  # Tell the user about all the missing files.                  # Tell the user about all the missing files.
169                  for my $missingFile (@missingFiles) {                  for my $missingFile (@missingFiles) {
# Line 168  Line 178 
178                  } else {                  } else {
179                      # Yes! Try to assemble the sections into a data file.                      # Yes! Try to assemble the sections into a data file.
180                      my $sortStart = time();                      my $sortStart = time();
181                      my $sortCommand = $erdb->SortNeeded($table);                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";
182                      my $oh = Open(undef, "| $sortCommand >$dataFile");                      Trace("Sort command: $sortCommand") if T(3);
183                        my $oh = Open(undef, "| $sortCommand");
184                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
185                          Trace("Collating $sectionFile.") if T(4);                          Trace("Collating $sectionFile.") if T(3);
186                          $stats->Add('sections-loaded' => 1);                          $stats->Add("$table-sections" => 1);
187                          for my $line (Tracer::GetFile($sectionFile)) {                          for my $line (Tracer::GetFile($sectionFile)) {
188                              print $oh "$line\n";                              print $oh "$line\n";
189                              $stats->Add('lines-collated' => 1);                              $stats->Add("$table-collations" => 1);
190                          }                          }
191                      }                      }
192                      # Finish the sort step.                      # Finish the sort step.
193                        Trace("Finishing collate for $table.") if T(3);
194                      close $oh;                      close $oh;
195                      $stats->Add('tables-collated' => 1);                      $stats->Add('tables-collated' => 1);
196                      # Now that we've collated the section files, we can delete them                      $stats->Add('collate-time' => time() - $sortStart);
197                      # to make room in the data directory. The user can turn this                  }
198                      # behavior off with the keepSections option.              }
199                # Now that we know we have a full data file, we can delete the
200                # section files to make room in the data directory. The user can
201                # turn this behavior off with the keepSections option.
202                      if (! $options->{keepSections}) {                      if (! $options->{keepSections}) {
203                          for my $sectionFile (@sectionFiles) {                          for my $sectionFile (@sectionFiles) {
204                        if (-e $sectionFile) {
205                              unlink $sectionFile;                              unlink $sectionFile;
206                              $stats->Add('files-deleted' => 1);                              $stats->Add('files-deleted' => 1);
207                          }                          }
                         Trace("Section files for $table deleted.") if T(3);  
                     }  
                     $stats->Add('collate-time' => time() - $sortStart);  
208                  }                  }
209                    Trace("Section files for $table deleted.") if T(3);
210              }              }
211          }          }
212          # Were any tables missing?          # Were any tables missing?
# Line 201  Line 215 
215              $stats->Add('groups-skipped' => 1);              $stats->Add('groups-skipped' => 1);
216              Trace("Skipping $group group: $missingTable missing tables.") if T(3);              Trace("Skipping $group group: $missingTable missing tables.") if T(3);
217          } else {          } else {
218              # No! Load this group into the database.              # No! Process this group's files.
219              Trace("Loading $group group into database.") if T(2);              if ($sanityCheck eq "") {
220                    Trace("Loading group $group into database.") if T(2);
221                } else {
222                    Trace("Sanity check for group $group.") if T(2);
223                }
224              my $loadStart = time();              my $loadStart = time();
225              for my $table (@tableList) {              for my $table (@tableList) {
226                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
227                    # Do we want a real load or a sanity check?
228                    if ($sanityCheck eq "") {
229                        # Real load.
230                  my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);                  my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);
231                  $stats->Accumulate($newStats);                  $stats->Accumulate($newStats);
232                  Trace("$fileName loaded into $table.") if T(3);                  Trace("$fileName loaded into $table.") if T(3);
233                    } elsif ($sanityCheck > 0) {
234                        # Here we want a sanity check. Note that if the check value is 0,
235                        # we don't bother. The user just wants to suppress the load step.
236                        CheckLoadFile($erdb, $table, $fileName, $sanityCheck);
237                    }
238              }              }
239              $stats->Add("groups-loaded" => 1);              $stats->Add("groups-loaded" => 1);
240              $stats->Add('load-time' => 1);              $stats->Add('load-time' => 1);
# Line 232  Line 258 
258      }      }
259  }  }
260    
261    =head3 CheckLoadFile
262    
263        CheckLoadFile($erdb, $table, $fileName, $count);
264    
265    Read the first few records of a load file and trace the contents at level
266    2. This allows the user to visually compare the load file contents with
267    the database definition.
268    
269    =over 4
270    
271    =item erdb
272    
273    [[ErdbPm]] object describing the database.
274    
275    =item table
276    
277    Name of the table to check.
278    
279    =item fileName
280    
281    Name of the load file to check.
282    
283    =item count
284    
285    Number of records to check.
286    
287    =back
288    
289    =cut
290    
# CheckLoadFile($erdb, $table, $fileName, $count)
#
# Read up to $count records from the start of the load file $fileName and
# trace them one field at a time (at trace level 2), so that the values can be
# compared by eye against the field names and types of relation $table.
#
#   erdb      database object; must support FindRelation($table)
#   table     name of the relation the load file belongs to
#   fileName  name of the load file to sample
#   count     maximum number of records to read
#
# Confesses if the relation cannot be found. Nothing is returned.
sub CheckLoadFile {
    # Get the parameters.
    my ($erdb, $table, $fileName, $count) = @_;
    # Open the file for input.
    # NOTE(review): Open is the project's helper; presumably it dies on
    # failure, so no status check is made here -- confirm.
    my $ih = Open(undef, "<$fileName");
    # Slurp the first N records. Each record is kept as an array of the values
    # returned by Tracer::GetLine (presumably one value per column -- confirm).
    my @records;
    while (! eof($ih) && scalar(@records) < $count) {
        push @records, [ Tracer::GetLine($ih) ];
    }
    # We have everything we need from the file, so release the handle now
    # rather than holding it open while the trace messages are produced.
    close $ih;
    my $found = scalar(@records);
    Trace("$found records for $table found in sanity check.") if T(3);
    # Do we have any data at all?
    if ($found) {
        # Yes. Get the table's descriptor. We use this to determine the field names.
        my $relationData = $erdb->FindRelation($table);
        Confess("Relation $table not found in database.") if (! defined $relationData);
        my @fields = @{$relationData->{Fields}};
        # Loop through the fields. We generate one message per field. Field
        # number $i of the relation is matched with value number $i of each record.
        for my $i (0 .. $#fields) {
            # Get this field's information.
            my $fieldInfo = $fields[$i];
            my $type = $fieldInfo->{type};
            # This is going to be a multi-line trace message. We start with the
            # field name and type.
            my @lines = ("Values for $fieldInfo->{name}, type $type:\n");
            # Loop through the records. We generate one line of data per record.
            for my $j (0 .. $found - 1) {
                # Get the field value. This is a copy, so truncating it below
                # does not alter the saved record.
                my $field = $records[$j]->[$i];
                # Compute the record label.
                my $line = "Record $j";
                # Check for unusual cases. A record with fewer values than the
                # relation has fields shows up here as <null>.
                if (! defined $field) {
                    $line .= "= <null>";
                } elsif ($field eq '') {
                    $line .= "= <empty>";
                } else {
                    # Make sure we don't trace something ungodly: show at most
                    # 40 characters and report how many were left off.
                    my $excess = (length $field) - 40;
                    if ($excess > 0) {
                        $field = substr($field, 0, 40) . " >> + $excess characters";
                    }
                    $line .= ": $field";
                }
                # Save this line. We indent a little for readability.
                push @lines, "   $line";
            }
            # Trace this field.
            Trace(join("\n", @lines)) if T(2);
        }
    }
}
343    
344    
345  1;  1;

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.4

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3