[Bio] / Sprout / ERDBLoader.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoader.pl

Parent Directory | Revision Log | View Patch

revision 1.2, Wed Oct 1 03:07:08 2008 UTC revision 1.5, Wed Oct 15 11:41:12 2008 UTC
# Line 57  Line 57 
57  Name of the ERDB database. This should be the class name for the subclass used  Name of the ERDB database. This should be the class name for the subclass used
58  to access the database.  to access the database.
59    
   
60  =back  =back
61    
62  =head2 Command-Line Options  =head2 Command-Line Options
# Line 69  Line 68 
68  Specifies the tracing level. The higher the tracing level, the more messages  Specifies the tracing level. The higher the tracing level, the more messages
69  will appear in the trace log. Use E to specify emergency tracing.  will appear in the trace log. Use E to specify emergency tracing.
70    
 =item resume  
   
 If specified, then the group list must contain a single group. The specified  
 group and all groups after it in the group list will be processed.  
   
71  =item user  =item user
72    
73  Name suffix to be used for log files. If omitted, the PID is used.  Name suffix to be used for log files. If omitted, the PID is used.
# Line 93  Line 87 
87    
88  Display this command's parameters and options.  Display this command's parameters and options.
89    
90    =item keepSections
91    
92    If specified, section files (the fragments of data load files created by
93    [[ERDBGeneratorPl]]) will not be deleted after they are collated.
94    
95    =item sanityCheck
96    
97    If specified, no tables will be loaded. Instead, the first I<N> records from the
98    assembled load files will be displayed so that the file contents can be
99    visually matched against the column names.
100    
101  =item warn  =item warn
102    
103  Create an event in the RSS feed when an error occurs.  Create an event in the RSS feed when an error occurs.
# Line 108  Line 113 
113  # Get the command-line options and parameters.  # Get the command-line options and parameters.
114  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],
115                                             {                                             {
116                                                  sanityCheck => ["", "don't load, trace contents of first N load file records instead"],
117                                                trace => ["", "tracing level"],                                                trace => ["", "tracing level"],
118                                                resume => ["", "if specified, the specified group and all groups that normally come after it will be processed"],                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],
119                                                phone => ["", "phone number (international format) to call when load finishes"]                                                phone => ["", "phone number (international format) to call when load finishes"]
120                                             },                                             },
121                                             "<database> <group1> <group2> ...",                                             "<database> <group1> <group2> ...",
# Line 123  Line 129 
129      # Connect to the database.      # Connect to the database.
130      my $erdb = ERDB::GetDatabase($database);      my $erdb = ERDB::GetDatabase($database);
131      # Fix the group list.      # Fix the group list.
132      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, $options, \@groups);      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
133      # Get the source object and load directory for this database.      # Get the source object and load directory for this database.
134      my $source = $erdb->GetSourceObject();      my $source = $erdb->GetSourceObject();
135      my $directory = $erdb->LoadDirectory();      my $directory = $erdb->LoadDirectory();
# Line 131  Line 137 
137      my @sectionList = $erdb->SectionList($source);      my @sectionList = $erdb->SectionList($source);
138      # Create a statistics object to track our progress.      # Create a statistics object to track our progress.
139      my $stats = Stats->new();      my $stats = Stats->new();
140      # Get the hash of group names to table names.      # Find out if we're doing a sanity check.
141      my $groupHash = ERDBLoadGroup::GetGroupHash($erdb);      my $sanityCheck = $options->{sanityCheck} || "";
142      # Start a timer.      # Start a timer.
143      my $totalStart = time();      my $totalStart = time();
144      # Loop through the groups.      # Loop through the groups.
145      for my $group (@realGroups) {      for my $group (@realGroups) {
146          # Get the list of tables for this group.          # Get the list of tables for this group.
147          my $tableList = $groupHash->{$group};          my @tableList = ERDBLoadGroup::GetTables($erdb, $group);
148          # We need to ensure there is a data file for every table. If we fail to find one,          # We need to ensure there is a data file for every table. If we fail to find one,
149          # we set the following error flag, which prevents us from loading the database.          # we set the following error flag, which prevents us from loading the database.
150          my $missingTable = 0;          my $missingTable = 0;
151          # Loop through the tables in this group.          # Loop through the tables in this group.
152          for my $table (@$tableList) {          for my $table (@tableList) {
153                Trace("Processing table $table for assembly.") if T(2);
154                # Get the section file names.
155                my @sectionFiles =
156                    map { ERDBGenerate::CreateFileName($table, $_, 'data', $directory) } @sectionList;
157              # Get the data file name.              # Get the data file name.
158              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
159              # Do we have it?              # Do we have it?
160              if (-f $dataFile) {              if (-f $dataFile) {
161                  # Yes. This is good news.                  # Yes. This is good news.
162                  $stats->Add('tables-found' => 1);                  $stats->Add('tables-found' => 1);
163                    Trace("Table file found for $table.") if T(3);
164              } else {              } else {
165                  # No, we must build it. Verify that we have all the sections.                  # No, we must build it. Verify that we have all the sections.
                 my @sectionFiles =  
                     map { ERDBGenerate::CreateFileName($table, $_, 'data', $directory) } @sectionList;  
166                  my @missingFiles = grep { ! -f $_ } @sectionFiles;                  my @missingFiles = grep { ! -f $_ } @sectionFiles;
167                  # Tell the user about all the missing files.                  # Tell the user about all the missing files.
168                  for my $missingFile (@missingFiles) {                  for my $missingFile (@missingFiles) {
# Line 163  Line 172 
172                  # Did we find everything?                  # Did we find everything?
173                  if (scalar @missingFiles) {                  if (scalar @missingFiles) {
174                      # No! Denote that we have a missing table.                      # No! Denote that we have a missing table.
175                      $missingTable = 1;                      $missingTable++;
176                      $stats->Add('tables-skipped' => 1);                      $stats->Add('tables-skipped' => 1);
177                  } else {                  } else {
178                      # Yes! Try to assemble the sections into a data file.                      # Yes! Try to assemble the sections into a data file.
179                      my $sortStart = time();                      my $sortStart = time();
180                      my $sortCommand = $erdb->SortNeeded($table);                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";
181                      my $oh = Open(undef, "| $sortCommand >$dataFile");                      Trace("Sort command: $sortCommand") if T(3);
182                        my $oh = Open(undef, "| $sortCommand");
183                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
184                          Trace("Collating $sectionFile.") if T(4);                          Trace("Collating $sectionFile.") if T(3);
185                          $stats->Add('sections-loaded' => 1);                          $stats->Add("$table-sections" => 1);
186                          for my $line (Tracer::GetFile($sectionFile)) {                          for my $line (Tracer::GetFile($sectionFile)) {
187                              print $oh "$line\n";                              print $oh "$line\n";
188                              $stats->Add('lines-collated' => 1);                              $stats->Add("$table-collations" => 1);
189                          }                          }
190                      }                      }
191                      # Finish the sort step.                      # Finish the sort step.
192                        Trace("Finishing collate for $table.") if T(3);
193                      close $oh;                      close $oh;
194                      $stats->Add('tables-collated' => 1);                      $stats->Add('tables-collated' => 1);
195                      # Now that we've collated the section files, we can delete them                      $stats->Add('collate-time' => time() - $sortStart);
196                      # to make room in the data directory.                  }
197                }
198                # Now that we know we have a full data file, we can delete the
199                # section files to make room in the data directory. The user can
200                # turn this behavior off with the keepSections option.
201                if (! $options->{keepSections}) {
202                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
203                        if (-e $sectionFile) {
204                          unlink $sectionFile;                          unlink $sectionFile;
205                          $stats->Add('files-deleted' => 1);                          $stats->Add('files-deleted' => 1);
206                      }                      }
                     $stats->Add('collate-time' => time() - $sortStart);  
207                  }                  }
208                    Trace("Section files for $table deleted.") if T(3);
209              }              }
210          }          }
211          # Were any tables missing?          # Were any tables missing?
212          if ($missingTable) {          if ($missingTable) {
213              # Yes, skip this group.              # Yes, skip this group.
214              $stats->Add('groups-skipped' => 1);              $stats->Add('groups-skipped' => 1);
215                Trace("Skipping $group group: $missingTable missing tables.") if T(3);
216          } else {          } else {
217              # No! Load this group into the database.              # No! Process this group's files.
218                if ($sanityCheck eq "") {
219                    Trace("Loading group $group into database.") if T(2);
220                } else {
221                    Trace("Sanity check for group $group.") if T(2);
222                }
223              my $loadStart = time();              my $loadStart = time();
224              for my $table (@$tableList) {              for my $table (@tableList) {
225                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
226                    # Do we want a real load or a sanity check?
227                    if ($sanityCheck eq "") {
228                        # Real load.
229                  my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);                  my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);
230                  $stats->Accumulate($newStats);                  $stats->Accumulate($newStats);
231                        Trace("$fileName loaded into $table.") if T(3);
232                    } elsif ($sanityCheck > 0) {
233                        # Here we want a sanity check. Note that if the check value is 0,
234                        # we don't bother. The user just wants to suppress the load step.
235                        CheckLoadFile($erdb, $table, $fileName, $sanityCheck);
236                    }
237              }              }
238              $stats->Add("groups-loaded" => 1);              $stats->Add("groups-loaded" => 1);
239              $stats->Add('load-time' => 1);              $stats->Add('load-time' => 1);
# Line 225  Line 257 
257      }      }
258  }  }
259    
260    =head3 CheckLoadFile
261    
262        CheckLoadFile($erdb, $table, $fileName, $count);
263    
264    Read the first few records of a load file and trace the contents at level
265    2. This allows the user to visually compare the load file contents with
266    the database definition.
267    
268    =over 4
269    
270    =item erdb
271    
272    [[ErdbPm]] object describing the database.
273    
274    =item table
275    
276    Name of the table to check.
277    
278    =item fileName
279    
280    Name of the load file to check.
281    
282    =item count
283    
284    Number of records to check.
285    
286    =back
287    
288    =cut
289    
290    sub CheckLoadFile {
291        # Get the parameters.
292        my ($erdb, $table, $fileName, $count) = @_;
293        # Open the file for input.
294        my $ih = Open(undef, "<$fileName");
295        # Slurp the first N records.
296        my @records;
297        while (! eof $ih && scalar(@records) < $count) {
298            push @records, [ Tracer::GetLine($ih) ];
299        }
300        my $found = scalar(@records);
301        Trace("$found records for $table found in sanity check.") if T(3);
302        # Do we have any data at all?
303        if ($found) {
304            # Yes. Get the table's descriptor. We use this to determine the field names.
305            my $relationData = $erdb->FindRelation($table);
306            Confess("Relation $table not found in database.") if (! defined $relationData);
307            my @fields = @{$relationData->{Fields}};
308            # Loop through the fields. We generate one message per field.
309            for (my $i = 0; $i <= $#fields; $i++) {
310                # Get this field's information.
311                my $fieldInfo = $fields[$i];
312                my $type = $fieldInfo->{type};
313                # This is going to be a multi-line trace message. We start with the field name and type.
314                my @lines = ("Values for $fieldInfo->{name}, type $type:\n");
315                # Loop through the records. We generate one line of data per record.
316                for (my $j = 0; $j < $found; $j++) {
317                    # Get the field value.
318                    my $field = $records[$j]->[$i];
319                    # Compute the record label.
320                    my $line = "Record $j";
321                    # Check for unusual cases.
322                    if (! defined $field) {
323                        $line .= "= <null>";
324                    } elsif ($field eq '') {
325                        $line .= "= <empty>";
326                    } else {
327                        # Make sure we don't trace something ungodly.
328                        my $excess = (length $field) - 40;
329                        if ($excess > 0) {
330                            $field = substr($field, 0, 40) . " >> + $excess characters";
331                        }
332                        $line .= ": $field";
333                    }
334                    # Save this line. We indent a little for readability.
335                    push @lines, "   $line";
336                }
337                # Trace this field.
338                Trace(join("\n", @lines)) if T(2);
339            }
340        }
341    }
342    
343    
344  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.5

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3