[Bio] / Sprout / ERDBLoader.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoader.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Wed Oct 1 03:07:08 2008 UTC revision 1.7, Thu Feb 5 06:50:12 2009 UTC
# Line 57  Line 57 
57  Name of the ERDB database. This should be the class name for the subclass used  Name of the ERDB database. This should be the class name for the subclass used
58  to access the database.  to access the database.
59    
   
60  =back  =back
61    
62  =head2 Command-Line Options  =head2 Command-Line Options
# Line 69  Line 68 
68  Specifies the tracing level. The higher the tracing level, the more messages  Specifies the tracing level. The higher the tracing level, the more messages
69  will appear in the trace log. Use E to specify emergency tracing.  will appear in the trace log. Use E to specify emergency tracing.
70    
 =item resume  
   
 If specified, then the group list must contain a single group. The specified  
 group and all groups after it in the group list will be processed.  
   
71  =item user  =item user
72    
73  Name suffix to be used for log files. If omitted, the PID is used.  Name suffix to be used for log files. If omitted, the PID is used.
# Line 82  Line 76 
76    
77  If specified, turns on tracing of SQL activity.  If specified, turns on tracing of SQL activity.
78    
79    =item clear
80    
81    If specified, existing load files will be recreated from sections if the sections
82    are present.
83    
84  =item background  =item background
85    
86  Save the standard and error output to files. The files will be created  Save the standard and error output to files. The files will be created
# Line 93  Line 92 
92    
93  Display this command's parameters and options.  Display this command's parameters and options.
94    
95    =item keepSections
96    
97    If specified, section files (the fragments of data load files created by
98    [[ERDBGeneratorPl]]) will not be deleted after they are collated.
99    
100    =item sanityCheck
101    
102    If specified, no tables will be loaded. Instead, the first I<N> records from the
103    assembled load files, where I<N> is the value of this option, will be displayed
104    so that the file contents can be visually matched against the column names.
105    
106  =item warn  =item warn
107    
108  Create an event in the RSS feed when an error occurs.  Create an event in the RSS feed when an error occurs.
# Line 101  Line 111 
111    
112  Phone number to message when the script is complete.  Phone number to message when the script is complete.
113    
114    =item DBD
115    
116    Name of the DBD file. If specified, the DBD must be in the main FIG directory
117    (specified in C<$FIG_Config::fig>). This option allows the use of an alternate
118    DBD during load, so that access to the database by other processes is not
119    compromised.
120    
121  =back  =back
122    
123  =cut  =cut
# Line 108  Line 125 
125  # Get the command-line options and parameters.  # Get the command-line options and parameters.
126  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],
127                                             {                                             {
128                                                trace => ["", "tracing level"],                                                clear => ["", "overwrite existing load files if sections are present"],
129                                                resume => ["", "if specified, the specified group and all groups that normally come after it will be processed"],                                                sanityCheck => ["", "don't load, trace contents of first N load file records instead"],
130                                                phone => ["", "phone number (international format) to call when load finishes"]                                                trace => ["2", "tracing level"],
131                                                  keepSections => ["", "if specified, section files will not be deleted after being collated"],
132                                                  phone => ["", "phone number (international format) to call when load finishes"],
133                                                  DBD => ["", "if specified, the name of a DBD file in the FIG directory"],
134                                             },                                             },
135                                             "<database> <group1> <group2> ...",                                             "<database> <group1> <group2> ...",
136                                             @ARGV);                                             @ARGV);
# Line 120  Line 140 
140  eval {  eval {
141      # Get the parameters.      # Get the parameters.
142      my ($database, @groups) = @parameters;      my ($database, @groups) = @parameters;
143      # Connect to the database.      # Check for an alternate DBD.
144      my $erdb = ERDB::GetDatabase($database);      my $altDBD = ($options->{DBD} ? "$FIG_Config::fig/$options->{DBD}" : undef);
145        # Connect to the database and get its load directory.
146        my $erdb = ERDB::GetDatabase($database, $altDBD);
147      # Fix the group list.      # Fix the group list.
148      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, $options, \@groups);      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
149      # Get the source object and load directory for this database.      # Get the source object and load directory for this database.
150      my $source = $erdb->GetSourceObject();      my $source = $erdb->GetSourceObject();
151      my $directory = $erdb->LoadDirectory();      my $directory = $erdb->LoadDirectory();
# Line 131  Line 153 
153      my @sectionList = $erdb->SectionList($source);      my @sectionList = $erdb->SectionList($source);
154      # Create a statistics object to track our progress.      # Create a statistics object to track our progress.
155      my $stats = Stats->new();      my $stats = Stats->new();
156      # Get the hash of group names to table names.      # Find out if we're doing a sanity check.
157      my $groupHash = ERDBLoadGroup::GetGroupHash($erdb);      my $sanityCheck = $options->{sanityCheck} || "";
158      # Start a timer.      # Start a timer.
159      my $totalStart = time();      my $totalStart = time();
160      # Loop through the groups.      # Loop through the groups.
161      for my $group (@realGroups) {      for my $group (@realGroups) {
162          # Get the list of tables for this group.          # Get the list of tables for this group.
163          my $tableList = $groupHash->{$group};          my @tableList = ERDBLoadGroup::GetTables($erdb, $group);
164          # We need to insure there is a data file for every table. If we fail to find one,          # We need to insure there is a data file for every table. If we fail to find one,
165          # we set the following error flag, which prevents us from loading the database.          # we set the following error flag, which prevents us from loading the database.
166          my $missingTable = 0;          my $missingTable = 0;
167          # Loop through the tables in this group.          # Loop through the tables in this group.
168          for my $table (@$tableList) {          for my $table (@tableList) {
169                Trace("Processing table $table for assembly.") if T(2);
170                # Get the section file names.
171                my @sectionFiles =
172                    map { ERDBGenerate::CreateFileName($table, $_, 'data', $directory) } @sectionList;
173              # Get the data file name.              # Get the data file name.
174              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
175              # Do we have it?              # Do we have it?
176              if (-f $dataFile) {              if (-f $dataFile && ! $options->{clear}) {
177                  # Yes. This is good news.                  # Yes. This is good news.
178                  $stats->Add('tables-found' => 1);                  $stats->Add('tables-found' => 1);
179                    Trace("Table file found for $table.") if T(3);
180              } else {              } else {
181                  # No, we must build it. Verify that we have all the sections.                  # No, we must build it. Verify that we have all the sections.
                 my @sectionFiles =  
                     map { ERDBGenerate::CreateFileName($table, $_, 'data', $directory) } @sectionList;  
182                  my @missingFiles = grep { ! -f $_ } @sectionFiles;                  my @missingFiles = grep { ! -f $_ } @sectionFiles;
                 # Tell the user about all the missing files.  
                 for my $missingFile (@missingFiles) {  
                     $stats->Add('sections-missing' => 1);  
                     $stats->AddMessage("Data file $missingFile not found for table $table.");  
                 }  
183                  # Did we find everything?                  # Did we find everything?
184                  if (scalar @missingFiles) {                  if (scalar @missingFiles) {
185                      # No! Denote that we have a missing table.                      # No! Denote that we have a missing table.
186                      $missingTable = 1;                      $missingTable++;
187                      $stats->Add('tables-skipped' => 1);                      $stats->Add('tables-skipped' => 1);
188                        # If the user wants a sanity check, we want to give him some
189                        # data anyway.
190                        if ($sanityCheck) {
191                            # Get some data lines in the sections. Note we stop when we've exceeded
192                            # the number of lines expected by the sanity check.
193                            my @lines;
194                            for my $sectionFile (@sectionFiles) {
195                                if (-s $sectionFile && scalar(@lines) < $sanityCheck) {
196                                    Trace("Reading from $sectionFile for $table.") if T(3);
197                                    push @lines, Tracer::GetFile($sectionFile);
198                                }
199                            }
200                            # Create a new temporary file.
201                            my $tmpFile = "$FIG_Config::temp/$table$$.temp.dtx";
202                            my $oh = Open(undef, ">$tmpFile");
203                            # Put all the data into it.
204                            Trace(scalar(@lines) . " data lines found.") if T(3);
205                            print $oh join("\n", @lines);
206                            close $oh;
207                            # Sanity check the temp file.
208                            CheckLoadFile($erdb, $table, $tmpFile, $sanityCheck);
209                            # Clean it up.
210                            unlink $tmpFile;
211                        } else {
212                            # Otherwise tell the user about all the missing files.
213                            for my $missingFile (@missingFiles) {
214                                $stats->Add('sections-missing' => 1);
215                                $stats->AddMessage("Data file $missingFile not found for table $table.");
216                            }
217                        }
218                  } else {                  } else {
219                      # Yes! Try to assemble the sections into a data file.                      # We have all the sections. Try to assemble them into a data file.
220                      my $sortStart = time();                      my $sortStart = time();
221                      my $sortCommand = $erdb->SortNeeded($table);                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";
222                      my $oh = Open(undef, "| $sortCommand >$dataFile");                      Trace("Sort command: $sortCommand") if T(3);
223                        # Pipe to the sort command. Note that we turn on autoflush
224                        # so there's no buffering.
225                        my $oh = Open(undef, "| $sortCommand");
226                        select $oh; $| = 1; select STDOUT;
227                        # Loop through the sections.
228                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
229                          Trace("Collating $sectionFile.") if T(4);                          Trace("Collating $sectionFile.") if T(3);
230                          $stats->Add('sections-loaded' => 1);                          $stats->Add("$table-sections" => 1);
231                          for my $line (Tracer::GetFile($sectionFile)) {                          # Loop through the section file.
232                              print $oh "$line\n";                          my $ih = Open(undef, "<$sectionFile");
233                              $stats->Add('lines-collated' => 1);                          while (defined (my $line = <$ih>)) {
234                                print $oh $line;
235                                $stats->Add("$table-collations" => 1);
236                          }                          }
237                      }                      }
238                      # Finish the sort step.                      # Finish the sort step.
239                        Trace("Finishing collate for $table.") if T(3);
240                      close $oh;                      close $oh;
241                      $stats->Add('tables-collated' => 1);                      $stats->Add('tables-collated' => 1);
242                      # Now that we've collated the section files, we can delete them                      $stats->Add('collate-time' => time() - $sortStart);
243                      # to make room in the data directory.                  }
244                }
245                # Now that we know we have a full data file, we can delete the
246                # section files to make room in the data directory. The user can
247                # turn this behavior off with the keepSections option.
248                if (! $options->{keepSections}) {
249                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
250                        if (-e $sectionFile) {
251                          unlink $sectionFile;                          unlink $sectionFile;
252                          $stats->Add('files-deleted' => 1);                          $stats->Add('files-deleted' => 1);
253                      }                      }
                     $stats->Add('collate-time' => time() - $sortStart);  
254                  }                  }
255                    Trace("Section files for $table deleted.") if T(3);
256              }              }
257          }          }
258          # Were any tables missing?          # Were any tables missing?
259          if ($missingTable) {          if ($missingTable) {
260              # Yes, skip this group.              # Yes, skip this group.
261              $stats->Add('groups-skipped' => 1);              $stats->Add('groups-skipped' => 1);
262                Trace("Skipping $group group: $missingTable missing tables.") if T(2);
263            } else {
264                # No! Process this group's files.
265                if ($sanityCheck eq "") {
266                    Trace("Loading group $group into database.") if T(2);
267          } else {          } else {
268              # No! Load this group into the database.                  Trace("Sanity check for group $group.") if T(2);
269                }
270              my $loadStart = time();              my $loadStart = time();
271              for my $table (@$tableList) {              for my $table (@tableList) {
272                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
273                    # Do we want a real load or a sanity check?
274                    if ($sanityCheck eq "") {
275                        # Real load.
276                  my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);                  my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);
277                  $stats->Accumulate($newStats);                  $stats->Accumulate($newStats);
278                        Trace("$fileName loaded into $table.") if T(3);
279                    } elsif ($sanityCheck > 0) {
280                        # Here we want a sanity check. Note that if the check value is 0,
281                        # we don't bother. The user just wants to suppress the load step.
282                        CheckLoadFile($erdb, $table, $fileName, $sanityCheck);
283                    }
284              }              }
285              $stats->Add("groups-loaded" => 1);              $stats->Add("groups-loaded" => 1);
286              $stats->Add('load-time' => 1);              $stats->Add('load-time' => 1);
# Line 225  Line 304 
304      }      }
305  }  }
306    
307    =head3 CheckLoadFile
308    
309        CheckLoadFile($erdb, $table, $fileName, $count);
310    
311    Read the first few records of a load file and trace the contents at level
312    2. This allows the user to visually compare the load file contents with
313    the database definition.
314    
315    =over 4
316    
317    =item erdb
318    
319    [[ErdbPm]] object describing the database.
320    
321    =item table
322    
323    Name of the table to check.
324    
325    =item fileName
326    
327    Name of the load file to check.
328    
329    =item count
330    
331    Number of records to check.
332    
333    =back
334    
335    =cut
336    
337    sub CheckLoadFile {
338        # Get the parameters: database object, table name, load file name, and maximum record count.
339        my ($erdb, $table, $fileName, $count) = @_;
340        # Open the file for input. NOTE(review): $ih is never explicitly closed in this sub.
341        my $ih = Open(undef, "<$fileName");
342        # Read up to $count records, stopping early at end-of-file. Each record is kept as an array ref of the values returned by Tracer::GetLine.
343        my @records;
344        while (! eof $ih && scalar(@records) < $count) {
345            push @records, [ Tracer::GetLine($ih) ];
346        }
347        my $found = scalar(@records);
348        Trace("$found records for $table found in sanity check using $fileName.") if T(2);
349        # Do we have any data at all? If not, nothing more is traced.
350        if ($found) {
351            # Yes. Get the table's descriptor. We use this to determine the field names and types.
352            my $relationData = $erdb->FindRelation($table);
353            Confess("Relation $table not found in database.") if (! defined $relationData);
354            my @fields = @{$relationData->{Fields}};
355            # Get the FROM and TO data. It is only used for from-link/to-link fields, but the call is made for every table.
356            my %ends; ($ends{from}, $ends{to}) = $erdb->GetRelationshipEntities($table);
357            # Loop through the fields by position. We generate one message per field.
358            for (my $i = 0; $i <= $#fields; $i++) {
359                # Get this field's information.
360                my $fieldInfo = $fields[$i];
361                my $type = $fieldInfo->{type};
362                my $name = $fieldInfo->{name};
363                if ($name =~ /^(from|to)-link$/) {
364                    # Here it's a relationship link, so add the name of the target table to
365                    # the type.
366                    $type .= " ($ends{$1})";
367                }
368                # This is going to be a multi-line trace message. We start with the field name and type.
369                my @lines = ("Values for $table($name), type $type:\n");
370                # Loop through the records. We generate one line of data per record.
371                for (my $j = 0; $j < $found; $j++) {
372                    # Get the value at this field's position in the record.
373                    my $field = $records[$j]->[$i];
374                    # Compute the record label. Records are numbered from 0.
375                    my $line = "Record $j";
376                    # A missing or empty value is shown as <empty>.
377                    if (! defined $field || $field eq '') {
378                        $line .= "= <empty>";
379                    } else {
380                        # Truncate values longer than 40 characters and note how many characters were cut.
381                        my $excess = (length $field) - 40;
382                        if ($excess > 0) {
383                            $field = substr($field, 0, 40) . " >> + $excess characters";
384                        }
385                        $line .= ": $field";
386                    }
387                    # Save this line. We indent a little for readability.
388                    push @lines, "   $line";
389                }
390                # Trace this field's header and record lines as a single message.
391                Trace(join("\n", @lines)) if T(2);
392            }
393        }
394    }
395    
396    
397  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3