[Bio] / Sprout / ERDBLoader.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoader.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.10, Mon May 4 18:49:49 2009 UTC revision 1.11, Thu May 28 18:06:58 2009 UTC
# Line 22  Line 22 
22  use ERDB;  use ERDB;
23  use ERDBLoadGroup;  use ERDBLoadGroup;
24  use ERDBGenerate;  use ERDBGenerate;
25    use ERDBExtras;
26  use Stats;  use Stats;
27  use Time::HiRes;  use Time::HiRes;
28    
# Line 97  Line 98 
98  If specified, section files (the fragments of data load files created by  If specified, section files (the fragments of data load files created by
99  [[ERDBGeneratorPl]]) will not be deleted after they are collated.  [[ERDBGeneratorPl]]) will not be deleted after they are collated.
100    
 =item sanityCheck  
   
 If specified, no tables will be loaded. Instead, the first I<N> records from the  
 assembled load files will be displayed so that the file contents can be  
 visually matched against the column names.  
   
101  =item warn  =item warn
102    
103  Create an event in the RSS feed when an error occurs.  Create an event in the RSS feed when an error occurs.
# Line 139  Line 134 
134                                             {                                             {
135                                                dbName => ["", "if specified, the SQL name of the target database"],                                                dbName => ["", "if specified, the SQL name of the target database"],
136                                                clear => ["", "overwrite existing load files if sections are present"],                                                clear => ["", "overwrite existing load files if sections are present"],
                                               sanityCheck => ["", "don't load, trace contents of first N load file records instead"],  
137                                                trace => ["2", "tracing level"],                                                trace => ["2", "tracing level"],
138                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],
139                                                phone => ["", "phone number (international format) to call when load finishes"],                                                phone => ["", "phone number (international format) to call when load finishes"],
# Line 165  Line 159 
159      my @sectionList = $erdb->SectionList($source);      my @sectionList = $erdb->SectionList($source);
160      # Create a statistics object to track our progress.      # Create a statistics object to track our progress.
161      my $stats = Stats->new();      my $stats = Stats->new();
162      # Find out if we're doing a sanity check.      # We make one pass to assemble all the tables in all the groups, and
163      my $sanityCheck = $options->{sanityCheck} || "";      # then another to do the actual loads. The groups that are ready to load
164        # in the second pass will go in this list.
165        my @goodGroups;
166      # Start a timer.      # Start a timer.
167      my $totalStart = time();      my $totalStart = time();
168      # Loop through the groups.      # Loop through the groups.
# Line 197  Line 193 
193                      # No! Denote that we have a missing table.                      # No! Denote that we have a missing table.
194                      $missingTable++;                      $missingTable++;
195                      $stats->Add('tables-skipped' => 1);                      $stats->Add('tables-skipped' => 1);
196                      # If the user wants a sanity check, we want to give him some                      # Tell the user about all the missing files.
                     # data anyway.  
                     if ($sanityCheck) {  
                         # Get some data lines in the sections. Note we stop when we've exceeded  
                         # the number of lines expected by the sanity check.  
                         my @lines;  
                         for my $sectionFile (@sectionFiles) {  
                             if (-s $sectionFile && scalar(@lines) < $sanityCheck) {  
                                 Trace("Reading from $sectionFile for $table.") if T(3);  
                                 push @lines, Tracer::GetFile($sectionFile);  
                             }  
                         }  
                         # Create a new temporary file.  
                         my $tmpFile = "$FIG_Config::temp/$table$$.temp.dtx";  
                         my $oh = Open(undef, ">$tmpFile");  
                         # Put all the data into it.  
                         Trace(scalar(@lines) . " data lines found.") if T(3);  
                         print $oh join("\n", @lines);  
                         close $oh;  
                         # Sanity check the temp file.  
                         CheckLoadFile($erdb, $table, $tmpFile, $sanityCheck);  
                         # Clean it up.  
                         unlink $tmpFile;  
                     } else {  
                         # Otherwise tell the user about all the missing files.  
197                          for my $missingFile (@missingFiles) {                          for my $missingFile (@missingFiles) {
198                              $stats->Add('sections-missing' => 1);                              $stats->Add('sections-missing' => 1);
199                              $stats->AddMessage("Data file $missingFile not found for table $table.");                              $stats->AddMessage("Data file $missingFile not found for table $table.");
200                          }                          }
                     }  
201                  } else {                  } else {
202                      # We have all the sections. Try to assemble them into a data file.                      # We have all the sections. Try to assemble them into a data file.
203                      my $sortStart = time();                      my $sortStart = time();
# Line 273  Line 244 
244              $stats->Add('groups-skipped' => 1);              $stats->Add('groups-skipped' => 1);
245              Trace("Skipping $group group: $missingTable missing tables.") if T(2);              Trace("Skipping $group group: $missingTable missing tables.") if T(2);
246          } else {          } else {
247              # No! Process this group's files.              # No! File this group for processing in the second pass.
248              if ($sanityCheck eq "") {              push @goodGroups, $group;
249                  Trace("Loading group $group into database.") if T(2);          }
250        }
251        # Now we loop through the good groups, doing the actual loads.
252        for my $group (@goodGroups) {
253            # Get a group object.
254            my $groupData = $erdb->Loader($group);
255            # Do the post-processing.
256            my $postStats = $groupData->PostProcess();
257            # Determine what happened.
258            if (! defined $postStats) {
259                Trace("Post-processing not required for $group.") if T(3);
260              } else {              } else {
261                  Trace("Sanity check for group $group.") if T(2);              $stats->Accumulate($postStats);
262                $stats->Add('post-processing' => 1);
263              }              }
264            # Process this group's files.
265            Trace("Loading group $group into database.") if T(2);
266            # Get the list of tables.
267            my @tableList = $groupData->GetTables();
268            # Start a timer.
269              my $loadStart = time();              my $loadStart = time();
270              for my $table (@tableList) {              for my $table (@tableList) {
271                # Compute the load file name.
272                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);                  my $fileName = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
273                  # Do we want a real load or a sanity check?              # Do the actual load.
                 if ($sanityCheck eq "") {  
                     # Real load.  
274                      my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);                      my $newStats = $erdb->LoadTable($fileName, $table, truncate => 1, failOnError => 1);
275                      $stats->Accumulate($newStats);                      $stats->Accumulate($newStats);
276                      Trace("$fileName loaded into $table.") if T(3);                      Trace("$fileName loaded into $table.") if T(3);
                 } elsif ($sanityCheck > 0) {  
                     # Here we want a sanity check. Note that if the check value is 0,  
                     # we don't bother. The user just wants to suppress the load step.  
                     CheckLoadFile($erdb, $table, $fileName, $sanityCheck);  
                 }  
277              }              }
278              $stats->Add("groups-loaded" => 1);              $stats->Add("groups-loaded" => 1);
279              $stats->Add('load-time' => 1);              $stats->Add('load-time' => 1);
280          }          }
     }  
281      $stats->Add('total-time' => time() - $totalStart);      $stats->Add('total-time' => time() - $totalStart);
282      # Display the statistics from this run.      # Display the statistics from this run.
283      Trace("Statistics for load:\n" . $stats->Show()) if T(2);      Trace("Statistics for load:\n" . $stats->Show()) if T(2);
# Line 316  Line 296 
296      }      }
297  }  }
298    
 =head2 Internal Methods  
   
 =head3 CheckLoadFile  
   
     CheckLoadFile($erdb, $table, $fileName, $count);  
   
 Read the first few records of a load file and trace the contents at level  
 2. This allows the user to visually compare the load file contents with  
 the database definition.  
   
 =over 4  
   
 =item erdb  
   
 [[ErdbPm]] object describing the database.  
   
 =item table  
   
 Name of the table to check.  
   
 =item fileName  
   
 Name of the load file to check.  
   
 =item count  
   
 Number of records to check.  
   
 =back  
   
 =cut  
   
 sub CheckLoadFile {  
     # Get the parameters.  
     my ($erdb, $table, $fileName, $count) = @_;  
     # Open the file for input.  
     my $ih = Open(undef, "<$fileName");  
     # Slurp the first N records.  
     my @records;  
     while (! eof $ih && scalar(@records) < $count) {  
         push @records, [ Tracer::GetLine($ih) ];  
     }  
     my $found = scalar(@records);  
     Trace("$found records for $table found in sanity check using $fileName.") if T(2);  
     # Do we have any data at all?  
     if ($found) {  
         # Yes. Get the table's descriptor. We use this to determine the field names.  
         my $relationData = $erdb->FindRelation($table);  
         Confess("Relation $table not found in database.") if (! defined $relationData);  
         my @fields = @{$relationData->{Fields}};  
         # If this is a relationship, we need the FROM and TO data.  
         my %ends; ($ends{from}, $ends{to}) = $erdb->GetRelationshipEntities($table);  
         # Loop through the fields. We generate one message per field.  
         for (my $i = 0; $i <= $#fields; $i++) {  
             # Get this field's information.  
             my $fieldInfo = $fields[$i];  
             my $type = $fieldInfo->{type};  
             my $name = $fieldInfo->{name};  
             if ($name =~ /^(from|to)-link$/) {  
                 # Here it's a relationship link, so add the name of the target table to  
                 # the type.  
                 $type .= " ($ends{$1})";  
             }  
             # This is going to be a multi-line trace message. We start with the field name and type.  
             my @lines = ("Values for $table($name), type $type:\n");  
             # Loop through the records. We generate one line of data per record.  
             for (my $j = 0; $j < $found; $j++) {  
                 # Get the field value.  
                 my $field = $records[$j]->[$i];  
                 # Compute the record label.  
                 my $line = "Record $j";  
                 # Check for unusual cases.  
                 if (! defined $field || $field eq '') {  
                     $line .= "= <empty>";  
                 } else {  
                     # Make sure we don't trace something ungodly.  
                     my $excess = (length $field) - 40;  
                     if ($excess > 0) {  
                         $field = substr($field, 0, 40) . " >> + $excess characters";  
                     }  
                     $line .= ": $field";  
                 }  
                 # Save this line. We indent a little for readability.  
                 push @lines, "   $line";  
             }  
             # Trace this field.  
             Trace(join("\n", @lines)) if T(2);  
         }  
     }  
 }  
   
299    
300  1;  1;

Legend:
Removed from v.1.10  
changed lines
  Added in v.1.11

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3