[Bio] / Sprout / ERDBLoader.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoader.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Wed Oct 15 11:41:12 2008 UTC revision 1.7, Thu Feb 5 06:50:12 2009 UTC
# Line 76  Line 76 
76    
77  If specified, turns on tracing of SQL activity.  If specified, turns on tracing of SQL activity.
78    
79    =item clear
80    
81    If specified, existing load files will be rebuilt from their section files,
82    provided the sections are present.
83    
84  =item background  =item background
85    
86  Save the standard and error output to files. The files will be created  Save the standard and error output to files. The files will be created
# Line 106  Line 111 
111    
112  Phone number to message when the script is complete.  Phone number to message when the script is complete.
113    
114    =item DBD
115    
116    Name of the DBD file. If specified, the DBD must be in the main FIG directory
117    (specified in C<$FIG_Config::fig>). This option allows the use of an alternate
118    DBD during load, so that access to the database by other processes is not
119    compromised.
120    
121  =back  =back
122    
123  =cut  =cut
# Line 113  Line 125 
125  # Get the command-line options and parameters.  # Get the command-line options and parameters.
126  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],
127                                             {                                             {
128                                                  clear => ["", "overwrite existing load files if sections are present"],
129                                                sanityCheck => ["", "don't load, trace contents of first N load file records instead"],                                                sanityCheck => ["", "don't load, trace contents of first N load file records instead"],
130                                                trace => ["", "tracing level"],                                                trace => ["2", "tracing level"],
131                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],
132                                                phone => ["", "phone number (international format) to call when load finishes"]                                                phone => ["", "phone number (international format) to call when load finishes"],
133                                                  DBD => ["", "if specified, the name of a DBD file in the FIG directory"],
134                                             },                                             },
135                                             "<database> <group1> <group2> ...",                                             "<database> <group1> <group2> ...",
136                                             @ARGV);                                             @ARGV);
# Line 126  Line 140 
140  eval {  eval {
141      # Get the parameters.      # Get the parameters.
142      my ($database, @groups) = @parameters;      my ($database, @groups) = @parameters;
143      # Connect to the database.      # Check for an alternate DBD.
144      my $erdb = ERDB::GetDatabase($database);      my $altDBD = ($options->{DBD} ? "$FIG_Config::fig/$options->{DBD}" : undef);
145        # Connect to the database, using the alternate DBD if one was specified.
146        my $erdb = ERDB::GetDatabase($database, $altDBD);
147      # Fix the group list.      # Fix the group list.
148      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
149      # Get the source object and load directory for this database.      # Get the source object and load directory for this database.
# Line 157  Line 173 
173              # Get the data file name.              # Get the data file name.
174              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
175              # Do we have it?              # Do we have it?
176              if (-f $dataFile) {              if (-f $dataFile && ! $options->{clear}) {
177                  # Yes. This is good news.                  # Yes. This is good news.
178                  $stats->Add('tables-found' => 1);                  $stats->Add('tables-found' => 1);
179                  Trace("Table file found for $table.") if T(3);                  Trace("Table file found for $table.") if T(3);
180              } else {              } else {
181                  # No, we must build it. Verify that we have all the sections.                  # No, we must build it. Verify that we have all the sections.
182                  my @missingFiles = grep { ! -f $_ } @sectionFiles;                  my @missingFiles = grep { ! -f $_ } @sectionFiles;
                 # Tell the user about all the missing files.  
                 for my $missingFile (@missingFiles) {  
                     $stats->Add('sections-missing' => 1);  
                     $stats->AddMessage("Data file $missingFile not found for table $table.");  
                 }  
183                  # Did we find everything?                  # Did we find everything?
184                  if (scalar @missingFiles) {                  if (scalar @missingFiles) {
185                      # No! Denote that we have a missing table.                      # No! Denote that we have a missing table.
186                      $missingTable++;                      $missingTable++;
187                      $stats->Add('tables-skipped' => 1);                      $stats->Add('tables-skipped' => 1);
188                        # If the user wants a sanity check, we want to give him some
189                        # data anyway.
190                        if ($sanityCheck) {
191                            # Get some data lines from the sections. Note we stop reading more
192                            # sections once we have at least the number of lines the sanity check expects.
193                            my @lines;
194                            for my $sectionFile (@sectionFiles) {
195                                if (-s $sectionFile && scalar(@lines) < $sanityCheck) {
196                                    Trace("Reading from $sectionFile for $table.") if T(3);
197                                    push @lines, Tracer::GetFile($sectionFile);
198                                }
199                            }
200                            # Create a new temporary file.
201                            my $tmpFile = "$FIG_Config::temp/$table$$.temp.dtx";
202                            my $oh = Open(undef, ">$tmpFile");
203                            # Put all the data into it.
204                            Trace(scalar(@lines) . " data lines found.") if T(3);
205                            print $oh join("\n", @lines);
206                            close $oh;
207                            # Sanity check the temp file.
208                            CheckLoadFile($erdb, $table, $tmpFile, $sanityCheck);
209                            # Clean it up.
210                            unlink $tmpFile;
211                  } else {                  } else {
212                      # Yes! Try to assemble the sections into a data file.                          # Otherwise tell the user about all the missing files.
213                            for my $missingFile (@missingFiles) {
214                                $stats->Add('sections-missing' => 1);
215                                $stats->AddMessage("Data file $missingFile not found for table $table.");
216                            }
217                        }
218                    } else {
219                        # We have all the sections. Try to assemble them into a data file.
220                      my $sortStart = time();                      my $sortStart = time();
221                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";
222                      Trace("Sort command: $sortCommand") if T(3);                      Trace("Sort command: $sortCommand") if T(3);
223                        # Pipe to the sort command. Note that we turn on autoflush
224                        # so there's no buffering.
225                      my $oh = Open(undef, "| $sortCommand");                      my $oh = Open(undef, "| $sortCommand");
226                        select $oh; $| = 1; select STDOUT;
227                        # Loop through the sections.
228                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
229                          Trace("Collating $sectionFile.") if T(3);                          Trace("Collating $sectionFile.") if T(3);
230                          $stats->Add("$table-sections" => 1);                          $stats->Add("$table-sections" => 1);
231                          for my $line (Tracer::GetFile($sectionFile)) {                          # Loop through the section file.
232                              print $oh "$line\n";                          my $ih = Open(undef, "<$sectionFile");
233                            while (defined (my $line = <$ih>)) {
234                                print $oh $line;
235                              $stats->Add("$table-collations" => 1);                              $stats->Add("$table-collations" => 1);
236                          }                          }
237                      }                      }
# Line 212  Line 259 
259          if ($missingTable) {          if ($missingTable) {
260              # Yes, skip this group.              # Yes, skip this group.
261              $stats->Add('groups-skipped' => 1);              $stats->Add('groups-skipped' => 1);
262              Trace("Skipping $group group: $missingTable missing tables.") if T(3);              Trace("Skipping $group group: $missingTable missing tables.") if T(2);
263          } else {          } else {
264              # No! Process this group's files.              # No! Process this group's files.
265              if ($sanityCheck eq "") {              if ($sanityCheck eq "") {
# Line 298  Line 345 
345          push @records, [ Tracer::GetLine($ih) ];          push @records, [ Tracer::GetLine($ih) ];
346      }      }
347      my $found = scalar(@records);      my $found = scalar(@records);
348      Trace("$found records for $table found in sanity check.") if T(3);      Trace("$found records for $table found in sanity check using $fileName.") if T(2);
349      # Do we have any data at all?      # Do we have any data at all?
350      if ($found) {      if ($found) {
351          # Yes. Get the table's descriptor. We use this to determine the field names.          # Yes. Get the table's descriptor. We use this to determine the field names.
352          my $relationData = $erdb->FindRelation($table);          my $relationData = $erdb->FindRelation($table);
353          Confess("Relation $table not found in database.") if (! defined $relationData);          Confess("Relation $table not found in database.") if (! defined $relationData);
354          my @fields = @{$relationData->{Fields}};          my @fields = @{$relationData->{Fields}};
355            # If this is a relationship, we need the FROM and TO data.
356            my %ends; ($ends{from}, $ends{to}) = $erdb->GetRelationshipEntities($table);
357          # Loop through the fields. We generate one message per field.          # Loop through the fields. We generate one message per field.
358          for (my $i = 0; $i <= $#fields; $i++) {          for (my $i = 0; $i <= $#fields; $i++) {
359              # Get this field's information.              # Get this field's information.
360              my $fieldInfo = $fields[$i];              my $fieldInfo = $fields[$i];
361              my $type = $fieldInfo->{type};              my $type = $fieldInfo->{type};
362                my $name = $fieldInfo->{name};
363                if ($name =~ /^(from|to)-link$/) {
364                    # Here it's a relationship link, so add the name of the target table to
365                    # the type.
366                    $type .= " ($ends{$1})";
367                }
368              # This is going to be a multi-line trace message. We start with the field name and type.              # This is going to be a multi-line trace message. We start with the field name and type.
369              my @lines = ("Values for $fieldInfo->{name}, type $type:\n");              my @lines = ("Values for $table($name), type $type:\n");
370              # Loop through the records. We generate one line of data per record.              # Loop through the records. We generate one line of data per record.
371              for (my $j = 0; $j < $found; $j++) {              for (my $j = 0; $j < $found; $j++) {
372                  # Get the field value.                  # Get the field value.
# Line 319  Line 374 
374                  # Compute the record label.                  # Compute the record label.
375                  my $line = "Record $j";                  my $line = "Record $j";
376                  # Check for unusual cases.                  # Check for unusual cases.
377                  if (! defined $field) {                  if (! defined $field || $field eq '') {
                     $line .= "= <null>";  
                 } elsif ($field eq '') {  
378                      $line .= "= <empty>";                      $line .= "= <empty>";
379                  } else {                  } else {
380                      # Make sure we don't trace something ungodly.                      # Make sure we don't trace something ungodly.

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3