[Bio] / Sprout / ERDBLoader.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoader.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Wed Oct 15 11:41:12 2008 UTC revision 1.9, Mon Mar 23 19:34:35 2009 UTC
# Line 76  Line 76 
76    
77  If specified, turns on tracing of SQL activity.  If specified, turns on tracing of SQL activity.
78    
79    =item clear
80    
81    If specified, existing load files will be recreated from sections if the sections
82    are present.
83    
84  =item background  =item background
85    
86  Save the standard and error output to files. The files will be created  Save the standard and error output to files. The files will be created
# Line 106  Line 111 
111    
112  Phone number to message when the script is complete.  Phone number to message when the script is complete.
113    
114    =item DBD
115    
116    Name of the DBD file. If specified, the DBD must be in the main FIG directory
117    (specified in C<$FIG_Config::fig>). This option allows the use of an alternate
118    DBD during load so that access to the database by other processes is not
119    compromised.
120    
121    =item loadDirectory
122    
123    Directory containing the load files. This option allows you to request that
124    load files from another version of the NMPDR be used, which is useful when
125    creating a new NMPDR: we can yank in the data from the previous database while
126    waiting for the new load files to be generated.
127    
128  =back  =back
129    
130  =cut  =cut
# Line 113  Line 132 
132  # Get the command-line options and parameters.  # Get the command-line options and parameters.
133  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],
134                                             {                                             {
135                                                  clear => ["", "overwrite existing load files if sections are present"],
136                                                sanityCheck => ["", "don't load, trace contents of first N load file records instead"],                                                sanityCheck => ["", "don't load, trace contents of first N load file records instead"],
137                                                trace => ["", "tracing level"],                                                trace => ["2", "tracing level"],
138                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],
139                                                phone => ["", "phone number (international format) to call when load finishes"]                                                phone => ["", "phone number (international format) to call when load finishes"],
140                                                  DBD => ["", "if specified, the name of a DBD file in the FIG directory"],
141                                                  loadDirectory => ["", "if specified, an alternate directory containing the load files"],
142                                             },                                             },
143                                             "<database> <group1> <group2> ...",                                             "<database> <group1> <group2> ...",
144                                             @ARGV);                                             @ARGV);
# Line 126  Line 148 
148  eval {  eval {
149      # Get the parameters.      # Get the parameters.
150      my ($database, @groups) = @parameters;      my ($database, @groups) = @parameters;
151      # Connect to the database.      # Check for an alternate DBD.
152      my $erdb = ERDB::GetDatabase($database);      my $altDBD = ($options->{DBD} ? "$FIG_Config::fig/$options->{DBD}" : undef);
153        # Connect to the database and get its load directory.
154        my $erdb = ERDB::GetDatabase($database, $altDBD);
155      # Fix the group list.      # Fix the group list.
156      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
157      # Get the source object and load directory for this database.      # Get the source object and load directory for this database.
158      my $source = $erdb->GetSourceObject();      my $source = $erdb->GetSourceObject();
159      my $directory = $erdb->LoadDirectory();      my $directory = $options->{loadDirectory} || $erdb->LoadDirectory();
160      # Get the list of sections.      # Get the list of sections.
161      my @sectionList = $erdb->SectionList($source);      my @sectionList = $erdb->SectionList($source);
162      # Create a statistics object to track our progress.      # Create a statistics object to track our progress.
# Line 157  Line 181 
181              # Get the data file name.              # Get the data file name.
182              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);              my $dataFile = ERDBGenerate::CreateFileName($table, undef, 'data', $directory);
183              # Do we have it?              # Do we have it?
184              if (-f $dataFile) {              if (-f $dataFile && ! $options->{clear}) {
185                  # Yes. This is good news.                  # Yes. This is good news.
186                  $stats->Add('tables-found' => 1);                  $stats->Add('tables-found' => 1);
187                  Trace("Table file found for $table.") if T(3);                  Trace("Table file found for $table.") if T(3);
188              } else {              } else {
189                  # No, we must build it. Verify that we have all the sections.                  # No, we must build it. Verify that we have all the sections.
190                  my @missingFiles = grep { ! -f $_ } @sectionFiles;                  my @missingFiles = grep { ! -f $_ } @sectionFiles;
                 # Tell the user about all the missing files.  
                 for my $missingFile (@missingFiles) {  
                     $stats->Add('sections-missing' => 1);  
                     $stats->AddMessage("Data file $missingFile not found for table $table.");  
                 }  
191                  # Did we find everything?                  # Did we find everything?
192                  if (scalar @missingFiles) {                  if (scalar @missingFiles) {
193                      # No! Denote that we have a missing table.                      # No! Denote that we have a missing table.
194                      $missingTable++;                      $missingTable++;
195                      $stats->Add('tables-skipped' => 1);                      $stats->Add('tables-skipped' => 1);
196                        # If the user wants a sanity check, we want to give him some
197                        # data anyway.
198                        if ($sanityCheck) {
199                            # Get some data lines in the sections. Note we stop when we've exceeded
200                            # the number of lines expected by the sanity check.
201                            my @lines;
202                            for my $sectionFile (@sectionFiles) {
203                                if (-s $sectionFile && scalar(@lines) < $sanityCheck) {
204                                    Trace("Reading from $sectionFile for $table.") if T(3);
205                                    push @lines, Tracer::GetFile($sectionFile);
206                                }
207                            }
208                            # Create a new temporary file.
209                            my $tmpFile = "$FIG_Config::temp/$table$$.temp.dtx";
210                            my $oh = Open(undef, ">$tmpFile");
211                            # Put all the data into it.
212                            Trace(scalar(@lines) . " data lines found.") if T(3);
213                            print $oh join("\n", @lines);
214                            close $oh;
215                            # Sanity check the temp file.
216                            CheckLoadFile($erdb, $table, $tmpFile, $sanityCheck);
217                            # Clean it up.
218                            unlink $tmpFile;
219                        } else {
220                            # Otherwise tell the user about all the missing files.
221                            for my $missingFile (@missingFiles) {
222                                $stats->Add('sections-missing' => 1);
223                                $stats->AddMessage("Data file $missingFile not found for table $table.");
224                            }
225                        }
226                  } else {                  } else {
227                      # Yes! Try to assemble the sections into a data file.                      # We have all the sections. Try to assemble them into a data file.
228                      my $sortStart = time();                      my $sortStart = time();
229                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";
230                      Trace("Sort command: $sortCommand") if T(3);                      Trace("Sort command: $sortCommand") if T(3);
231                        # Pipe to the sort command. Note that we turn on autoflush
232                        # so there's no buffering.
233                      my $oh = Open(undef, "| $sortCommand");                      my $oh = Open(undef, "| $sortCommand");
234                        select $oh; $| = 1; select STDOUT;
235                        # Loop through the sections.
236                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
237                          Trace("Collating $sectionFile.") if T(3);                          Trace("Collating $sectionFile.") if T(3);
238                          $stats->Add("$table-sections" => 1);                          $stats->Add("$table-sections" => 1);
239                          for my $line (Tracer::GetFile($sectionFile)) {                          # Loop through the section file.
240                              print $oh "$line\n";                          my $ih = Open(undef, "<$sectionFile");
241                            while (defined (my $line = <$ih>)) {
242                                print $oh $line;
243                              $stats->Add("$table-collations" => 1);                              $stats->Add("$table-collations" => 1);
244                          }                          }
245                      }                      }
# Line 212  Line 267 
267          if ($missingTable) {          if ($missingTable) {
268              # Yes, skip this group.              # Yes, skip this group.
269              $stats->Add('groups-skipped' => 1);              $stats->Add('groups-skipped' => 1);
270              Trace("Skipping $group group: $missingTable missing tables.") if T(3);              Trace("Skipping $group group: $missingTable missing tables.") if T(2);
271          } else {          } else {
272              # No! Process this group's files.              # No! Process this group's files.
273              if ($sanityCheck eq "") {              if ($sanityCheck eq "") {
# Line 257  Line 312 
312      }      }
313  }  }
314    
315    =head2 Internal Methods
316    
317  =head3 CheckLoadFile  =head3 CheckLoadFile
318    
319      CheckLoadFile($erdb, $table, $fileName, $count);      CheckLoadFile($erdb, $table, $fileName, $count);
# Line 298  Line 355 
355          push @records, [ Tracer::GetLine($ih) ];          push @records, [ Tracer::GetLine($ih) ];
356      }      }
357      my $found = scalar(@records);      my $found = scalar(@records);
358      Trace("$found records for $table found in sanity check.") if T(3);      Trace("$found records for $table found in sanity check using $fileName.") if T(2);
359      # Do we have any data at all?      # Do we have any data at all?
360      if ($found) {      if ($found) {
361          # Yes. Get the table's descriptor. We use this to determine the field names.          # Yes. Get the table's descriptor. We use this to determine the field names.
362          my $relationData = $erdb->FindRelation($table);          my $relationData = $erdb->FindRelation($table);
363          Confess("Relation $table not found in database.") if (! defined $relationData);          Confess("Relation $table not found in database.") if (! defined $relationData);
364          my @fields = @{$relationData->{Fields}};          my @fields = @{$relationData->{Fields}};
365            # If this is a relationship, we need the FROM and TO data.
366            my %ends; ($ends{from}, $ends{to}) = $erdb->GetRelationshipEntities($table);
367          # Loop through the fields. We generate one message per field.          # Loop through the fields. We generate one message per field.
368          for (my $i = 0; $i <= $#fields; $i++) {          for (my $i = 0; $i <= $#fields; $i++) {
369              # Get this field's information.              # Get this field's information.
370              my $fieldInfo = $fields[$i];              my $fieldInfo = $fields[$i];
371              my $type = $fieldInfo->{type};              my $type = $fieldInfo->{type};
372                my $name = $fieldInfo->{name};
373                if ($name =~ /^(from|to)-link$/) {
374                    # Here it's a relationship link, so add the name of the target table to
375                    # the type.
376                    $type .= " ($ends{$1})";
377                }
378              # This is going to be a multi-line trace message. We start with the field name and type.              # This is going to be a multi-line trace message. We start with the field name and type.
379              my @lines = ("Values for $fieldInfo->{name}, type $type:\n");              my @lines = ("Values for $table($name), type $type:\n");
380              # Loop through the records. We generate one line of data per record.              # Loop through the records. We generate one line of data per record.
381              for (my $j = 0; $j < $found; $j++) {              for (my $j = 0; $j < $found; $j++) {
382                  # Get the field value.                  # Get the field value.
# Line 319  Line 384 
384                  # Compute the record label.                  # Compute the record label.
385                  my $line = "Record $j";                  my $line = "Record $j";
386                  # Check for unusual cases.                  # Check for unusual cases.
387                  if (! defined $field) {                  if (! defined $field || $field eq '') {
                     $line .= "= <null>";  
                 } elsif ($field eq '') {  
388                      $line .= "= <empty>";                      $line .= "= <empty>";
389                  } else {                  } else {
390                      # Make sure we don't trace something ungodly.                      # Make sure we don't trace something ungodly.

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3