[Bio] / Sprout / ERDBLoader.pl Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoader.pl

Parent Directory | Revision Log | View Patch

revision 1.4, Thu Oct 9 17:23:54 2008 UTC | revision 1.6, Mon Jan 19 21:46:21 2009 UTC
# Line 57  Line 57 
57  Name of the ERDB database. This should be the class name for the subclass used  Name of the ERDB database. This should be the class name for the subclass used
58  to access the database.  to access the database.
59    
   
60  =back  =back
61    
62  =head2 Command-Line Options  =head2 Command-Line Options
# Line 107  Line 106 
106    
107  Phone number to message when the script is complete.  Phone number to message when the script is complete.
108    
109    =item DBD
110    
111    Name of the DBD file. If specified, the DBD must be in the main FIG directory
112    (specified in C<$FIG_Config::fig>). This option allows the use of an alternate
113    DBD during load, so that access to the database by other processes is not
114    compromised.
115    
116  =back  =back
117    
118  =cut  =cut
# Line 115  Line 121 
121  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],  my ($options, @parameters) = StandardSetup([qw(ERDBLoadGroup ERDB Stats) ],
122                                             {                                             {
123                                                sanityCheck => ["", "don't load, trace contents of first N load file records instead"],                                                sanityCheck => ["", "don't load, trace contents of first N load file records instead"],
124                                                trace => ["", "tracing level"],                                                trace => ["2", "tracing level"],
125                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],                                                keepSections => ["", "if specified, section files will not be deleted after being collated"],
126                                                phone => ["", "phone number (international format) to call when load finishes"]                                                phone => ["", "phone number (international format) to call when load finishes"],
127                                                  DBD => ["", "if specified, the name of a DBD file in the FIG directory"],
128                                             },                                             },
129                                             "<database> <group1> <group2> ...",                                             "<database> <group1> <group2> ...",
130                                             @ARGV);                                             @ARGV);
# Line 127  Line 134 
134  eval {  eval {
135      # Get the parameters.      # Get the parameters.
136      my ($database, @groups) = @parameters;      my ($database, @groups) = @parameters;
137      # Connect to the database.      # Check for an alternate DBD.
138      my $erdb = ERDB::GetDatabase($database);      my $altDBD = ($options->{DBD} ? "$FIG_Config::fig/$options->{DBD}" : undef);
139        # Connect to the database and get its load directory.
140        my $erdb = ERDB::GetDatabase($database, $altDBD);
141      # Fix the group list.      # Fix the group list.
142      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);      my @realGroups = ERDBLoadGroup::ComputeGroups($erdb, \@groups);
143      # Get the source object and load directory for this database.      # Get the source object and load directory for this database.
# Line 165  Line 174 
174              } else {              } else {
175                  # No, we must build it. Verify that we have all the sections.                  # No, we must build it. Verify that we have all the sections.
176                  my @missingFiles = grep { ! -f $_ } @sectionFiles;                  my @missingFiles = grep { ! -f $_ } @sectionFiles;
                 # Tell the user about all the missing files.  
                 for my $missingFile (@missingFiles) {  
                     $stats->Add('sections-missing' => 1);  
                     $stats->AddMessage("Data file $missingFile not found for table $table.");  
                 }  
177                  # Did we find everything?                  # Did we find everything?
178                  if (scalar @missingFiles) {                  if (scalar @missingFiles) {
179                      # No! Denote that we have a missing table.                      # No! Denote that we have a missing table.
180                      $missingTable++;                      $missingTable++;
181                      $stats->Add('tables-skipped' => 1);                      $stats->Add('tables-skipped' => 1);
182                        # If the user wants a sanity check, we want to give him some
183                        # data anyway.
184                        if ($sanityCheck) {
185                            # Get some data lines in the sections. Note we stop when we've exceeded
186                            # the number of lines expected by the sanity check.
187                            my @lines;
188                            for my $sectionFile (@sectionFiles) {
189                                if (-s $sectionFile && scalar(@lines) < $sanityCheck) {
190                                    Trace("Reading from $sectionFile for $table.") if T(3);
191                                    push @lines, Tracer::GetFile($sectionFile);
192                                }
193                            }
194                            # Create a new temporary file.
195                            my $tmpFile = "$FIG_Config::temp/$table$$.temp.dtx";
196                            my $oh = Open(undef, ">$tmpFile");
197                            # Put all the data into it.
198                            Trace(scalar(@lines) . " data lines found.") if T(3);
199                            print $oh join("\n", @lines);
200                            close $oh;
201                            # Sanity check the temp file.
202                            CheckLoadFile($erdb, $table, $tmpFile, $sanityCheck);
203                            # Clean it up.
204                            unlink $tmpFile;
205                  } else {                  } else {
206                      # Yes! Try to assemble the sections into a data file.                          # Otherwise tell the user about all the missing files.
207                            for my $missingFile (@missingFiles) {
208                                $stats->Add('sections-missing' => 1);
209                                $stats->AddMessage("Data file $missingFile not found for table $table.");
210                            }
211                        }
212                    } else {
213                        # We have all the sections. Try to assemble them into a data file.
214                      my $sortStart = time();                      my $sortStart = time();
215                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";                      my $sortCommand = $erdb->SortNeeded($table) . " >$dataFile";
216                      Trace("Sort command: $sortCommand") if T(3);                      Trace("Sort command: $sortCommand") if T(3);
217                        # Pipe to the sort command. Note that we turn on autoflush
218                        # so there's no buffering.
219                      my $oh = Open(undef, "| $sortCommand");                      my $oh = Open(undef, "| $sortCommand");
220                        select $oh; $| = 1; select STDOUT;
221                        # Loop through the sections.
222                      for my $sectionFile (@sectionFiles) {                      for my $sectionFile (@sectionFiles) {
223                          Trace("Collating $sectionFile.") if T(3);                          Trace("Collating $sectionFile.") if T(3);
224                          $stats->Add("$table-sections" => 1);                          $stats->Add("$table-sections" => 1);
225                          for my $line (Tracer::GetFile($sectionFile)) {                          # Loop through the section file.
226                              print $oh "$line\n";                          my $ih = Open(undef, "<$sectionFile");
227                            while (defined (my $line = <$ih>)) {
228                                print $oh $line;
229                              $stats->Add("$table-collations" => 1);                              $stats->Add("$table-collations" => 1);
230                          }                          }
231                      }                      }
# Line 213  Line 253 
253          if ($missingTable) {          if ($missingTable) {
254              # Yes, skip this group.              # Yes, skip this group.
255              $stats->Add('groups-skipped' => 1);              $stats->Add('groups-skipped' => 1);
256              Trace("Skipping $group group: $missingTable missing tables.") if T(3);              Trace("Skipping $group group: $missingTable missing tables.") if T(2);
257          } else {          } else {
258              # No! Process this group's files.              # No! Process this group's files.
259              if ($sanityCheck eq "") {              if ($sanityCheck eq "") {
# Line 299  Line 339 
339          push @records, [ Tracer::GetLine($ih) ];          push @records, [ Tracer::GetLine($ih) ];
340      }      }
341      my $found = scalar(@records);      my $found = scalar(@records);
342      Trace("$found records for $table found in sanity check.") if T(3);      Trace("$found records for $table found in sanity check using $fileName.") if T(2);
343      # Do we have any data at all?      # Do we have any data at all?
344      if ($found) {      if ($found) {
345          # Yes. Get the table's descriptor. We use this to determine the field names.          # Yes. Get the table's descriptor. We use this to determine the field names.
346          my $relationData = $erdb->FindRelation($table);          my $relationData = $erdb->FindRelation($table);
347          Confess("Relation $table not found in database.") if (! defined $relationData);          Confess("Relation $table not found in database.") if (! defined $relationData);
348          my @fields = @{$relationData->{Fields}};          my @fields = @{$relationData->{Fields}};
349            # If this is a relationship, we need the FROM and TO data.
350            my %ends; ($ends{from}, $ends{to}) = $erdb->GetRelationshipEntities($table);
351          # Loop through the fields. We generate one message per field.          # Loop through the fields. We generate one message per field.
352          for (my $i = 0; $i <= $#fields; $i++) {          for (my $i = 0; $i <= $#fields; $i++) {
353              # Get this field's information.              # Get this field's information.
354              my $fieldInfo = $fields[$i];              my $fieldInfo = $fields[$i];
355              my $type = $fieldInfo->{type};              my $type = $fieldInfo->{type};
356                my $name = $fieldInfo->{name};
357                if ($name =~ /^(from|to)-link$/) {
358                    # Here it's a relationship link, so add the name of the target table to
359                    # the type.
360                    $type .= " ($ends{$1})";
361                }
362              # This is going to be a multi-line trace message. We start with the field name and type.              # This is going to be a multi-line trace message. We start with the field name and type.
363              my @lines = ("Values for $fieldInfo->{name}, type $type:\n");              my @lines = ("Values for $table($name), type $type:\n");
364              # Loop through the records. We generate one line of data per record.              # Loop through the records. We generate one line of data per record.
365              for (my $j = 0; $j < $found; $j++) {              for (my $j = 0; $j < $found; $j++) {
366                  # Get the field value.                  # Get the field value.
# Line 320  Line 368 
368                  # Compute the record label.                  # Compute the record label.
369                  my $line = "Record $j";                  my $line = "Record $j";
370                  # Check for unusual cases.                  # Check for unusual cases.
371                  if (! defined $field) {                  if (! defined $field || $field eq '') {
                     $line .= "= <null>";  
                 } elsif ($field eq '') {  
372                      $line .= "= <empty>";                      $line .= "= <empty>";
373                  } else {                  } else {
374                      # Make sure we don't trace something ungodly.                      # Make sure we don't trace something ungodly.

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3