[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.18, Mon Jul 10 09:06:46 2006 UTC revision 1.24, Sat Sep 20 14:30:21 2008 UTC
# Line 28  Line 28 
28    
29  =head3 new  =head3 new
30    
31  C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory, $loadOnly, $ignore); >>      my $erload = ERDBLoad->new($erdb, $relationName, $directory, $loadOnly, $ignore);
32    
33  Begin loading an ERDB relation.  Begin loading an ERDB relation.
34    
# Line 72  Line 72 
72      my $fileName = "$directory/$relationName.dtx";      my $fileName = "$directory/$relationName.dtx";
73      # Declare the file handle variable.      # Declare the file handle variable.
74      my $fileHandle;      my $fileHandle;
75      # Determine whether or not this is a primary relation.      # Determine whether or not this is a simply keyed relation. For a simply keyed
76      my $primary = ($erdb->_IsPrimary($relationName) ? 1 : 0);      # relation, we can determine at run time if it is pre-sorted, and if so, skip
77        # the sort step.
78        my $sortString = $erdb->SortNeeded($relationName);
79        # Get all of the key specifiers in the sort string.
80        my @specs = grep { $_ =~ /-k\S+/ } split /\s+/, $sortString;
81        # We are pre-sortable if the key is a single, non-numeric field at the beginning. If
82        # we are pre-sortable, we'll check each incoming key and skip the sort step if the
83        # keys are already in the correct order.
84        my $preSortable = (scalar(@specs) == 1 && $specs[0] eq "-k1,1");
85      # Check to see if this is a load-only, ignore, or a generate-and-load.      # Check to see if this is a load-only, ignore, or a generate-and-load.
86      if ($ignore) {      if ($ignore) {
87          Trace("Relation $relationName will be ignored.") if T(2);          Trace("Relation $relationName will be ignored.") if T(2);
# Line 82  Line 90 
90          Trace("Relation $relationName will be loaded from $fileName.") if T(2);          Trace("Relation $relationName will be loaded from $fileName.") if T(2);
91          $fileHandle = "";          $fileHandle = "";
92      } else {      } else {
93          # Determine the sort for this relation.          # Compute the file name for this relation. We will build a file on
94          my $fileString = "| " . $erdb->SortNeeded($relationName) . " >$fileName";          # disk and then sort it into the real file when we're done.
95          Trace("Load file creation string is \"$fileString\".") if T(3);          my $fileString = ">$fileName.tmp";
96          # Open the output file and remember its handle.          # Open the output file and remember its handle.
97          $fileHandle = Open(undef, $fileString);          $fileHandle = Open(undef, $fileString);
98          Trace("Relation $relationName load file created with primary flag $primary.") if T(2);          Trace("Relation $relationName load file created.") if T(2);
99      }      }
100      # Create the $erload object.      # Create the $erload object.
101      my $retVal = {      my $retVal = {
# Line 98  Line 106 
106                    fileSize => 0,                    fileSize => 0,
107                    lineCount => 0,                    lineCount => 0,
108                    stats => Stats->new(),                    stats => Stats->new(),
109                    primary => $primary,                    presorted => $preSortable,
110                    ignore => ($ignore ? 1 : 0)                    ignore => ($ignore ? 1 : 0),
111                      sortString => $sortString,
112                      presorted => $preSortable,
113                      lastKey => ""
114                   };                   };
115      # Bless and return it.      # Bless and return it.
116      bless $retVal, $class;      bless $retVal, $class;
# Line 108  Line 119 
119    
120  =head3 Ignore  =head3 Ignore
121    
122  C<< my $flag = $erload->Ignore; >>      my $flag = $erload->Ignore;
123    
124  Return TRUE if we are ignoring this table, else FALSE.  Return TRUE if we are ignoring this table, else FALSE.
125    
# Line 123  Line 134 
134    
135  =head3 Put  =head3 Put
136    
137  C<< my  = $erload->Put($field1, $field2, ..., $fieldN); >>      my  = $erload->Put($field1, $field2, ..., $fieldN);
138    
139  Write a line of data to the load file. This may also cause the load file to be closed  Write a line of data to the load file. This may also cause the load file to be closed
140  and data read into the table.  and data read into the table.
# Line 151  Line 162 
162          my $truncates = $self->{dbh}->VerifyFields($self->{relName}, \@rawFields);          my $truncates = $self->{dbh}->VerifyFields($self->{relName}, \@rawFields);
163          # Run through the list of field values, escaping them.          # Run through the list of field values, escaping them.
164          my @fields = map { Tracer::Escape($_) } @rawFields;          my @fields = map { Tracer::Escape($_) } @rawFields;
         # If this is a primary relation, append the new-record field.  
         if ($self->{primary}) {  
             push @fields, '0';  
         }  
165          # Form a data line from the fields.          # Form a data line from the fields.
166          my $line = join("\t", @fields) . "\n";          my $line = join("\t", @fields) . "\n";
167          # Write the new record to the load file.          # Write the new record to the load file.
# Line 162  Line 169 
169          print $fh $line;          print $fh $line;
170          # Determine how long this will make the load file.          # Determine how long this will make the load file.
171          my $lineLength = length $line;          my $lineLength = length $line;
172            # Check to see if we're still pre-sorted.
173            if ($self->{presorted}) {
174                if ($fields[0] lt $self->{lastKey}) {
175                    # This key is out of order, so we're not pre-sorted any more.
176                    $self->{presorted} = 0;
177                } else {
178                    # We're still pre-sorted, so save this key.
179                    $self->{lastKey} = $fields[0];
180                }
181            }
182          # Update the statistics.          # Update the statistics.
183          $self->{fileSize} += $lineLength;          $self->{fileSize} += $lineLength;
184          $self->{lineCount} ++;          $self->{lineCount} ++;
# Line 174  Line 191 
191    
192  =head3 Add  =head3 Add
193    
194  C<< my  = $stats->Add($statName, $value); >>      my  = $stats->Add($statName, $value);
195    
196  Increment the specified statistic.  Increment the specified statistic.
197    
# Line 205  Line 222 
222    
223  =head3 Finish  =head3 Finish
224    
225  C<< my $stats = $erload->Finish(); >>      my $stats = $erload->Finish();
226    
227  Finish loading the table. This closes the load file.  Finish loading the table. This closes and sorts the load file.
228    
229  =over 4  =over 4
230    
# Line 226  Line 243 
243      if ($self->{fh}) {      if ($self->{fh}) {
244          # Close the load file.          # Close the load file.
245          close $self->{fh};          close $self->{fh};
246            # Get the ERDB object.
247            my $erdb = $self->{dbh};
248            # Get the output file name.
249            my $fileName = $self->{fileName};
250            # Do we need a sort?
251            if ($self->{presorted}) {
252                # No, so just rename the file.
253                Trace("$fileName is pre-sorted.") if T(3);
254                unlink $fileName;
255                rename "$fileName.tmp", $fileName;
256            } else {
257                # Get the sort command for this relation.
258                my $sortCommand = $erdb->SortNeeded($self->{relName});
259                Trace("Sorting into $fileName with command: $sortCommand") if T(3);
260                # Set up a timer.
261                my $start = time();
262                # Execute the sort command and save the error output.
263                my @messages = `$sortCommand 2>&1 1>$fileName <$fileName.tmp`;
264                # Record the time spent
265                $self->{stats}->Add(sortTime => (time() - $start));
266                # If there was no error, delete the temp file.
267                if (! scalar(@messages)) {
268                    unlink "$fileName.tmp";
269                } else {
270                    # Here there was an error.
271                    Confess("Error messages from $sortCommand:\n" . join("\n", @messages));
272                }
273            }
274            # Tell the user we're done.
275            Trace("Load file $fileName created.") if T(3);
276      }      }
277      # Return the statistics object.      # Return the statistics object.
278      return $self->{stats};      return $self->{stats};
279  }  }
280    
281    =head3 FinishAndLoad
282    
283        my $stats = $erload->FinishAndLoad();
284    
285    Finish the load and load the table, returning the statistics.
286    
287    =cut
288    
289    sub FinishAndLoad {
290        # Get the parameters.
291        my ($self) = @_;
292        # Finish the load file.
293        my $retVal = $self->Finish();
294        # Load the table.
295        my $newStats = $self->LoadTable();
296        # Accumulate the stats.
297        $retVal->Accumulate($newStats);
298        # Return the result.
299        return $retVal;
300    }
301    
302  =head3 RelName  =head3 RelName
303    
304  C<< my $name = $erload->RelName; >>      my $name = $erload->RelName;
305    
306  Name of the relation being loaded by this object.  Name of the relation being loaded by this object.
307    
# Line 246  Line 314 
314      return $self->{relName};      return $self->{relName};
315  }  }
316    
317    =head3 LoadTable
318    
319        my $stats = $erload->LoadTable();
320    
321    Load the database table from the load file and return a statistics object.
322    
323    =cut
324    
325    sub LoadTable {
326        # Get the parameters.
327        my ($self) = @_;
328        # Get the database object, the file name, and the relation name.
329        my $erdb = $self->{dbh};
330        my $fileName = $self->{fileName};
331        my $relName = $self->{relName};
332        # Load the table. The third parameter indicates this is a drop and reload.
333        my $retVal = $erdb->LoadTable($fileName, $relName, truncate => 1);
334        # Return the result.
335        return $retVal;
336    }
337    
338  1;  1;
339    

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.24

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3