[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Sun Aug 14 23:32:08 2005 UTC revision 1.22, Tue Aug 12 06:07:06 2008 UTC
# Line 12  Line 12 
12    
13  =head2 Introduction  =head2 Introduction
14    
15  This object is designed to assist with loading an ERDB data relation. The user  This object is designed to assist with creating the load file for an ERDB
16  constructs the object by specifying an ERDB object and a relation name. This  data relation. The user constructs the object by specifying an ERDB object
17  re-creates the relevant relation. The client then passes in data lines which  and a relation name. This creates the load file for the relevant relation. The client
18  are written to a file. When the file gets big enough, it is loaded into the  then passes in data lines which are written to a file, and calls
19  table. Calling the L</Finish> method loads any leftover data and optionally  L</Finish> to close the file and get the statistics.
 creates the index.  
20    
21  This module makes use of the internal ERDB property C<_dbh> and the internal  This module makes use of the internal ERDB method C<_IsPrimary>.
 method C<_IsPrimary>.  
22    
23  =cut  =cut
24    
# Line 30  Line 28 
28    
29  =head3 new  =head3 new
30    
31  C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory); >>      my $erload = ERDBLoad->new($erdb, $relationName, $directory, $loadOnly, $ignore);
32    
33  Begin loading an ERDB relation.  Begin loading an ERDB relation.
34    
# Line 48  Line 46 
46    
47  Name of the directory to use for the load files, WITHOUT a trailing slash.  Name of the directory to use for the load files, WITHOUT a trailing slash.
48    
49  =item estimatedRows (optional)  =item loadOnly
50    
51  Estimated maximum number of table rows. If omitted, the table will be created in  TRUE if the data is to be loaded from an existing file, FALSE if a file is
52  a format that permits an essentially unlimited number of rows.  to be created.
53    
54    =item ignore
55    
56    TRUE if the data is to be discarded. This is used to save time when only
57    a subset of the tables need to be loaded: the data for the ignored tables
58    is simply discarded.
59    
60  =back  =back
61    
# Line 59  Line 63 
63    
64  sub new {  sub new {
65      # Get the parameters.      # Get the parameters.
66      my ($class, $erdb, $relationName, $directory, $estimatedRows) = @_;      my ($class, $erdb, $relationName, $directory, $loadOnly, $ignore) = @_;
67      # Validate the directory name.      # Validate the directory name.
68      if (! -d $directory) {      if (! -d $directory) {
69          Confess("Load directory \"$directory\" not found.");          Confess("Load directory \"$directory\" not found.");
70      }      }
71      # Determine the name for this relation's load file.      # Determine the name for this relation's load file.
72      my $fileName = "$directory/$relationName.dtx";      my $fileName = "$directory/$relationName.dtx";
73      # Decide whether or not we should specify estimated rows.      # Declare the file handle variable.
74      my $rowEstimate = ($FIG_Config::estimate_rows ? $estimatedRows : undef);      my $fileHandle;
75      # Create the target table. If this is a pre-index DBMS, we      # Determine whether or not this is a primary relation.
76      # also create the indexes. If the table already exists,      my $primary = ($erdb->_IsPrimary($relationName) ? 1 : 0);
77      # it will be dropped.      # Check to see if this is a load-only, ignore, or a generate-and-load.
78      $erdb->CreateTable($relationName, $FIG_Config::preIndex, $rowEstimate);      if ($ignore) {
79            Trace("Relation $relationName will be ignored.") if T(2);
80            $fileHandle = "";
81        } elsif ($loadOnly) {
82            Trace("Relation $relationName will be loaded from $fileName.") if T(2);
83            $fileHandle = "";
84        } else {
85            # Determine the sort for this relation.
86            my $fileString = "| " . $erdb->SortNeeded($relationName) . " >$fileName";
87            Trace("Load file creation string is \"$fileString\".") if T(3);
88      # Open the output file and remember its handle.      # Open the output file and remember its handle.
89      my $fileHandle = Open(undef, ">$fileName");          $fileHandle = Open(undef, $fileString);
90            Trace("Relation $relationName load file created with primary flag $primary.") if T(2);
91        }
92      # Create the $erload object.      # Create the $erload object.
93      my $retVal = {      my $retVal = {
94                    dbh => $erdb,                    dbh => $erdb,
# Line 83  Line 98 
98                    fileSize => 0,                    fileSize => 0,
99                    lineCount => 0,                    lineCount => 0,
100                    stats => Stats->new(),                    stats => Stats->new(),
101                    primary => $erdb->_IsPrimary($relationName)                    primary => $primary,
102                      ignore => ($ignore ? 1 : 0)
103                   };                   };
104      # Bless and return it.      # Bless and return it.
105      bless $retVal, $class;      bless $retVal, $class;
106      return $retVal;      return $retVal;
107  }  }
108    
109    =head3 Ignore
110    
111        my $flag = $erload->Ignore;
112    
113    Return TRUE if we are ignoring this table, else FALSE.
114    
115    =cut
116    #: Return Type $;
117    sub Ignore {
118        # Get the parameters.
119        my ($self) = @_;
120        # Return the result.
121        return $self->{ignore};
122    }
123    
124  =head3 Put  =head3 Put
125    
126  C<< my  = $erload->Put($field1, $field2, ..., $fieldN); >>      my  = $erload->Put($field1, $field2, ..., $fieldN);
127    
128  Write a line of data to the load file. This may also cause the load file to be closed  Write a line of data to the load file. This may also cause the load file to be closed
129  and data read into the table.  and data read into the table.
# Line 110  Line 141 
141  =cut  =cut
142  #: Return Type ;  #: Return Type ;
143  sub Put {  sub Put {
144      # Get the ERDBLoad instance.      # Get the ERDBLoad instance and the field list.
145      my $self = shift @_;      my ($self, @rawFields) = @_;
146        # Only proceed if we're not ignoring.
147        if (! $self->{ignore}) {
148            # Convert the hash-string fields to their digested value.
149            $self->{dbh}->DigestFields($self->{relName}, \@rawFields);
150            # Insure the field values are okay.
151            my $truncates = $self->{dbh}->VerifyFields($self->{relName}, \@rawFields);
152      # Run through the list of field values, escaping them.      # Run through the list of field values, escaping them.
153      my @fields = map { Tracer::Escape($_) } @_;          my @fields = map { Tracer::Escape($_) } @rawFields;
     # If this is a primary relation, append the new-record field.  
     if ($self->{primary}) {  
         push @fields, '0';  
     }  
154      # Form a data line from the fields.      # Form a data line from the fields.
155      my $line = join("\t", @fields) . "\n";      my $line = join("\t", @fields) . "\n";
     # Determine how long this will make the load file.  
     my $lineLength = length $line;  
     if ($lineLength > (200000000 - $self->{fileSize})) {  
         # Here it would be too long, so we force a load.  
         $self->Flush();  
     }  
156      # Write the new record to the load file.      # Write the new record to the load file.
157      my $fh = $self->{fh};      my $fh = $self->{fh};
158      print $fh $line;      print $fh $line;
159            # Determine how long this will make the load file.
160            my $lineLength = length $line;
161      # Update the statistics.      # Update the statistics.
162      $self->{fileSize} += $lineLength;      $self->{fileSize} += $lineLength;
163      $self->{lineCount} ++;      $self->{lineCount} ++;
164            $self->Add("lineOut");
165            if ($truncates > 0) {
166                $self->Add("truncated", $truncates);
167  }  }
168        }
169    }
170    
171    =head3 Add
172    
173        my  = $erload->Add($statName, $value);
174    
175    Increment the specified statistic.
176    
177    =over 4
178    
179    =item statName
180    
181    Name of the statistic to increment.
182    
183  =head3 Flush  =item value (optional)
184    
185  C<< $erload->Flush(); >>  Value by which to increment it. If omitted, C<1> is assumed.
186    
187  Load all the data currently in the load file into the database. This clears the load  =back
 file and re-opens it.  
188    
189  =cut  =cut
190  #: Return Type ;  #: Return Type ;
191  sub Flush {  sub Add {
192      # Get the parameters.      # Get the parameters.
193      my ($self) = @_;      my ($self, $statName, $value) = @_;
194      # Flush the data in the load file.      # Fix the value.
195      $self->_FlushData();      if (! defined $value) {
196      # Re-open the file so it can accept more data.          $value = 1;
197      $self->_ReOpen();      }
198        # Increment the statistic.
199        $self->{stats}->Add($statName, $value);
200  }  }
201    
202  =head3 Finish  =head3 Finish
203    
204  C<< my $stats = $erload->Finish(); >>      my $stats = $erload->Finish();
205    
206  Finish loading the table. This closes the load file and loads its contents into the database.  Finish loading the table. This closes the load file.
 It also creates the indexes if the DBMS uses post-indexing.  
207    
208  =over 4  =over 4
209    
# Line 173  Line 219 
219  sub Finish {  sub Finish {
220      # Get this object instance.      # Get this object instance.
221      my ($self) = @_;      my ($self) = @_;
222      # Flush out the data in the load file.      if ($self->{fh}) {
223      $self->_FlushData();          # Close the load file.
224      # If this is a post-index DBMS, create the indexes.          close $self->{fh};
     if (! $FIG_Config::preIndex) {  
         $self->{dbh}->CreateIndex($self->RelName);  
225      }      }
     # Delete the load file.  
     unlink $self->{fileName};  
226      # Return the statistics object.      # Return the statistics object.
227      return $self->{stats};      return $self->{stats};
228  }  }
229    
230  =head3 RelName  =head3 FinishAndLoad
231    
232  C<< my $name = $erload->RelName; >>      my $stats = $erload->FinishAndLoad();
233    
234  Name of the relation being loaded by this object.  Finish the load and load the table, returning the statistics.
235    
236  =cut  =cut
237    
238  sub RelName {  sub FinishAndLoad {
239      # Get the object instance.      # Get the parameters.
240      my ($self) = @_;      my ($self) = @_;
241      # Return the relation name.      # Finish the load file.
242      return $self->{relName};      my $retVal = $self->Finish();
243        # Load the table.
244        my $newStats = $self->LoadTable();
245        # Accumulate the stats.
246        $retVal->Accumulate($newStats);
247        # Return the result.
248        return $retVal;
249  }  }
250    
251  =head2 Internal Methods  =head3 RelName
   
 =head3 ReOpen  
252    
253  Re-open the load file.      my $name = $erload->RelName;
254    
255  This is an instance method.  Name of the relation being loaded by this object.
256    
257  =cut  =cut
258    
259  sub _ReOpen {  sub RelName {
260      # Get this instance.      # Get the object instance.
261      my ($self) = @_;      my ($self) = @_;
262      # Open the file with the current filehandle in truncate mode.      # Return the relation name.
263      Open($self->{fh}, ">" . $self->{fileName});      return $self->{relName};
     # Denote the file is empty.  
     $self->{fileSize} = 0;  
     $self->{lineCount} = 0;  
264  }  }
265    
266  =head3 FlushData  =head3 LoadTable
267    
268  Close the load file and load all its data into the table.      my $stats = $erload->LoadTable();
269    
270  This is an instance method.  Load the database table from the load file and return a statistics object.
271    
272  =cut  =cut
273    
274  sub _FlushData {  sub LoadTable {
275      # Get this instance.      # Get the parameters.
276      my ($self) = @_;      my ($self) = @_;
277      # Get the relation name.      # Get the database object, the file name, and the relation name.
278      my $relName = $self->RelName;      my $erdb = $self->{dbh};
279      Trace("Flushing data to table $relName.") if T(2);      my $fileName = $self->{fileName};
280      # Close the load file.      my $relName = $self->{relName};
281      close $self->{fh};      # Load the table. The third parameter indicates this is a drop and reload.
282      # We must use the load file to load the table. First, we get the DBKernel      my $retVal = $erdb->LoadTable($fileName, $relName, truncate => 1);
283      # handle and the statistics object.      # Return the result.
284      my $stats = $self->{stats};      return $retVal;
     my $dbh = $self->{dbh}->{_dbh};  
     # Begin a database transaction. This is not actually for integrity reasons; it  
     # speeds up the slow load process.  
     $dbh->begin_tran();  
     # Load the database table safely.  
     my $rv;  
     eval {  
         Trace("Loading file into relation $relName.") if T(3);  
         $rv = $dbh->load_table(file => $self->{fileName}, tbl => $relName);  
     };  
     # Check to see if we succeeded.  
     if (!defined $rv) {  
         # We've failed. Format a useful message. If we have an error message from  
         # EVAL, we use it.  
         my $msg = "Table load failed for $relName" . ($@ ? ": $@" : ".");  
         $stats->AddMessage($msg);  
         Trace($msg) if T(1);  
     } else {  
         # Here we successfully loaded the table. Trace the number of records loaded.  
         my $lineCount = $self->{lineCount};  
         my $byteCount = $self->{fileSize};  
         Trace("$lineCount records ($byteCount bytes) loaded into $relName.") if T(2);  
         # Accumulate the statistics.  
         $stats->Add("records", $lineCount);  
         $stats->Add("bytes", $byteCount);  
     }  
     # Close the database transaction.  
     $dbh->commit_tran();  
285  }  }
286    
287  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3