[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Sun Sep 11 17:05:57 2005 UTC revision 1.23, Wed Sep 3 20:56:20 2008 UTC
# Line 28  Line 28 
28    
29  =head3 new  =head3 new
30    
31  C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory); >>      my $erload = ERDBLoad->new($erdb, $relationName, $directory, $loadOnly, $ignore);
32    
33  Begin loading an ERDB relation.  Begin loading an ERDB relation.
34    
# Line 46  Line 46 
46    
47  Name of the directory to use for the load files, WITHOUT a trailing slash.  Name of the directory to use for the load files, WITHOUT a trailing slash.
48    
49  =item estimatedRows (optional)  =item loadOnly
50    
51  Estimated maximum number of table rows. If omitted, the table will be created in  TRUE if the data is to be loaded from an existing file, FALSE if a file is
52  a format that permits an essentially unlimited number of rows.  to be created.
53    
54    =item ignore
55    
56    TRUE if the data is to be discarded. This is used to save time when only
57    a subset of the tables need to be loaded: the data for the ignored tables
58    is simply discarded.
59    
60  =back  =back
61    
# Line 57  Line 63 
63    
64  sub new {  sub new {
65      # Get the parameters.      # Get the parameters.
66      my ($class, $erdb, $relationName, $directory, $estimatedRows) = @_;      my ($class, $erdb, $relationName, $directory, $loadOnly, $ignore) = @_;
67      # Validate the directory name.      # Validate the directory name.
68      if (! -d $directory) {      if (! -d $directory) {
69          Confess("Load directory \"$directory\" not found.");          Confess("Load directory \"$directory\" not found.");
70      }      }
71      # Determine the name for this relation's load file.      # Determine the name for this relation's load file.
72      my $fileName = "$directory/$relationName.dtx";      my $fileName = "$directory/$relationName.dtx";
73      # If this is a primary entity relation, sort the output to remove      # Declare the file handle variable.
74      # duplicate keys.      my $fileHandle;
75      my $fileString = ($erdb->IsEntity($relationName) ?      # Determine whether or not this is a primary relation.
76                          "| sort +0 -1 -u >$fileName" :      my $primary = ($erdb->_IsPrimary($relationName) ? 1 : 0);
77                          ">$fileName");      # Check to see if this is a load-only, ignore, or a generate-and-load.
78        if ($ignore) {
79            Trace("Relation $relationName will be ignored.") if T(2);
80            $fileHandle = "";
81        } elsif ($loadOnly) {
82            Trace("Relation $relationName will be loaded from $fileName.") if T(2);
83            $fileHandle = "";
84        } else {
85            # Compute the file name for this relation. We will build a file on
86            # disk and then sort it into the real file when we're done.
87            my $fileString = ">$fileName.tmp";
88      # Open the output file and remember its handle.      # Open the output file and remember its handle.
89      my $fileHandle = Open(undef, $fileString);          $fileHandle = Open(undef, $fileString);
90            Trace("Relation $relationName load file created with primary flag $primary.") if T(2);
91        }
92      # Create the $erload object.      # Create the $erload object.
93      my $retVal = {      my $retVal = {
94                    dbh => $erdb,                    dbh => $erdb,
# Line 80  Line 98 
98                    fileSize => 0,                    fileSize => 0,
99                    lineCount => 0,                    lineCount => 0,
100                    stats => Stats->new(),                    stats => Stats->new(),
101                    primary => $erdb->_IsPrimary($relationName)                    primary => $primary,
102                      ignore => ($ignore ? 1 : 0)
103                   };                   };
104      # Bless and return it.      # Bless and return it.
105      bless $retVal, $class;      bless $retVal, $class;
106      return $retVal;      return $retVal;
107  }  }
108    
109    =head3 Ignore
110    
111        my $flag = $erload->Ignore;
112    
113    Return TRUE if we are ignoring this table, else FALSE.
114    
115    =cut
116    #: Return Type $;
117    sub Ignore {
118        # Get the parameters.
119        my ($self) = @_;
120        # Return the result.
121        return $self->{ignore};
122    }
123    
124  =head3 Put  =head3 Put
125    
126  C<< $erload->Put($field1, $field2, ..., $fieldN); >>      $erload->Put($field1, $field2, ..., $fieldN);
127    
128  Write a line of data to the load file. This may also cause the load file to be closed  Write a line of data to the load file. This may also cause the load file to be closed
129  and data read into the table.  and data read into the table.
# Line 107  Line 141 
141  =cut  =cut
142  #: Return Type ;  #: Return Type ;
143  sub Put {  sub Put {
144      # Get the ERDBLoad instance.      # Get the ERDBLoad instance and the field list.
145      my $self = shift @_;      my ($self, @rawFields) = @_;
146        # Only proceed if we're not ignoring.
147        if (! $self->{ignore}) {
148            # Convert the hash-string fields to their digested value.
149            $self->{dbh}->DigestFields($self->{relName}, \@rawFields);
150            # Ensure the field values are okay.
151            my $truncates = $self->{dbh}->VerifyFields($self->{relName}, \@rawFields);
152      # Run through the list of field values, escaping them.      # Run through the list of field values, escaping them.
153      my @fields = map { Tracer::Escape($_) } @_;          my @fields = map { Tracer::Escape($_) } @rawFields;
     # If this is a primary relation, append the new-record field.  
     if ($self->{primary}) {  
         push @fields, '0';  
     }  
154      # Form a data line from the fields.      # Form a data line from the fields.
155      my $line = join("\t", @fields) . "\n";      my $line = join("\t", @fields) . "\n";
156      # Write the new record to the load file.      # Write the new record to the load file.
# Line 126  Line 162 
162      $self->{fileSize} += $lineLength;      $self->{fileSize} += $lineLength;
163      $self->{lineCount} ++;      $self->{lineCount} ++;
164      $self->Add("lineOut");      $self->Add("lineOut");
165            if ($truncates > 0) {
166                $self->Add("truncated", $truncates);
167            }
168        }
169  }  }
170    
171  =head3 Add  =head3 Add
172    
173  C<< $erload->Add($statName); >>      $erload->Add($statName, $value);
174    
175  Increment the specified statistic.  Increment the specified statistic.
176    
# Line 140  Line 180 
180    
181  Name of the statistic to increment.  Name of the statistic to increment.
182    
183    =item value (optional)
184    
185    Value by which to increment it. If omitted, C<1> is assumed.
186    
187  =back  =back
188    
189  =cut  =cut
190  #: Return Type ;  #: Return Type ;
191  sub Add {  sub Add {
192      # Get the parameters.      # Get the parameters.
193      my ($self, $statName) = @_;      my ($self, $statName, $value) = @_;
194        # Fix the value.
195        if (! defined $value) {
196            $value = 1;
197        }
198      # Increment the statistic.      # Increment the statistic.
199      $self->{stats}->Add($statName);      $self->{stats}->Add($statName, $value);
200  }  }
201    
202  =head3 Finish  =head3 Finish
203    
204  C<< my $stats = $erload->Finish(); >>      my $stats = $erload->Finish();
205    
206  Finish loading the table. This closes the load file and loads its contents into the database.  Finish loading the table. This closes and sorts the load file.
 It also creates the indexes if the DBMS uses post-indexing.  
207    
208  =over 4  =over 4
209    
# Line 172  Line 219 
219  sub Finish {  sub Finish {
220      # Get this object instance.      # Get this object instance.
221      my ($self) = @_;      my ($self) = @_;
222        if ($self->{fh}) {
223      # Close the load file.      # Close the load file.
224      close $self->{fh};      close $self->{fh};
225            # Get the ERDB object.
226            my $erdb = $self->{dbh};
227            # Get the output file name.
228            my $fileName = $self->{fileName};
229            # Get the sort command for this relation.
230            my $sortCommand = $erdb->SortNeeded($self->{relName});
231            # Open the temp file for input via sort.
232            my $ih = Open(undef, "$sortCommand <$fileName.tmp |");
233            # Open the real file for output.
234            my $oh = Open(undef, ">$fileName");
235            Trace("Sorting into $fileName with command: $sortCommand") if T(3);
236            # Set up a timer.
237            my $start = time();
238            # Sort from the temp file into the output file.
239            while (! eof $ih) {
240                print $oh (<$ih>);
241            }
242            # Record the time spent
243            $self->{stats}->Add(sortTime => (time() - $start));
244            # Delete the temp file.
245            unlink "$fileName.tmp";
246            # Tell the user we're done.
247            Trace("Load file $fileName created.") if T(3);
248    
249        }
250      # Return the statistics object.      # Return the statistics object.
251      return $self->{stats};      return $self->{stats};
252  }  }
253    
254    =head3 FinishAndLoad
255    
256        my $stats = $erload->FinishAndLoad();
257    
258    Finish the load and load the table, returning the statistics.
259    
260    =cut
261    
262    sub FinishAndLoad {
263        # Get the parameters.
264        my ($self) = @_;
265        # Finish the load file.
266        my $retVal = $self->Finish();
267        # Load the table.
268        my $newStats = $self->LoadTable();
269        # Accumulate the stats.
270        $retVal->Accumulate($newStats);
271        # Return the result.
272        return $retVal;
273    }
274    
275  =head3 RelName  =head3 RelName
276    
277  C<< my $name = $erload->RelName; >>      my $name = $erload->RelName;
278    
279  Name of the relation being loaded by this object.  Name of the relation being loaded by this object.
280    
# Line 193  Line 287 
287      return $self->{relName};      return $self->{relName};
288  }  }
289    
290    =head3 LoadTable
291    
292        my $stats = $erload->LoadTable();
293    
294    Load the database table from the load file and return a statistics object.
295    
296    =cut
297    
298    sub LoadTable {
299        # Get the parameters.
300        my ($self) = @_;
301        # Get the database object, the file name, and the relation name.
302        my $erdb = $self->{dbh};
303        my $fileName = $self->{fileName};
304        my $relName = $self->{relName};
305        # Load the table. The third parameter indicates this is a drop and reload.
306        my $retVal = $erdb->LoadTable($fileName, $relName, truncate => 1);
307        # Return the result.
308        return $retVal;
309    }
310    
311  1;  1;
312    

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.23

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3