[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Annotation of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package ERDBLoad;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use ERDB;
9 :     use Stats;
10 :    
11 :     =head1 ERDB Table Load Utility Object
12 :    
13 :     =head2 Introduction
14 :    
15 :     This object is designed to assist with loading an ERDB data relation. The user
16 :     constructs the object by specifying an ERDB object and a relation name. This
17 :     re-creates the relevant relation. The client then passes in data lines which
18 :     are written to a file. When the file gets big enough, it is loaded into the
19 :     table. Calling the L</Finish> method loads any leftover data and optionally
20 :     creates the index.
21 :    
22 :     This module makes use of the internal ERDB property C<_dbh> and the internal
23 :     method C<_IsPrimary>.
24 :    
25 :     =cut
26 :    
27 :     #
28 :    
29 :     =head2 Public Methods
30 :    
31 :     =head3 new
32 :    
33 :     C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory, $estimatedRows); >>
34 :    
35 :     Begin loading an ERDB relation.
36 :    
37 :     =over 4
38 :    
39 :     =item erdb
40 :    
41 :     ERDB object representing the target database.
42 :    
43 :     =item relationName
44 :    
45 :     Name of the relation being loaded.
46 :    
47 :     =item directory
48 :    
49 :     Name of the directory to use for the load files, WITHOUT a trailing slash.
50 :    
51 : parrello 1.2 =item estimatedRows (optional)
52 :    
53 :     Estimated maximum number of table rows. If omitted, the table will be created in
54 :     a format that permits an essentially unlimited number of rows.
55 :    
56 : parrello 1.1 =back
57 :    
58 :     =cut
59 :    
sub new {
    # Unpack the constructor arguments; $estimatedRows is optional.
    my ($class, $erdb, $relationName, $directory, $estimatedRows) = @_;
    # The load directory has to exist before anything else is attempted.
    unless (-d $directory) {
        Confess("Load directory \"$directory\" not found.");
    }
    # The load file lives in the load directory and is named for the relation.
    my $fileName = $directory . "/" . $relationName . ".dtx";
    # The row estimate is only forwarded when the configuration asks for it;
    # otherwise it stays undefined.
    my $rowEstimate;
    if ($FIG_Config::estimate_rows) {
        $rowEstimate = $estimatedRows;
    }
    # Build the target table. The pre-index flag is handed to CreateTable so
    # that it can decide whether the indexes are built now. (The original
    # notes state that an existing table is dropped -- that happens inside
    # CreateTable, which is not visible here.)
    $erdb->CreateTable($relationName, $FIG_Config::preIndex, $rowEstimate);
    # Open the load file for output and keep the handle.
    my $fileHandle = Open(undef, ">" . $fileName);
    # Assemble the loader's state, bless it into the class and hand it back.
    # Note that the "dbh" slot holds the ERDB object itself.
    return bless {
        dbh => $erdb,
        fh => $fileHandle,
        fileName => $fileName,
        relName => $relationName,
        fileSize => 0,
        lineCount => 0,
        stats => Stats->new(),
        primary => $erdb->_IsPrimary($relationName)
    }, $class;
}
92 :    
93 :     =head3 Put
94 :    
95 :     C<< $erload->Put($field1, $field2, ..., $fieldN); >>
96 :    
97 :     Write a line of data to the load file. This may also cause the load file to be closed
98 :     and data read into the table.
99 :    
100 :     =over 4
101 :    
102 :     =item field1, field2, ..., fieldN
103 :    
104 :     List of field values to be put into the data line. The field values must be in the
105 :     order shown in the documentation for the table. Internal tabs and
106 :     new-lines will automatically be escaped before the data line is formatted.
107 :    
108 :     =back
109 :    
110 :     =cut
111 :     #: Return Type ;
sub Put {
    # The loader object comes first; whatever remains in @_ is field data.
    my $self = shift @_;
    # Escape every field value, keeping the caller's order.
    my @fields;
    for my $value (@_) {
        push @fields, Tracer::Escape($value);
    }
    # A primary relation carries one extra trailing field: the new-record flag.
    push @fields, '0' if $self->{primary};
    # Build the tab-delimited, newline-terminated data line.
    my $line = join("\t", @fields) . "\n";
    my $lineLength = length $line;
    # Work out how much room is left before the load file hits its size cap.
    # If this line does not fit, load what has been written so far first.
    my $roomLeft = 200000000 - $self->{fileSize};
    $self->Flush() if $lineLength > $roomLeft;
    # Append the record to the load file.
    print { $self->{fh} } $line;
    # Keep the running byte and line totals current.
    $self->{fileSize} += $lineLength;
    $self->{lineCount} ++;
}
136 :    
137 :     =head3 Flush
138 :    
139 :     C<< $erload->Flush(); >>
140 :    
141 :     Load all the data currently in the load file into the database. This clears the load
142 :     file and re-opens it.
143 :    
144 :     =cut
145 :     #: Return Type ;
sub Flush {
    # Pick up the loader instance.
    my $self = shift;
    # Push everything currently in the load file into the table...
    $self->_FlushData();
    # ...then re-open the load file so more data can be written to it.
    return $self->_ReOpen();
}
154 :    
155 :     =head3 Finish
156 :    
157 :     C<< my $stats = $erload->Finish(); >>
158 :    
159 :     Finish loading the table. This closes the load file and loads its contents into the database.
160 :     It also creates the indexes if the DBMS uses post-indexing.
161 :    
162 :     =over 4
163 :    
164 :     =item RETURN
165 :    
166 :     Returns a statistics object describing what happened during the load and containing any
167 :     error messages.
168 :    
169 :     =back
170 :    
171 :     =cut
172 :    
sub Finish {
    # Pick up the loader instance.
    my $self = shift;
    # Load whatever is still sitting in the load file.
    $self->_FlushData();
    # When the pre-index flag is off, the indexes are created now, after
    # the data is in place.
    unless ($FIG_Config::preIndex) {
        my $erdb = $self->{dbh};
        $erdb->CreateIndex($self->RelName);
    }
    # The load file is no longer needed, so remove it.
    unlink $self->{fileName};
    # Hand back the accumulated statistics.
    return $self->{stats};
}
187 :    
188 :     =head3 RelName
189 :    
190 :     C<< my $name = $erload->RelName; >>
191 :    
192 :     Name of the relation being loaded by this object.
193 :    
194 :     =cut
195 :    
sub RelName {
    # Pick up the loader instance.
    my $self = shift;
    # The relation name was stored in the object when it was constructed.
    return $self->{relName};
}
202 :    
203 :     =head2 Internal Methods
204 :    
205 :     =head3 _ReOpen
206 :    
207 :     Re-open the load file.
208 :    
209 :     This is an instance method.
210 :    
211 :     =cut
212 :    
sub _ReOpen {
    # Pick up the loader instance.
    my $self = shift;
    # Re-open the load file for output on the existing handle; the ">" mode
    # starts the file over from empty.
    Open($self->{fh}, ">$self->{fileName}");
    # Nothing has been written yet, so zero both running totals.
    $self->{fileSize} = 0;
    $self->{lineCount} = 0;
}
222 :    
223 :     =head3 _FlushData
224 :    
225 :     Close the load file and load all its data into the table.
226 :    
227 :     This is an instance method.
228 :    
229 :     =cut
230 :    
# Close the load file and load its contents into the relation's table.
#
# Takes only the loader instance. On success the line and byte totals for
# the file are added to the statistics object under "records" and "bytes".
# On failure a message is added to the statistics object instead. A failure
# to close the load file is also reported as a message, because buffered
# write errors only surface at close time and would otherwise leave a
# silently truncated load file. The method does not die on a load failure.
sub _FlushData {
    # Get this instance.
    my ($self) = @_;
    # Get the relation name.
    my $relName = $self->RelName;
    Trace("Flushing data to table $relName.") if T(2);
    # Get the statistics object and the underlying database handle (the
    # internal _dbh property of the ERDB object).
    my $stats = $self->{stats};
    my $dbh = $self->{dbh}->{_dbh};
    # Close the load file. A false return means the data may not have been
    # written out completely, so record it rather than ignoring it. The load
    # is still attempted so that existing behavior is otherwise unchanged.
    if (! close $self->{fh}) {
        my $closeMsg = "Error closing load file $self->{fileName} for $relName: $!";
        $stats->AddMessage($closeMsg);
        Trace($closeMsg) if T(1);
    }
    # Begin a database transaction. This is not actually for integrity reasons; it
    # speeds up the slow load process.
    $dbh->begin_tran();
    # Load the database table safely.
    my $rv;
    eval {
        Trace("Loading file into relation $relName.") if T(3);
        $rv = $dbh->load_table(file => $self->{fileName}, tbl => $relName);
    };
    # Save the eval error right away, before anything else can overwrite $@.
    my $evalError = $@;
    # Check to see if we succeeded. An undefined result covers both a die
    # inside the eval and a load_table call that returned nothing.
    if (!defined $rv) {
        # We've failed. Format a useful message. If we have an error message from
        # EVAL, we use it.
        my $msg = "Table load failed for $relName" . ($evalError ? ": $evalError" : ".");
        $stats->AddMessage($msg);
        Trace($msg) if T(1);
    } else {
        # Here we successfully loaded the table. Trace the number of records loaded.
        my $lineCount = $self->{lineCount};
        my $byteCount = $self->{fileSize};
        Trace("$lineCount records ($byteCount bytes) loaded into $relName.") if T(2);
        # Accumulate the statistics.
        $stats->Add("records", $lineCount);
        $stats->Add("bytes", $byteCount);
    }
    # Close the database transaction. This happens whether or not the load
    # succeeded, as in the original code.
    $dbh->commit_tran();
}
271 :    
272 :     1;
273 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3