[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Annotation of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package ERDBLoad;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use ERDB;
9 :     use Stats;
10 :    
11 :     =head1 ERDB Table Load Utility Object
12 :    
13 :     =head2 Introduction
14 :    
15 :     This object is designed to assist with loading an ERDB data relation. The user
16 :     constructs the object by specifying an ERDB object, a relation name, and a load directory. This
17 :     re-creates the relevant relation. The client then passes in data lines which
18 :     are written to a file. When the file gets big enough, it is loaded into the
19 :     table. Calling the L</Finish> method loads any leftover data and optionally
20 :     creates the index.
21 :    
22 :     This module makes use of the internal ERDB property C<_dbh> and the internal
23 :     method C<_IsPrimary>.
24 :    
25 :     =cut
26 :    
27 :     #
28 :    
29 :     =head2 Public Methods
30 :    
31 :     =head3 new
32 :    
33 :     C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory); >>
34 :    
35 :     Begin loading an ERDB relation.
36 :    
37 :     =over 4
38 :    
39 :     =item erdb
40 :    
41 :     ERDB object representing the target database.
42 :    
43 :     =item relationName
44 :    
45 :     Name of the relation being loaded.
46 :    
47 :     =item directory
48 :    
49 :     Name of the directory to use for the load files, WITHOUT a trailing slash.
50 :    
51 :     =back
52 :    
53 :     =cut
54 :    
sub new {
    # Unpack the class name followed by the three constructor arguments.
    my ($class, $erdb, $relationName, $directory) = @_;
    # Refuse to go any further unless the load directory really exists.
    Confess("Load directory \"$directory\" not found.") unless -d $directory;
    # The load file lives in the load directory and is named after the
    # relation, with a ".dtx" suffix.
    my $loadFile = join('/', $directory, "$relationName.dtx");
    # Ask the ERDB object to create the target table. The pre-index flag from
    # the FIG configuration is passed along so a pre-index DBMS gets its
    # indexes immediately. (According to the module's own notes, an existing
    # table of the same name is dropped by this call.)
    $erdb->CreateTable($relationName, $FIG_Config::preIndex);
    # Open the load file for output; passing undef asks Open for a new handle.
    my $handle = Open(undef, '>' . $loadFile);
    # Build the loader's state, bless it into the requested class, and hand
    # it back. Note that the ERDB object is stored under the key "dbh".
    return bless {
        dbh       => $erdb,
        fh        => $handle,
        fileName  => $loadFile,
        relName   => $relationName,
        fileSize  => 0,
        lineCount => 0,
        stats     => Stats->new(),
        primary   => $erdb->_IsPrimary($relationName)
    }, $class;
}
85 :    
86 :     =head3 Put
87 :    
88 :     C<< $erload->Put($field1, $field2, ..., $fieldN); >>
89 :    
90 :     Write a line of data to the load file. This may also cause the load file to be closed
91 :     and data read into the table.
92 :    
93 :     =over 4
94 :    
95 :     =item field1, field2, ..., fieldN
96 :    
97 :     List of field values to be put into the data line. The field values must be in the
98 :     order shown in the documentation for the table. Internal tabs and
99 :     new-lines will automatically be escaped before the data line is formatted.
100 :    
101 :     =back
102 :    
103 :     =cut
104 :     #: Return Type ;
sub Put {
    # Peel the loader off the argument list; what remains is the field data.
    my $self = shift;
    # Escape every field value so it can sit safely in a tab-delimited line.
    my @fields = map { Tracer::Escape($_) } @_;
    # Primary relations carry one extra trailing field (the new-record
    # field), which is always written as '0'.
    push @fields, '0' if $self->{primary};
    # Assemble the tab-delimited, newline-terminated record.
    my $record = join("\t", @fields) . "\n";
    my $recordLength = length $record;
    # Work out how much room is left before the load file reaches the
    # 200000000 ceiling (measured with length). If the record does not
    # fit, load what we have so far and start over with an empty file.
    my $roomLeft = 200000000 - $self->{fileSize};
    $self->Flush() if $recordLength > $roomLeft;
    # Append the record to the load file.
    print { $self->{fh} } $record;
    # Keep the running size and record count current.
    $self->{fileSize} += $recordLength;
    $self->{lineCount}++;
}
129 :    
130 :     =head3 Flush
131 :    
132 :     C<< $erload->Flush(); >>
133 :    
134 :     Load all the data currently in the load file into the database. This clears the load
135 :     file and re-opens it.
136 :    
137 :     =cut
138 :     #: Return Type ;
sub Flush {
    # The loader is the only argument.
    my $self = shift;
    # Step 1: push everything currently in the load file into the table.
    $self->_FlushData();
    # Step 2: truncate and re-open the load file so more records can be
    # accepted. This must stay the last statement: its value is what this
    # method returns.
    $self->_ReOpen();
}
147 :    
148 :     =head3 Finish
149 :    
150 :     C<< my $stats = $erload->Finish(); >>
151 :    
152 :     Finish loading the table. This closes the load file and loads its contents into the database.
153 :     It also creates the indexes if the DBMS uses post-indexing.
154 :    
155 :     =over 4
156 :    
157 :     =item RETURN
158 :    
159 :     Returns a statistics object describing what happened during the load and containing any
160 :     error messages.
161 :    
162 :     =back
163 :    
164 :     =cut
165 :    
sub Finish {
    # Get this object instance.
    my ($self) = @_;
    # Load whatever is still sitting in the load file into the table.
    $self->_FlushData();
    # On a post-index DBMS the indexes are built only now, after the data
    # has been loaded.
    if (! $FIG_Config::preIndex) {
        # The constructor stores the ERDB object under the key "dbh"; there
        # is no "erdb" key, so reading $self->{erdb} here would yield undef
        # and the method call would die.
        $self->{dbh}->CreateIndex($self->RelName);
    }
    # The load file is no longer needed. Removal is best-effort: a failure
    # to delete it does not affect the load result.
    unlink $self->{fileName};
    # Return the statistics object accumulated over all the flushes.
    return $self->{stats};
}
180 :    
181 :     =head3 RelName
182 :    
183 :     C<< my $name = $erload->RelName; >>
184 :    
185 :     Name of the relation being loaded by this object.
186 :    
187 :     =cut
188 :    
sub RelName {
    # The loader is the only argument.
    my $self = shift;
    # The relation name was recorded by the constructor under "relName".
    return $self->{relName};
}
195 :    
196 :     =head2 Internal Methods
197 :    
198 :     =head3 _ReOpen
199 :    
200 :     Re-open the load file.
201 :    
202 :     This is an instance method.
203 :    
204 :     =cut
205 :    
sub _ReOpen {
    # The loader is the only argument.
    my $self = shift;
    # Re-use the existing file handle, opening the load file for output
    # again ('>' mode) so the next batch starts from an empty file.
    my $fileSpec = '>' . $self->{fileName};
    Open($self->{fh}, $fileSpec);
    # The file is empty again, so both running totals go back to zero.
    $self->{fileSize} = $self->{lineCount} = 0;
}
215 :    
216 :     =head3 _FlushData
217 :    
218 :     Close the load file and load all its data into the table.
219 :    
220 :     This is an instance method.
221 :    
222 :     =cut
223 :    
sub _FlushData {
    # Get this instance.
    my ($self) = @_;
    # Get the relation name and the statistics object.
    my $relName = $self->RelName;
    my $stats = $self->{stats};
    Trace("Flushing data to table $relName.") if T(2);
    # Close the load file. Output is buffered, so a write error (for example
    # a full disk) may only be reported here. If the close fails, the file
    # may be incomplete; record that in the statistics instead of ignoring
    # it. The load is still attempted so that nothing else about the flow
    # changes.
    if (! close $self->{fh}) {
        my $closeMsg = "Error closing load file $self->{fileName} for $relName: $!";
        $stats->AddMessage($closeMsg);
        Trace($closeMsg) if T(1);
    }
    # The table is loaded from the load file through the DBKernel handle,
    # which is kept in the ERDB object's internal "_dbh" property.
    my $dbh = $self->{dbh}->{_dbh};
    # Begin a database transaction. This is not actually for integrity
    # reasons; it speeds up the slow load process.
    $dbh->begin_tran();
    # Load the database table safely: a die inside load_table is trapped and
    # simply leaves $rv undefined.
    my $rv;
    eval {
        Trace("Loading file into relation $relName.") if T(3);
        $rv = $dbh->load_table(file => $self->{fileName}, tbl => $relName);
    };
    # Capture the eval error at once, before any later call can overwrite it.
    my $evalError = $@;
    # Check to see if we succeeded.
    if (! defined $rv) {
        # We've failed. Format a useful message, including the error text
        # from the eval when there is one.
        my $msg = "Table load failed for $relName" . ($evalError ? ": $evalError" : ".");
        $stats->AddMessage($msg);
        Trace($msg) if T(1);
    } else {
        # Here we successfully loaded the table. Trace the number of records
        # and the amount of data loaded.
        my $lineCount = $self->{lineCount};
        my $byteCount = $self->{fileSize};
        Trace("$lineCount records ($byteCount bytes) loaded into $relName.") if T(2);
        # Accumulate the statistics.
        $stats->Add("records", $lineCount);
        $stats->Add("bytes", $byteCount);
    }
    # Close the database transaction. This is done on both the success and
    # the failure path, and its result is this method's return value.
    $dbh->commit_tran();
}
264 :    
265 :     1;
266 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3