[Bio] / FigKernelPackages / DBKernel.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/DBKernel.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.63 - (view) (download) (as text)

1 : gdpusch 1.52 # -*- perl -*-
2 :     ########################################################################
3 : olson 1.18 # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 : parrello 1.49 #
8 : olson 1.18 # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 : parrello 1.49 # Public License.
11 : olson 1.18 #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 : gdpusch 1.52 ########################################################################
18 : olson 1.18
19 : parrello 1.1 package DBKernel;
20 :    
21 :     use strict;
22 :     use DBI;
23 :     use Tracer;
24 :     use Data::Dumper;
25 : olson 1.22 use FileHandle;
26 : olson 1.11 use Carp;
27 : parrello 1.1
28 :     =head1 Reduced-Instruction Database Kernel
29 :    
30 :     This is a reduced-function subset of the B<DBRtns> package that was created for
31 :     reasons that made sense to Bruce before they changed his medication but which he
32 :     cannot now remember. At some point it will be merged into DBRtns proper. For now,
33 :     it functions as the DBRtns base class.
34 :    
35 :     =cut
36 :    
37 :     #
38 :    
39 :     =head2 Public Methods
40 :    
41 :     =head3 new
42 :    
43 : parrello 1.49 my $dbh = DBKernel->new($dbms, $dbname, $dbuser, $dbpass, $dbport, $dbhost, $dbsock);
44 : parrello 1.1
45 :     Construct a database object. This process creates a standard PERL DBI handle and
46 :     caches it for our use.
47 :    
48 :     =over 4
49 :    
50 :     =item dbms
51 :    
52 :     The name of the DBMS system. Currently, this is either C<mysql> for MySQL or
53 :     C<Pg> for PostGres.
54 :    
55 :     =item dbname
56 :    
57 :     The name of the database to use.
58 :    
59 :     =item dbuser
60 :    
61 :     The user whose credentials should be used to open the database.
62 :    
63 :     =item dbpass
64 :    
65 :     Password associated with the specified user.
66 :    
67 :     =item dbport
68 :    
69 :     TCP/IP port to use. Usually this is 3306.
70 :    
71 : olson 1.3 =item dbhost
72 :    
73 :     Hostname of the database server to use. Undefined means to use the local host (note
74 :     that this may be different than a hostname of localhost - postgres, for instance, will
75 :     use a more efficient mechansim if no hostname is specified).
76 :    
77 : olson 1.23 =item dbsock
78 :    
79 : parrello 1.36 Pathname to the Unix socket the database is listening on. Undefined means the local host.
80 : olson 1.23
81 : parrello 1.1 =item RETURN
82 :    
83 :     A newly-constructed object connected to the specified database.
84 :    
85 :     =back
86 :    
87 :     =cut
88 :     sub new {
89 : olson 1.23 my ($class, $dbms, $dbname, $dbuser, $dbpass, $dbport, $dbhost, $dbsock) = @_;
90 : parrello 1.1
91 :     my @opts;
92 :    
93 : olson 1.3 if (defined($dbport))
94 :     {
95 : parrello 1.5 push(@opts, "port=${dbport}");
96 : olson 1.3 }
97 :    
98 : olson 1.23 if ($dbms eq "mysql")
99 : olson 1.3 {
100 : parrello 1.37 if ($dbhost)
101 : olson 1.23 {
102 :     push(@opts, "hostname=$dbhost");
103 :     }
104 : parrello 1.37 if ($dbsock)
105 : olson 1.23 {
106 :     push(@opts, "mysql_socket=$dbsock");
107 :     }
108 : olson 1.3 }
109 : olson 1.23 elsif ($dbms eq "Pg")
110 : olson 1.4 {
111 : olson 1.23 if (defined($dbhost))
112 :     {
113 :     push(@opts, "host=$dbhost");
114 :     }
115 : olson 1.4 }
116 : parrello 1.49
117 : parrello 1.1
118 :     #
119 :     # Late-model mysql needs to have the client enable loading from local files.
120 :     #
121 :     if ($dbms eq "mysql") {
122 :     push(@opts, "mysql_local_infile=1");
123 :     }
124 :    
125 : parrello 1.36 # Decide if this is a pre-index or post-index DBMS. The "preIndex" variable in
126 :     # FIG_Config determines whether this is a pre-index or post-index. This capability
127 :     # was introduced for performance testing.
128 : parrello 1.7 my $preload = $FIG_Config::preIndex;
129 : parrello 1.36 # Now connect to the database.
130 : parrello 1.1 my $opts = join(";", @opts);
131 :     my $data_source = "DBI:$dbms(AutoCommit => 1):dbname=$dbname;$opts";
132 : parrello 1.51 Trace("Connect string is: $data_source") if T(3);
133 : parrello 1.56 my $dbh = Connect($data_source, $dbuser, $dbpass, $dbms);
134 : parrello 1.1 bless {
135 : parrello 1.56 _connect => [$data_source, $dbuser, $dbpass],
136 : parrello 1.1 _dbh => $dbh,
137 :     _dbms => $dbms,
138 : parrello 1.5 _preIndex => $preload,
139 : parrello 1.53 _host => ($dbhost || "localhost"),
140 : parrello 1.56 _retries => 0,
141 : parrello 1.5 }, $class;
142 : parrello 1.1 }
143 :    
144 : parrello 1.56 =head3 Connect
145 :    
146 :     my $dbh = DBKernel::Connect($data_source, $dbuser, $dbpass, $dbms);
147 :    
148 :     Connect to the database using the specified information. This method has
149 :     been separated out from the constructor to make it possible to reconnect
150 :     after a connection failure.
151 :    
152 :     =over 4
153 :    
154 :     =item data_source
155 :    
156 :     Connection string for the database itself.
157 :    
158 :     =item dbuser
159 :    
160 :     User name for accessing the database.
161 :    
162 :     =item dbpass
163 :    
164 :     Password for the user name.
165 :    
166 :     =item dbms
167 :    
168 :     Database type (C<mysql>, C<Pg>, C<SQLite>).
169 :    
170 :     =item RETURN
171 :    
172 :     Returns the handle to the database.
173 :    
174 :     =back
175 :    
176 :     =cut
177 :    
178 :     sub Connect {
179 :     my ($data_source, $dbuser, $dbpass, $dbms) = @_;
180 : parrello 1.63 my $retVal = DBI->connect( $data_source, $dbuser, $dbpass );
181 :     if (! $retVal) {
182 :     my $msg = ErrorMessage($dbms);
183 :     Confess($msg);
184 :     }
185 : parrello 1.56 $retVal->{PrintError} = 1;
186 :     $retVal->{RaiseError} = 0;
187 :     if ($dbms eq "Pg") {
188 :     $retVal->do(qq(SET "ENABLE_SEQSCAN" TO "OFF"));
189 :     $retVal->do(qq(SET DATESTYLE TO Postgres,US));
190 : parrello 1.58 } elsif ($dbms eq "SQLite") {
191 : parrello 1.62 $retVal->do("pragma synchronous = OFF;");
192 :     } elsif ($dbms eq "mysql") {
193 :     $retVal->{mysql_auto_reconnect} = 1;
194 : parrello 1.56 }
195 :     return $retVal;
196 :     }
197 :    
198 :     =head3 set_retries
199 :    
200 :     $db->set_retries($count);
201 :    
202 :     Specify the number of times a SELECT should be retried before failing.
203 : parrello 1.53
204 : parrello 1.56 =cut
205 :    
206 :     sub set_retries {
207 :     my ($self, $count) = @_;
208 :     $self->{_retries} = $count;
209 :     }
210 : parrello 1.53
211 : parrello 1.59 =head3 test_mode
212 :    
213 :     $db->test_mode();
214 :    
215 :     Denote that this connection is in test mode. Certain
216 :     performance-enhancing features may be disabled in test mode.
217 :    
218 :     =cut
219 :    
220 :     sub test_mode {
221 :     # Get the parameters.
222 :     my ($self) = @_;
223 :     # Denote that we're in test mode.
224 :     $self->{testFlag} = 1;
225 :     # If we're mySQL, turn off the query cache.
226 :     if ($self->{_dbms} eq 'mysql') {
227 :     $self->{_dbh}->do("SET SESSION query_cache_type = OFF");
228 :     }
229 :     }
230 :    
231 :    
232 : overbeek 1.35 =head3 set_readonly_handle
233 :    
234 :     C<<$db->set_readonly_handle($readonly_db); >>
235 :    
236 :     Set up a DBKernel instance that should be use to make readonly (select) queries
237 :     with. This is used in a mirroring setup where any queries that change the database
238 :     are made on an external database, but readonly queries can be made on a local mirror
239 :     for better performance.
240 :    
241 :     =cut
242 :    
243 :     sub set_readonly_handle
244 :     {
245 :     my($self, $h) = @_;
246 :    
247 :     #warn "setting readonly handle for db\n";
248 :    
249 :     $self->{_ro_dbobj} = $h;
250 :     $self->{_ro_dbh} = $h->{_dbh};
251 :     }
252 : parrello 1.49
253 : parrello 1.1 =head3 set_raise_exceptions
254 :    
255 : parrello 1.49 my $oldValue = $db->set_raise_exceptions($newValue);
256 : parrello 1.1
257 :     Set the B<RaiseError> flag to a new value. If the flag is C<1>, then a database
258 :     error will throw an exception. If it is C<0>, an error will be reflected by a
259 :     return value.
260 :    
261 :     =over 4
262 :    
263 :     =item newValue
264 :    
265 :     C<1> if you want errors to throw an exception, C<0> if you want to continue
266 :     processing after errors.
267 :    
268 :     =item RETURN
269 :    
270 :     Returns the previous value of the flag.
271 :    
272 :     =back
273 :    
274 :     =cut
275 :    
276 :     sub set_raise_exceptions {
277 :     my($self, $enable) = @_;
278 :     my $dbh = $self->{_dbh};
279 :     my $old = $dbh->{RaiseError};
280 :     $dbh->{RaiseError} = $enable;
281 :     return $old;
282 :     }
283 :    
284 : parrello 1.29 =head3 CreateDB
285 :    
286 : parrello 1.49 DBKernel::CreateDB($dbname);
287 : parrello 1.29
288 :     Drop and create a database with the specified name. If the drop fails it will generate
289 :     an error message, but will not be considered an error.
290 :    
291 : parrello 1.56 This method is deprecated, since the database will be created without the necessary
292 :     security privileges.
293 :    
294 : parrello 1.29 =over 4
295 :    
296 :     =item dbname
297 :    
298 :     Name of the database to drop and create.
299 :    
300 :     =back
301 :    
302 :     =cut
303 :    
304 :     sub CreateDB {
305 :     # Get the database name.
306 :     my ($dbname) = @_;
307 :     # Check the database type, since we'll be doing direct database utility calls.
308 :     if ($FIG_Config::dbms eq "Pg") {
309 :     my $dbport = $FIG_Config::dbport;
310 :     my $dbuser = $FIG_Config::dbuser;
311 : gdpusch 1.52 Trace("Dropping old database $dbname (failure is okay)") if T(2);
312 : parrello 1.29 system("dropdb -p $dbport -U $dbuser $dbname");
313 : gdpusch 1.52 Trace("Creating new database: $dbname $dbuser $dbport") if T(2);
314 : parrello 1.29 &FIG::run("createdb -p $dbport -U $dbuser $dbname");
315 :     } elsif ($FIG_Config::dbms eq "mysql") {
316 : gdpusch 1.52 Trace("Dropping old database $dbname (failure is okay).") if T(2);
317 : parrello 1.29 system("mysqladmin -u $FIG_Config::dbuser -p drop $dbname");
318 : gdpusch 1.52 Trace("Creating new database: $dbname $FIG_Config::dbuser") if T(2);
319 : parrello 1.29 &FIG::run("mysqladmin -u $FIG_Config::dbuser -p create $dbname");
320 :     }
321 : parrello 1.49
322 : parrello 1.29 }
323 :    
324 :    
325 : parrello 1.1 =head3 SQL
326 :    
327 : parrello 1.49 my $rv = $db->SQL($sql, $verbose, @bind_values);
328 : parrello 1.1
329 :     Execute an SQL statement. If used for a SELECT statement, the entire result set will be
330 :     returned via an array reference. If used for another statement type, the result will be
331 :     a count of the number of rows affected. Note that the type of statement is determined by
332 :     a simple case-insensitive prefix match. If the first 6 characters of the command are
333 :     C<SELECT> in any combination of upper- and lower-case, then the statement is treated as
334 :     a query; otherwise it's treated as a command.
335 :    
336 :     =over 4
337 :    
338 :     =item sql
339 :    
340 :     SQL statement to execute.
341 :    
342 :     =item verbose
343 :    
344 :     C<1> if the command should be traced, else C<0>. This option is deprecated. You
345 :     can cause SQL commands to be traced by setting the trace level for C<DBKernel>
346 : parrello 1.26 to 3 (information).
347 : parrello 1.1
348 : parrello 1.2 =item bind_values1, bind_values2, ... bind_valuesN
349 :    
350 :     List of bound values to be used to replace the parameter markers (C<?>) in the
351 :     SQL statement.
352 :    
353 : parrello 1.1 =item RETURN
354 :    
355 :     For a C<SELECT> statement, returns a reference to a list of lists. Each element in
356 :     the big list is a result row; the elements inside a result row correspond to the
357 :     columns of the query.
358 :    
359 :     For a command, returns the number of rows affected. If no rows are affected,
360 :     a I<true 0> is returned (that is, the return value acts as 0 when used numerically and
361 : parrello 1.45 TRUE when used in a boolean expression). If an error occurs, this method will
362 :     throw an exception.
363 : parrello 1.1
364 :     =back
365 :    
366 :     =cut
367 :     sub SQL {
368 : parrello 1.2 my($self,$sql,$verbose, @bind_values) = @_;
369 : parrello 1.1
370 :     if ($verbose) {
371 : parrello 1.2 Trace("Executing SQL statement: $sql") if T(0);
372 : parrello 1.1 }
373 :    
374 :     my $dbh = $self->{_dbh};
375 : parrello 1.5 my $retVal;
376 : overbeek 1.35 if ($sql =~ /^\s*select/i) {
377 : parrello 1.49
378 : overbeek 1.35 # Choose to use the readonly handle if one exists.
379 : parrello 1.49
380 : overbeek 1.35 my $ro = $self->{_ro_dbh};
381 : parrello 1.61 if (ref($ro))
382 :     {
383 :     $dbh = $ro if ref($ro);
384 :     #warn "using RO for $sql\n";
385 :     }
386 :     # We may need to try multiple times.
387 :     my $tries_left = $self->{_retries};
388 :     # In MySQL test mode, we turn off query caching.
389 :     # If we run out of retries, we'll confess. Otherwise, $retVal will get a
390 :     # value put in it.
391 :     while (! defined $retVal) {
392 :     Trace("Executing SQL query: $sql") if T(SQL => 3);
393 :     eval {
394 :     $retVal = $dbh->selectall_arrayref($sql, undef, @bind_values);
395 :     };
396 :     if ($@) {
397 :     Confess("Query failed: $@");
398 :     } elsif (! defined $retVal) {
399 :     # We have a soft error. Save the message.
400 :     my $msg = $dbh->errstr;
401 :     # See if we can retry. A retry is possible if the error is
402 :     # timeout or connection-related.
403 :     if ($tries_left && $msg =~ /connect|gone|lost|timeout/) {
404 :     # Yes. Attempt a reconect.
405 :     $self->Reconnect();
406 :     # Get back the database handle.
407 :     $dbh = $self->{_dbh};
408 :     # Denote we've used up a retry.
409 :     $tries_left--;
410 :     } else {
411 :     # We can't recover, so confess.
412 :     Confess("SELECT failed: $msg");
413 :     }
414 :     Confess("Query failed: " . $dbh->errstr);
415 :     } else {
416 :     Trace(@{$retVal} . " rows returned from query.") if T(SQL => 3);
417 :     }
418 :     }
419 : parrello 1.1 } else {
420 : parrello 1.8 Trace("Executing SQL command: $sql") if T(SQL => 3);
421 : parrello 1.2 eval {
422 :     $retVal = $dbh->do($sql, undef, @bind_values);
423 :     };
424 : parrello 1.38 if ($@) {
425 : parrello 1.19 Confess("Query '$sql' failed: $@");
426 : parrello 1.40 } elsif (! defined $retVal) {
427 : parrello 1.39 Confess("Query failed: " . $dbh->errstr);
428 : parrello 1.2 } else {
429 : parrello 1.8 Trace("$retVal rows altered by command.") if T(SQL => 3);
430 : parrello 1.5 }
431 : parrello 1.1 }
432 :     return $retVal;
433 :     }
434 :    
435 : parrello 1.61 =head3 Reconnect
436 :    
437 :     $db->Reconnect();
438 :    
439 :     Attempt to reconnect to the database. This is useful when it appears that the
440 :     connection has been lost.
441 :    
442 :     =cut
443 :    
444 :     sub Reconnect {
445 :     # Get the parameters.
446 :     my ($self) = @_;
447 :     # Get the database handle.
448 :     my $dbh = $self->{_dbh};
449 :     # Force a close just in case.
450 :     eval { $dbh->disconnect() };
451 :     # Reconnect.
452 :     Trace("Reconnecting after error.") if T(1);
453 :     $dbh = Connect(@{$self->{_connect}}, $self->{_dbms});
454 :     # Save the new handle.
455 :     $self->{_dbh} = $dbh;
456 :    
457 :     }
458 :    
459 : parrello 1.63 =head3 ErrorMessage
460 :    
461 :     my $msg = $db->ErrorMessage($handle);
462 :    
463 :     Return the error message on the specified handle. Some analysis will be
464 :     performed to determine whether the error is on the server or is the fault
465 :     of the client. If no handle is supplied, then the error information will
466 :     be taken from he last DBI request. If this method is called statically,
467 :     the DBMS type should be supplied as the first parameter.
468 :    
469 :     =over 4
470 :    
471 :     =item handle
472 :    
473 :     Handle on which the error occurred.
474 :    
475 :     =item RETURN
476 :    
477 :     Returns the appropriate error message with a message prefix of C<DBServer Error>
478 :     if it looks like the error permits a retry.
479 :    
480 :     =back
481 :    
482 :     =cut
483 :    
484 :     use constant MYSQL_RETRY_ERRORS =>
485 :     { 2002 => 1, 2006 => 1, 2013 => 1, 2055 => 1, 1040 => 1, 19 => 1 };
486 :    
487 :     sub ErrorMessage {
488 :     # Get the parameters.
489 :     my ($self, $handle) = @_;
490 :     # Get the error message, number, and DBMS type.
491 :     my ($num, $msg, $dbms);
492 :     if (defined $handle) {
493 :     ($num, $msg) = ($handle->err, $handle->errstr);
494 :     } else {
495 :     ($num, $msg) = (DBI::err, DBI::errstr);
496 :     }
497 :     if (ref $self) {
498 :     $dbms = $self->{_dbms};
499 :     } else {
500 :     $dbms = $self;
501 :     }
502 :     # Declare the return variable.
503 :     my $retVal;
504 :     # Is this MySQL?
505 :     if ($dbms eq 'mysql') {
506 :     # Yes. Check the error number.
507 :     Trace("Database error check. Error number is $num.") if T(3);
508 :     if (MYSQL_RETRY_ERRORS->{$num}) {
509 :     # Here it's a server-related error.
510 :     $retVal = "DBServer Error: ";
511 :     } else {
512 :     # Otherwise, it's a normal error.
513 :     $retVal = "MySQL Error: ";
514 :     }
515 :     } else {
516 :     # Here all errors are normal.
517 :     $retVal = "Database Error: ";
518 :     }
519 :     # Add the message text to the error.
520 :     $retVal .= $msg;
521 :     # Return the result.
522 :     return $retVal;
523 :     }
524 :    
525 :    
526 :    
527 :    
528 : parrello 1.27 =head3 SetUsing
529 :    
530 : parrello 1.49 my $usingClause = $db->SetUsing(@tableNames);
531 : parrello 1.27
532 :     Return the body of a DELETE statement that is appropriate to the
533 :     particular DBMS. For example, in MySQL the USING statement must contain the
534 :     name of the table being deleted, but in PostGres it cannot contain the
535 :     name of the table being deleted. The delete statement returned will
536 :     not contain a WHERE; that must be added by the client.
537 :    
538 :     =over 4
539 :    
540 :     =item $tableName1, $tableName2, ... $tableNameN
541 :    
542 :     List of the names of the tables involved. The last table is the one being
543 :     deleted.
544 :    
545 :     =item RETURN
546 :    
547 :     Returns a DELETE statement that allows deletion of the last table named
548 :     using a WHERE clause that may contain fields from any of the tables in
549 :     the list.
550 :    
551 :     =back
552 :    
553 :     =cut
554 :     #: Return Type $;
555 :     sub SetUsing {
556 :     # Get the parameters.
557 :     my ($self, @tableNames) = @_;
558 :     # Count the tables.
559 :     my $N = $#tableNames;
560 :     # Declare the return variable.
561 :     my $retVal = "DELETE FROM $tableNames[$N]";
562 :     if ($N > 0) {
563 : parrello 1.28 if ($self->{_dbms} eq "Pg") {
564 : parrello 1.27 # It's PostGres, so pop off the target table's name to keep it
565 :     # out of the USING clause.
566 :     pop @tableNames;
567 :     }
568 :     $retVal .= " USING " . join(", ", @tableNames);
569 :     }
570 :     # Return the result.
571 :     return $retVal;
572 :     }
573 :    
574 : parrello 1.1 =head3 get_tables
575 :    
576 : parrello 1.49 my @tableNames = $db->get_tables();
577 : parrello 1.1
578 :     Return a list of the table names for the current database. If there are no tables, an
579 :     empty list will be returned.
580 :    
581 :     It is worth remembering that most DBMS packages are case-insensitive with respect to
582 :     column and table names. Therefore, when manipulating this list, be sure to do
583 :     case-insensitive matching. For example, if you want to find out if there's a table named
584 :     C<Genome>, PostGres will have changed the name to C<genome>, and Paradox will have changed
585 :     the name to C<GENOME>. MySQL's behaviour depends on the collating sequence and character
586 :     set selected when the database was created, which is almost worse.
587 :    
588 :     =cut
589 :    
590 :     sub get_tables {
591 :    
592 :     my($self) = @_;
593 :    
594 : olson 1.30 if (ref($self->{table_cache}) eq "ARRAY")
595 :     {
596 :     return @{$self->{table_cache}};
597 :     }
598 :    
599 : parrello 1.1 my $dbh = $self->{_dbh};
600 :    
601 :     my $quote = $dbh->get_info(29); # SQL_IDENTIFIER_QUOTE_CHAR
602 :    
603 :     my @tables = $dbh->tables();
604 :    
605 : olson 1.47 #
606 :     # Mysql might have names in the form '`metagenome`.`protein_sequence_seeks`'
607 :     #
608 :     my @ret;
609 :     if ($self->{_dbms} eq 'mysql')
610 :     {
611 :     @ret = map {
612 :     if ($quote)
613 :     {
614 :     if (/^($quote[^$quote]*$quote\.)?$quote([^$quote]*)$quote/)
615 :     {
616 :     $2;
617 :     }
618 :     else
619 :     {
620 :     $_;
621 :     }
622 :     }
623 :     else
624 :     {
625 :     s/^[^.]+\.//;
626 :     $_;
627 :     }
628 :     } @tables;
629 :     }
630 :     else
631 :     {
632 :     @ret = map { $quote ne "" && s/^$quote(.*?)$quote$/$1/; s/^[^.]+\.//; $_ } @tables;
633 :     }
634 : olson 1.30
635 :     $self->{table_cache} = [@ret];
636 :    
637 :     return @ret;
638 : parrello 1.1 }
639 :    
640 :     =head3 table_exists
641 :    
642 : parrello 1.49 my $existFlag = $db->table_exists($table);
643 : parrello 1.1
644 :     Return TRUE if the specified table exists in the database, else FALSE. The table
645 :     name is considered case-insensitive, for reasons explained in L</get_tables>.
646 :    
647 :     =over 4
648 :    
649 :     =item table
650 :    
651 :     Name of the table whose existence is under question.
652 :    
653 :     =item RETURN
654 :    
655 :     Returns C<1> if the specified table exists in the database, else FALSE.
656 :    
657 :     =back
658 :    
659 :     =cut
660 :    
661 :     sub table_exists {
662 : parrello 1.2
663 : parrello 1.1 my($self, $table) = @_;
664 : parrello 1.5 $table = lc $table;
665 : parrello 1.1
666 :     return (grep { $table eq lc $_ } $self->get_tables()) > 0;
667 :     }
668 :    
669 :     =head3 drop_table
670 :    
671 : parrello 1.49 $db->drop_table(tbl => $table);
672 : parrello 1.1
673 :     Remove the named table from the database if it exists.
674 :    
675 :     =over 4
676 :    
677 :     =item table
678 :    
679 :     Name of the table to be dropped.
680 :    
681 :     =back
682 :    
683 :     =cut
684 :    
685 :     sub drop_table {
686 :     my $self = shift @_;
687 :     my %arg = @_;
688 :     my $tbl = $arg{tbl};
689 :     my $dbh = $self->{_dbh};
690 :     my $dbms = $self->{_dbms};
691 :     my $cmd;
692 :    
693 : overbeek 1.34 #
694 :     # Invalidate table cache.
695 :     #
696 :     delete $self->{table_cache};
697 : parrello 1.49
698 : parrello 1.1 if ($dbms eq "mysql") {
699 : parrello 1.5 $cmd = "DROP TABLE IF EXISTS $tbl;" ;
700 :     } else {
701 :     if ($self->table_exists($tbl)) {
702 :     $cmd = "DROP TABLE $tbl;" ;
703 :     }
704 : parrello 1.1 }
705 :     if ($cmd) {
706 : parrello 1.5 Trace("Executing drop command $cmd.") if T(3);
707 :     if ($dbh->do($cmd)) {
708 :     Trace("Table $tbl dropped.") if T(2);
709 :     } else {
710 : parrello 1.1 Trace("Error dropping table: " . $dbh->errstr) if T(0);
711 : parrello 1.5 }
712 : parrello 1.1 }
713 :     }
714 :    
715 :     =head3 create_table
716 :    
717 : parrello 1.49 $db->create_table(tbl => $table, flds => $flds, estimates => [$rowSize, $rowCount]);
718 : parrello 1.1
719 :     Create a new table with the specified name and the specified fields. The
720 :     fields are specified in the form of the string that appears between the
721 :     parentheses in a C<CREATE TABLE> statement. So, for example, to create
722 :     a table called C<Genome> with a 20-character ID, a 255-character name, an
723 :     index number, and a long text sequence field, you would code
724 :    
725 : parrello 1.5 $db->create_table(tbl => 'Genome',
726 :     flds => 'id VARCHAR(20) NOT NULL PRIMARY KEY, ' .
727 : parrello 1.41 'name VARCHAR(255), indexNum INT, seq TEXT');
728 : parrello 1.1
729 :     This method does not return a result. If the table creation fails for any
730 :     reason, it will throw an exception.
731 :    
732 : parrello 1.10 If MySQL is being used and the C<estimates> option is specified, the table will be
733 :     created using MyISAM.
734 :    
735 : parrello 1.1 =over 4
736 :    
737 :     =item tbl
738 :    
739 :     Name to give to the new table.
740 :    
741 :     =item flds
742 :    
743 :     Field specifications for the new table. This should be a single string that
744 :     consists of a comma-delimited list of the I<create-definition> syntactic unit
745 :     for SQL. In MySQL 4.1, it's defined as follows.
746 :    
747 : parrello 1.5 create_definition:
748 :     column_definition
749 :     | [CONSTRAINT [symbol]] PRIMARY KEY [index_type] (index_col_name,...)
750 :     | KEY [index_name] [index_type] (index_col_name,...)
751 :     | INDEX [index_name] [index_type] (index_col_name,...)
752 :     | [CONSTRAINT [symbol]] UNIQUE [INDEX]
753 :     [index_name] [index_type] (index_col_name,...)
754 :     | [FULLTEXT|SPATIAL] [INDEX] [index_name] (index_col_name,...)
755 :     | [CONSTRAINT [symbol]] FOREIGN KEY
756 :     [index_name] (index_col_name,...) [reference_definition]
757 :     | CHECK (expr)
758 :    
759 :     column_definition:
760 :     col_name type [NOT NULL | NULL] [DEFAULT default_value]
761 :     [AUTO_INCREMENT] [UNIQUE [KEY] | [PRIMARY] KEY]
762 :     [COMMENT 'string'] [reference_definition]
763 : parrello 1.1
764 : parrello 1.10 =item rowSize
765 :    
766 :     Average expected row size.
767 :    
768 :     =item rowCount
769 :    
770 :     Estimated maximum number of rows.
771 :    
772 : parrello 1.1 =back
773 :    
774 :     =cut
775 :    
776 :     sub create_table {
777 :     my $self = shift @_;
778 :     my %arg = @_;
779 :     my $tbl = $arg{tbl};
780 :     my $flds = $arg{flds};
781 :     my $dbh = $self->{_dbh};
782 :     my $dbms = $self->{_dbms};
783 : parrello 1.10 my $options = "";
784 : overbeek 1.34
785 :     #
786 :     # Invalidate table cache.
787 :     #
788 :    
789 :     delete $self->{table_cache};
790 :    
791 : olson 1.25 if ($self->{_dbms} eq "mysql")
792 :     {
793 :     if (not $FIG_Config::mysql_v3)
794 :     {
795 :     $options = " DEFAULT CHARSET latin1 COLLATE latin1_bin";
796 :     }
797 : parrello 1.16 if (defined $arg{estimates}) {
798 :     my ($rowSize, $rowCount) = @{$arg{estimates}};
799 : olson 1.25 if (not $FIG_Config::mysql_v3)
800 :     {
801 :     $options .= " ENGINE = MyISAM";
802 :     }
803 :     $options .= " AVG_ROW_LENGTH = $rowSize MAX_ROWS = $rowCount";
804 : parrello 1.16 }
805 : parrello 1.10 }
806 : parrello 1.31 my $cmd = "CREATE TABLE $tbl ( $flds )$options;";
807 :     Trace("Creating table: $cmd") if T(SQL => 2);
808 :     $dbh->do($cmd) ||
809 : parrello 1.5 Confess("Error creating table $tbl: " . $dbh->errstr);
810 : parrello 1.1 }
811 :    
812 :     =head3 load_table
813 :    
814 : parrello 1.50 my $rowCount = $db->load_table(file => $file, tbl => $tbl, delim => $delim, style => $style);
815 : parrello 1.1
816 :     Load a table from a file. This is the fastest way to load a large table, and for best
817 :     results it should be done before any indexes are created for it. For MySQL, the file
818 :     must contain one row per line, and the fields within a row should be tab-delimited.
819 :     For PostGres, you can specify a different delimiter string using the C<delim> option.
820 :    
821 :     =over 4
822 :    
823 :     =item file
824 :    
825 :     Fully-qualified name of the file containing the data to load. The file must contain
826 :     one line per table row, and the fields in each row must be presented in the order in
827 :     which the columns were specified in the L</create_table> method.
828 :    
829 :     =item tbl
830 :    
831 :     Name of the table into which the data should be loaded.
832 :    
833 : parrello 1.20 =item delim (optional)
834 : parrello 1.1
835 : parrello 1.21 String separating the fields on a single line. The default is a tab (C<\t>). This
836 :     must be a single character so that it will work with all of the different database
837 :     technologies.
838 : parrello 1.1
839 : parrello 1.50 =item style (optional)
840 :    
841 :     Style of load. The default is a normal LOAD DATA INFILE. In MySQL, the
842 :     option C<CONCURRENT> or C<LOW_PRIORITY> can be used to modify the way the load
843 :     works. C<LOW_PRIORITY> causes the load to wait until the table is no longer
844 :     being accessed, and C<CONCURRENT> attempts to allow other users to read the
845 :     table while the load is in progress.
846 :    
847 : parrello 1.1 =item RETURN
848 :    
849 :     Returns the number of rows loaded. If no rows were loaded, will return a true 0, that is,
850 :     it will return a value that evaluates to 0 numerically but is treated as TRUE when used in
851 :     a boolean expressing. If an error occurs, will return C<undef>.
852 :    
853 :     =back
854 :    
855 :     =cut
856 :    
857 :     sub load_table {
858 :     my $self = shift @_;
859 :     my %defaults = ( delim => "\t" );
860 :     my %arg = (%defaults, @_);
861 :     my $file = $arg{file};
862 :     my $tbl = $arg{tbl};
863 :     my $delim = $arg{delim};
864 :     my $dbh = $self->{_dbh};
865 :     my $dbms = $self->{_dbms};
866 : parrello 1.50 my $style = $arg{style} || $FIG_Config::load_mode;
867 : parrello 1.5 my $rv;
868 : parrello 1.50 # Convert "normal" load mode to null.
869 :     if ($style eq 'normal') {
870 :     $style = '';
871 :     }
872 : parrello 1.1 if ($file) {
873 :     if ($dbms eq "mysql") {
874 : parrello 1.13 # We need to determine whether or not we have to use a special line
875 :     # terminator string.
876 :     my $lineEnd = ($FIG_Config::arch eq "win" ? "\\r\\n" : "\\n");
877 : parrello 1.50 Trace("Loading $tbl into MySQL using file $file and style $style.") if T(2);
878 : parrello 1.53 # Decide whether this is a local file or a server file.
879 :     my $place = ($self->{_host} ne "localhost" ? "LOCAL" : "");
880 :     my $sql = "LOAD DATA $style $place INFILE '$file' INTO TABLE $tbl FIELDS TERMINATED BY '$delim' LINES TERMINATED BY '$lineEnd';";
881 : parrello 1.50 Trace("SQL command: $sql") if T(SQL => 2);
882 :     $rv = $dbh->do($sql);
883 : parrello 1.1 } elsif ($dbms eq "Pg") {
884 : parrello 1.5 Trace("Loading $tbl into PostGres using file $file.") if T(2);
885 : parrello 1.50 my $sql = "COPY $tbl FROM '$file' USING DELIMITERS '$delim';";
886 :     Trace("SQL command: $sql") if T(SQL => 2);
887 :     $rv = $dbh->do($sql);
888 : parrello 1.1 }
889 : parrello 1.24 elsif ($dbms eq 'SQLite')
890 :     {
891 :     #
892 :     # SQLite needs to do the bulk inserts using INSERT. We enclose it in a transaction,
893 :     # committing every 10000 rows.
894 :     #
895 :    
896 :     my $fh = new FileHandle("<$file");
897 :     $fh or Confess("load_table: cannot open $file");
898 :    
899 :     local $dbh->{AutoCommit} = 0;
900 :    
901 :     #
902 :     # Determine the columns of the table.
903 :     #
904 :    
905 :     my $sth = $dbh->prepare("select * from $tbl where 1 = 0");
906 :     $sth->execute();
907 :     my @cols = @{$sth->{NAME}};
908 :     print "GOt table columns @cols\n";
909 :     my $n_cols = @cols;
910 :    
911 :     my $qs = join(", ", map { "?" } @cols);
912 :    
913 :     my $qry = "INSERT INTO $tbl VALUES($qs)";
914 :     my $stmt = $dbh->prepare($qry);
915 :     $stmt or Confess("Prepare '$qry' failed");
916 :    
917 :     my $row = 0;
918 :     while (<$fh>)
919 :     {
920 :     chomp;
921 :     my @a = split(/\t/);
922 :     #
923 :     # Need to force size of @a to make insert not complain.
924 :     #
925 :     $#a = $n_cols - 1;
926 : parrello 1.49
927 : parrello 1.24 $stmt->execute(@a);
928 :     $row++;
929 :     if ($row % 10000 == 0)
930 :     {
931 :     $dbh->commit();
932 :     }
933 :     }
934 :     print "sqlite inserted $row rows\n";
935 :     $rv = $row;
936 :     }
937 :     else
938 :     {
939 :     Confess "Attempting load_table on unsupported database $dbms\n";
940 :     }
941 : parrello 1.5 if (!defined $rv) {
942 :     my $errorMessage = $dbh->errstr;
943 :     Trace("Error in $tbl load: $errorMessage") if T(0);
944 : parrello 1.24 } elsif ($rv >= 0) {
945 :     Trace("$rv rows loaded into $tbl.") if T(3);
946 : parrello 1.5 } else {
947 : parrello 1.24 Trace("Row loaded into $tbl.") if T(3);
948 : parrello 1.5 }
949 : parrello 1.1 }
950 : parrello 1.5 return $rv;
951 : parrello 1.1 }
952 :    
953 :     =head3 create_index
954 :    
955 : parrello 1.49 $db->create_index(tbl => $tbl, idx => $idx, flds => $flds, type => $type, kind => $unique);
956 : parrello 1.1
957 :     Create an index on a table. For a large table, this should be done after the table is loaded
958 :     so that the load performance is not seriously degraded.
959 :    
960 :     The C<flds> parameter should contain a comma-delimited list of field names, representing
961 :     the fields in the index from most significant to least significant. The field names can
962 :     be qualified with a direction-- C<ASC> for ascending (the default), or C<DESC> for descending.
963 :     For example, the following call creates a unique index on the Genome table that uses the
964 :     name field followed by the index number, with the highest index number coming first.
965 :    
966 : parrello 1.5 $db->create_index(tbl => 'Genome', idx => 'idxGenomeName',
967 : parrello 1.44 flds => 'name, indexNum DESC', kind => 'unique');
968 : parrello 1.1
969 :     =over 4
970 :    
971 :     =item tbl
972 :    
973 :     Name of the table for which the index is being created.
974 :    
975 :     =item idx
976 :    
977 :     Name to give to the index.
978 :    
979 :     =item flds
980 :    
981 :     Field specifier for the index. This should be a single, comma-delimited string containing
982 :     the field names and their associated direction qualifiers (C<ASC> for ascending or C<DESC>
983 :     for descending). If a direction qualifier is omitted for a particular field, the direction
984 :     defaults to C<ASC>.
985 :    
986 :     =item type (optional, PostGres only)
987 :    
988 :     Type of index.
989 :    
990 : parrello 1.44 =item kind (optional)
991 : parrello 1.1
992 : parrello 1.44 C<unique> for a unique index, C<fulltext> for a full-text index. If omitted, an ordinary
993 :     non-unique index is created. Note that only MySQL supports full-text indexes.
994 : parrello 1.1
995 :     =item RETURN
996 :    
997 :     Returns a defined value if successful, and an undefined value if an error occurred.
998 :    
999 :     =back
1000 :    
1001 :     =cut
1002 :    
1003 :     sub create_index {
1004 :     my $self = shift @_;
1005 :     my %arg = @_;
1006 :     my $tbl = $arg{tbl};
1007 :     my $idx = $arg{idx};
1008 :     my $flds = $arg{flds};
1009 :     my $type = $arg{type};
1010 :     my $dbh = $self->{_dbh};
1011 :     my $dbms = $self->{_dbms};
1012 : parrello 1.16 # Drop the index if it already exists. We expect it to not exist,
1013 :     # so we kill the warning messages.
1014 :     my $printError = $dbh->{PrintError};
1015 :     $dbh->{PrintError} = 0;
1016 : overbeek 1.17 $self->drop_index(idx => $idx, tbl => $tbl);
1017 : parrello 1.16 $dbh->{PrintError} = $printError;
1018 :     # Now we can create the index safely.
1019 : parrello 1.44 my $uniqueFlag = ($arg{kind} ? " $arg{kind}" : "");
1020 : parrello 1.1 my $cmd = "CREATE$uniqueFlag INDEX $idx ON $tbl ";
1021 :     if ($type && $dbms eq "Pg") {
1022 :     $cmd .= " USING $type ";
1023 :     }
1024 : parrello 1.32 $cmd .= " ( $flds );";
1025 : parrello 1.33 # If this is Postgres, descending indexes are not allowed.
1026 :     if ($dbms eq "Pg") {
1027 :     $cmd =~ s/\s+DESC//g;
1028 :     }
1029 : parrello 1.31 Trace("Creating index: $cmd") if T(SQL => 2);
1030 : parrello 1.1 my $rv = $dbh->do($cmd);
1031 : parrello 1.5 return $rv;
1032 : parrello 1.1 }
1033 :    
1034 : overbeek 1.17 =head3 drop_index
1035 :    
1036 : parrello 1.49 $db->drop_index(tbl => $tbl, idx => $idx);
1037 : overbeek 1.17
1038 :     Drop an index on a table. This will remove the index.
1039 :    
1040 :     =over 4
1041 :    
1042 :     =item tbl
1043 :    
1044 :     Name of the table from which the index is being dropped. Note that this is only required or used for mysql databases
1045 :    
1046 :     =item idx
1047 :    
1048 :     Name of the index.
1049 :    
1050 :     =item RETURN
1051 :    
1052 :     Returns a defined value if successful, and an undefined value if an error occurred.
1053 :    
1054 :     =back
1055 :    
1056 :     =cut
1057 :    
1058 :     sub drop_index {
1059 :     my $self = shift @_;
1060 :     my %arg = @_;
1061 :     my $tbl = $arg{tbl};
1062 :     my $idx = $arg{idx};
1063 :     my $dbh = $self->{_dbh};
1064 :     my $dbms = $self->{_dbms};
1065 :     my $res;
1066 :     if ($dbms eq "mysql")
1067 :     {
1068 :     unless ($idx && $tbl)
1069 :     {
1070 :     print STDERR "Both Index name and table must be specified for them to be dropped\n";
1071 :     return undef;
1072 :     }
1073 :     $res=$dbh->do("DROP INDEX $idx on $tbl");
1074 :     }
1075 : olson 1.22 elsif ($dbms eq "Pg" or $dbms eq "SQLite")
1076 : overbeek 1.17 {
1077 :     unless ($idx)
1078 :     {
1079 :     print STDERR "An index must be specified to be dropped\n";
1080 :     return undef;
1081 :     }
1082 :     $res=$dbh->do("DROP INDEX $idx");
1083 :     }
1084 : olson 1.22 else
1085 :     {
1086 :     Confess "Attempting drop_index on unsupported database $dbms\n";
1087 :     }
1088 : parrello 1.49 return $res;
1089 : overbeek 1.17 }
1090 :    
1091 : parrello 1.16 =head3 error_message
1092 :    
1093 : parrello 1.49 my $message = $dbh->error_message();
1094 : parrello 1.16
1095 :     Return the error message (if any) from the last database call.
1096 :    
1097 :     =cut
1098 :    
1099 :     sub error_message {
1100 :     my ($self) = @_;
1101 :     return $self->{_dbh}->errstr();
1102 :     }
1103 :    
1104 : parrello 1.1 =head3 DESTROY
1105 :    
1106 :     This is the destructor for the database kernel object, and it releases the database
1107 :     handle to conserve resources.
1108 :    
1109 :     =cut
1110 :    
1111 :     sub DESTROY {
1112 :     my($self) = @_;
1113 :    
1114 :     my($dbh);
1115 :     if ($dbh = $self->{_dbh}) {
1116 : parrello 1.5 $dbh->disconnect;
1117 : parrello 1.1 }
1118 : parrello 1.2
1119 : parrello 1.1 }
1120 :    
1121 :     =head3 prepare_command
1122 :    
1123 :     Prepare a command for use against the database.
1124 :    
1125 :     =over 4
1126 :    
1127 :     =item commandText
1128 :    
1129 :     Text of the command to be prepared.
1130 :    
1131 :     =item RETURN
1132 :    
1133 :     Returns a statement handle that can be used to execute the command.
1134 :    
1135 :     =back
1136 :    
1137 :     =cut
1138 :    
1139 :     sub prepare_command {
1140 : parrello 1.5 # Get the parameters.
1141 : olson 1.57 my ($self, $commandText, $attrs) = @_;
1142 : parrello 1.5 # Get the database handle.
1143 :     my $dbh = $self->{_dbh};
1144 :     # Prepare the command.
1145 : olson 1.57 my $sth = $dbh->prepare($commandText, $attrs) ||
1146 : parrello 1.5 Confess("Command failed: $commandText\n");
1147 :     # Return it to the caller.
1148 :     return $sth;
1149 : parrello 1.1 }
1150 :    
1151 : parrello 1.53 =head3 set_demand_driven
1152 :    
1153 :     $dbh->set_demand_driven($flag);
1154 :    
1155 :     Set the database to demand-driven mode. This means that queries will be
1156 :     processed as results are demanded rather than being cached in memory when
1157 :     the query is executed. Currently, this only works for MySQL.
1158 :    
1159 :     =over 4
1160 :    
1161 :     =item flag
1162 :    
1163 :     TRUE to make the database demand-driven, else FALSE.
1164 :    
1165 :     =back
1166 :    
1167 :     =cut
1168 :    
1169 :     sub set_demand_driven {
1170 :     # Get the parameters.
1171 :     my ($self, $flag) = @_;
1172 :     # Is this MySQL?
1173 :     if ($self->{_dbms} eq 'mysql') {
1174 :     # Yes, we can set the value. Convert it from boolean to an integer.
1175 :     my $flagValue = ($flag ? 1 : 0);
1176 :     # Store it in the handle.
1177 :     $self->{_dbh}->{mysql_use_result} = $flagValue;
1178 :     Trace("Queries will be demand-driven.") if $flag && T(SQL => 2);
1179 :     }
1180 :     }
1181 :    
1182 :    
1183 : parrello 1.1 =head3 begin_tran
1184 :    
1185 :     Begin a database transaction.
1186 :    
1187 :     =cut
1188 :    
1189 :     sub begin_tran {
1190 : parrello 1.5 # Get the parameters.
1191 :     my ($self) = @_;
1192 :     # Turn off auto-commit.
1193 :     my $dbh = $self->{_dbh};
1194 :     $dbh->{AutoCommit} = 0;
1195 : parrello 1.1 }
1196 :    
1197 :     =head3 commit_tran
1198 :    
1199 :     Commit a database transaction.
1200 :    
1201 :     =cut
1202 :    
1203 :     sub commit_tran {
1204 : parrello 1.5 # Get the parameters.
1205 :     my ($self) = @_;
1206 :     # Commit the transaction.
1207 :     my $dbh = $self->{_dbh};
1208 :     $dbh->commit;
1209 :     # Turn auto-commit back on.
1210 :     $dbh->{AutoCommit} = 1;
1211 :     }
1212 :    
1213 : parrello 1.46 =head3 roll_tran
1214 :    
1215 :     Roll back a database transaction.
1216 :    
1217 :     =cut
1218 :    
1219 :     sub roll_tran {
1220 :     # Get the parameters.
1221 :     my ($self) = @_;
1222 :     # Roll back the transaction.
1223 :     my $dbh = $self->{_dbh};
1224 :     $dbh->rollback;
1225 :     # Turn auto-commit back on.
1226 :     $dbh->{AutoCommit} = 1;
1227 :     }
1228 :    
1229 : parrello 1.5 =head3 reload_table
1230 :    
1231 : parrello 1.49 $dbh->reload_table($mode, $table, $flds, $xflds, $fileName, $keyList, $keyName, $estimates);
1232 : parrello 1.5
1233 :     Reload a database table from a sequential file. If I<$mode> is C<all>, the table
1234 :     will be dropped and re-created. If I<$mode> is C<some>, the data for the individual
1235 :     items in I<$keyList> will be deleted before the table is loaded. Thus, the load
1236 :     process is optimized for the type of reload.
1237 :    
1238 : parrello 1.7 This method can be used to drop and re-create a table without loading: simply
1239 :     omit the I<$fileName> parameter. In this case, I<$keyList> and I<$keyName> are
1240 :     ignored, since they specify what to do if the table is not dropped. If this
1241 :     option is used, the load must be finished by calling L</finish_load>.
1242 :    
1243 : parrello 1.5 =over 4
1244 :    
1245 :     =item mode
1246 :    
1247 :     C<all> if we are reloading the entire table, C<some> if we are only reloading
1248 :     specific entries.
1249 :    
1250 :     =item table
1251 :    
1252 :     Name of the table to reload.
1253 :    
1254 :     =item flds
1255 :    
1256 :     String defining the table columns, in SQL format. In general, this is a
1257 :     comma-delimited set of field specifiers, each specifier consisting of the
1258 :     field name followed by the field type and any optional qualifiers (such as
1259 :     C<NOT NULL> or C<DEFAULT>); however, it can be anything that would appear
1260 :     between the parentheses in a C<CREATE TABLE> statement. The order in which
1261 :     the fields are specified is important, since it is presumed that is the
1262 :     order in which they are appearing in the load file.
1263 :    
1264 :     =item xflds
1265 :    
1266 :     Reference to a hash that describes the indexes. The hash is keyed by index name.
1267 :     The value is the index's field list. This is a comma-delimited list of field names
1268 :     in order from most significant to least significant. If a field is to be indexed
1269 :     in descending order, its name should be followed by the qualifier C<DESC>. For
1270 :     example, the following I<$xflds> value will create two indexes, one for name followed
1271 :     by creation date in reverse chronological order, and one for ID.
1272 :    
1273 :     { name_index => "name, createDate DESC", id_index => "id" }
1274 :    
1275 :     =item fileName (optional)
1276 :    
1277 :     Fully-qualified name of the file containing the data to load. Each line of the
1278 :     file must correspond to a record, and the fields must be arranged in order and
1279 :     tab-delimited. If the file name is omitted, the table is dropped and re-created
1280 : parrello 1.7 but not loaded. The user must then call L</finish_load> to finish the load
1281 :     process.
1282 : parrello 1.5
1283 :     =item keyList (optional)
1284 :    
1285 :     Reference to a list of the IDs for the objects being reloaded. This parameter is
1286 :     only used if I<$mode> is C<some>.
1287 :    
1288 :     =item keyName (optional)
1289 :    
1290 :     Name of the key field containing the IDs in the keylist. If omitted, C<genome> is
1291 :     assumed.
1292 :    
1293 : olson 1.11 =item estimates (optional)
1294 :    
1295 :     For a Mysql database, the estimated row size and row count. Used for creating
1296 :     large MyISAM tables. A pair [$row_size, $row_count].
1297 :    
1298 : parrello 1.5 =back
1299 :    
1300 :     =cut
1301 :    
1302 :     sub reload_table {
1303 :     # Get the parameters.
1304 : olson 1.11 my ($self, $mode, $table, $flds, $xflds, $fileName, $keyList, $keyName, $estimates) = @_;
1305 : parrello 1.5 # Create the return value. It defaults to unsuccessful. with no rows
1306 :     # loaded.
1307 :     my $retVal = 0E0;
1308 :     # Insure we can recover from errors.
1309 :     eval {
1310 :     # If we're in ALL mode, we drop and re-create the table. Otherwise,
1311 :     # we delete the obsolete objects.
1312 : olson 1.30 #
1313 :     # Before deleting the obsolete objs, we need to see if the table already exists.
1314 :     # We could have updated the code such that we are now doing a reload on a
1315 :     # portion of a table that we haven't made yet.
1316 :     #
1317 :    
1318 : parrello 1.5 if ( $mode eq 'all') {
1319 :     Trace("Recreating $table.") if T(Load => 2);
1320 :     $self->drop_table( tbl => $table );
1321 : olson 1.11 $self->create_table( tbl => $table, flds => $flds, estimates => $estimates );
1322 : parrello 1.5 # For pre-indexed DBMSs, we want to create the indexes here.
1323 :     if ($self->{_preIndex}) {
1324 :     $self->create_indexes($table, $xflds);
1325 :     }
1326 : olson 1.30 } elsif (not $self->table_exists($table)) {
1327 :     $self->create_table( tbl => $table, flds => $flds, estimates => $estimates );
1328 :     # For pre-indexed DBMSs, we want to create the indexes here.
1329 :     if ($self->{_preIndex}) {
1330 :     $self->create_indexes($table, $xflds);
1331 :     }
1332 : parrello 1.5 } else {
1333 :     Trace("Clearing obsolete data from $table.") if T(Load => 2);
1334 :     foreach my $key ( @{$keyList} ) {
1335 : parrello 1.24 local $self->{_dbh}->{RaiseError} = 1;
1336 :     my $qry = "DELETE FROM $table WHERE ( $keyName = \'$key\' )";
1337 : overbeek 1.15
1338 : parrello 1.24 eval {
1339 :     $self->SQL($qry);
1340 :     };
1341 :     if ($@)
1342 :     {
1343 :     warn "DB error on query $qry: $@\n";
1344 :     }
1345 : parrello 1.5 }
1346 :     }
1347 :     # Only proceed if we want to load the table here.
1348 :     if ($fileName) {
1349 :     # The table is now ready for loading.
1350 :     Trace("Loading $table from $fileName.") if T(Load => 2);
1351 : parrello 1.24 if (! -s $fileName) {
1352 : parrello 1.54 Trace("Load file \'$fileName\' empty or not found.") if T(Load => 2);
1353 : parrello 1.24 } else {
1354 :     my $count = $self->load_table( tbl => $table, file => $fileName );
1355 :     Trace("$table loaded with $count rows.") if T(Load => 2);
1356 :     }
1357 : parrello 1.7 # Do the post-processing. This will create the indexes if
1358 :     # we have not already done so.
1359 : parrello 1.5 $self->finish_load($mode, $table, $xflds);
1360 :     } else {
1361 :     # The user is loading the table. Save the index info for the finish.
1362 :     $self->{_indexList} = $xflds;
1363 :     }
1364 :     };
1365 :     # Check for errors.
1366 :     if ($@) {
1367 :     Confess("Error loading $table: $@");
1368 :     }
1369 :     }
1370 :    
1371 : parrello 1.55 =head3 last_insert_id
1372 :    
1373 :     my $id = $db->last_insert_id();
1374 :    
1375 :     Return the ID of the last auto-increment record inserted.
1376 :    
1377 :     =cut
1378 :    
1379 :     sub last_insert_id {
1380 :     # Get the parameters.
1381 :     my ($self) = @_;
1382 :     # Declare the return variable.
1383 :     my $retVal = $self->{_dbh}->last_insert_id(undef, undef, undef, undef);
1384 :     # Return the result.
1385 :     return $retVal;
1386 :     }
1387 :    
1388 :    
1389 : parrello 1.5 =head3 finish_load
1390 :    
1391 : parrello 1.49 my = $db->finish_load($mode, $table, $indexes);
1392 : parrello 1.5
1393 :     Finish up a table load. Unless the mode is C<all>, there's nothing to be done
1394 :     here. If the mode is C<all> and the indexes need to be created after loading,
1395 :     then they will be created here. Otherwise, nothing happens.
1396 :    
1397 :     A C<finish_load> should only be called after starting a load with L</reload_table>.
1398 :     If the data to load is in a single text file, then C<reload_table> can do the
1399 :     entire job in place. In some cases, however, the load is coming from multiple
1400 :     files and must be done manually by the client. When this happens, the
1401 :     C<finish_load> method is used to perform any post-processing required by the
1402 :     load.
1403 :    
1404 :     =over 4
1405 :    
1406 :     =item mode
1407 :    
1408 :     C<all> if we are reloading the entire table, else C<some>.
1409 :    
1410 :     =item table
1411 :    
1412 :     Name of the table being loaded.
1413 :    
1414 :     =item indexes
1415 :    
1416 :     Reference to a hash describing the indexes (see L</reload_table> for details).
1417 :     If omitted, the index specification from the last call to C<reload_table> will
1418 :     be used.
1419 :    
1420 :     =back
1421 :    
1422 :     =cut
1423 :     #: Return Type ;
1424 :     sub finish_load {
1425 :     # Get the parameters.
1426 :     my ($self, $mode, $table, $indexes) = @_;
1427 :     # Default the index hash.
1428 :     if (!$indexes) {
1429 :     $indexes = $self->{_indexList};
1430 :     }
1431 : parrello 1.6 if ($mode eq 'all' && !$self->{_preIndex}) {
1432 : parrello 1.5 $self->create_indexes($table, $indexes);
1433 :     }
1434 : parrello 1.9 # Analyze the table to speed queries.
1435 : olson 1.48
1436 :     if (!$ENV{DBKERNEL_DEFER_VACUUM})
1437 :     {
1438 :     $self->vacuum_it($table);
1439 :     }
1440 : parrello 1.5 }
1441 :    
1442 :     =head3 create_indexes
1443 :    
1444 : parrello 1.49 $db->create_indexes($table, \%indexes, $noVacuum);
1445 : parrello 1.5
1446 :     Create the indexes for a table. The list of indexes is expressed as a hash. The
1447 :     key of the hash is the index name, and the value of the hash is the field list.
1448 :    
1449 :     =over 4
1450 :    
1451 :     =item table
1452 :    
1453 :     Name of the table for which the indexes are to be created.
1454 :    
1455 :     =item indexes
1456 :    
1457 :     Reference to a hash that describes the indexes. The hash is keyed by index name.
1458 :     The value is the index's field list. This is a comma-delimited list of field names
1459 :     in order from most significant to least significant. If a field is to be indexed
1460 :     in descending order, its name should be followed by the qualifier C<DESC>. For
1461 :     example, the following I<$indexes> value will create two indexes, one for name followed
1462 :     by creation date in reverse chronological order, and one for ID.
1463 :    
1464 :     { name_index => "name, createDate DESC", id_index => "id" }
1465 :    
1466 :     =back
1467 :    
1468 :     =cut
1469 :     #: Return Type ;
1470 :     sub create_indexes {
1471 :     # Get the parameters.
1472 :     my ($self, $table, $indexes) = @_;
1473 :     Trace("Creating indexes for $table.") if T(Load => 2);
1474 :     # Loop through the indexes in the index hash.
1475 :     for my $idxName (keys %{$indexes}) {
1476 :     Trace("Creating index $idxName.") if T(Load => 3);
1477 :     # Insure we can recover from errors.
1478 :     eval {
1479 :     $self->create_index( idx => $idxName,
1480 :     tbl => $table,
1481 :     type => "btree",
1482 :     flds => $indexes->{$idxName}
1483 :     );
1484 :     };
1485 :     if ($@) {
1486 :     Confess("Error creating index $idxName in $table: $@");
1487 :     }
1488 :     }
1489 : parrello 1.1 }
1490 :    
1491 : parrello 1.8 =head3 vacuum_it
1492 :    
1493 : parrello 1.49 $db->vacuum_it($table1, $table2, ... $tableN);
1494 : parrello 1.8
1495 :     Analyze the specified tables to improve the query performance.
1496 :    
1497 :     =over 4
1498 :    
1499 :     =item table1, table2, ... $tableN
1500 :    
1501 :     List of tables to analyze.
1502 :    
1503 : parrello 1.16 =back
1504 :    
1505 : parrello 1.8 =cut
1506 :    
1507 :     sub vacuum_it {
1508 :     my($self,@tables) = @_;
1509 :     my($table);
1510 :    
1511 :     my $dbh = $self->{_dbh};
1512 :     my $dbms = $self->{_dbms};
1513 :     if (@tables == 0) {
1514 :     # Eventually we need to loop through all the tables for MySQL here.
1515 :     if ($dbms eq "Pg") {
1516 :     $self->SQL("VACUUM ANALYZE");
1517 :     }
1518 :     } else {
1519 :     foreach $table (@tables) {
1520 :     Trace("Analyzing table $table.") if T(2);
1521 :     if ($dbms eq "Pg") {
1522 :     $self->SQL("VACUUM ANALYZE $table");
1523 :     } elsif ($dbms eq "mysql") {
1524 :     $self->SQL("ANALYZE TABLE $table");
1525 :     }
1526 :     }
1527 :     }
1528 :     }
1529 :    
1530 : parrello 1.43 =head3 flush_tables
1531 :    
1532 : parrello 1.49 $db->flush_tables();
1533 : parrello 1.43
1534 :     Flush the internal table cache. It is a good idea to do this periodically during a load.
1535 :     Currently, only MySQL supports it.
1536 :    
1537 :     =cut
1538 :    
1539 :     sub flush_tables {
1540 :     # Get the parameters.
1541 :     my ($self) = @_;
1542 :     # Get the database type.
1543 :     my $dbms = $self->{_dbms};
1544 :     # If we're MySQL, execute the flush.
1545 :     if ($dbms eq "mysql") {
1546 :     $self->SQL("FLUSH TABLES");
1547 :     }
1548 :     }
1549 :    
1550 : olson 1.11 =head3 estimate_table_size
1551 :    
1552 : parrello 1.49 $db->estimate_table_size([list of files]);
1553 : olson 1.11
1554 :     Estimate the average row size and number of rows for the given set of files. Does this by reading the
1555 :     first 100 lines of the first file, computing the total size of all the files, and extrapolating.
1556 :    
1557 :     Returns ($row_size, $num_rows).
1558 :    
1559 :     =cut
1560 :    
1561 :     sub estimate_table_size
1562 :     {
1563 :     my($self, $files) = @_;
1564 :    
1565 :     my $total_size = 0;
1566 :     foreach my $file (@$files) {
1567 : parrello 1.16 my $size = -s $file;
1568 :    
1569 :     if (!defined($size))
1570 :     {
1571 :     confess "Cannot read $file: $!";
1572 :     }
1573 : olson 1.11
1574 : parrello 1.16 $total_size += $size;
1575 : olson 1.11 }
1576 :    
1577 :     #
1578 :     # Read 100 lines of the first file to get an average.
1579 :     #
1580 : parrello 1.16
1581 : olson 1.11 my($row_size, $max_rows);
1582 : parrello 1.16
1583 : olson 1.11 if (open(F, "<$files->[0]"))
1584 :     {
1585 : parrello 1.16 my($count, $tot);
1586 :     while (<F>)
1587 :     {
1588 :     last if $. == 100;
1589 :     $count++;
1590 :     $tot += length($_);
1591 :     }
1592 :     close(F);
1593 :     $row_size = int($tot / $count);
1594 : olson 1.11 }
1595 :     else
1596 :     {
1597 : parrello 1.16 confess "Cannot open $files->[0] for reading: $!\n";
1598 : olson 1.11 }
1599 :    
1600 : parrello 1.16 $max_rows = int(1.1 * $total_size / $row_size);
1601 : olson 1.11
1602 :     return ($row_size, $max_rows);
1603 :     }
1604 :    
1605 : olson 1.3 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3