[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

View of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Jul 27 20:00:56 2005 UTC (14 years, 3 months ago) by parrello
Branch: MAIN
Added this object to provide useful utilities for loading ERDB databases.

#!/usr/bin/perl -w

package ERDBLoad;

    use strict;
    use Tracer;
    use PageBuilder;
    use ERDB;
    use Stats;

=head1 ERDB Table Load Utility Object

=head2 Introduction

This object is designed to assist with loading an ERDB data relation. The user
constructs the object by specifying an ERDB object and a relation name. This
re-creates the relevant relation. The client then passes in data lines which
are written to a file. When the file gets big enough, it is loaded into the
table. Calling the L</Finish> method loads any leftover data and optionally
creates the index.

This module makes use of the internal ERDB property C<_dbh> and the internal
method C<_IsPrimary>.

=cut

#

=head2 Public Methods

=head3 new

C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory); >>

Begin loading an ERDB relation.

=over 4

=item erdb

ERDB object representing the target database.

=item relationName

Name of the relation being loaded.

=item directory

Name of the directory to use for the load files, WITHOUT a trailing slash.

=back

=cut

sub new {
    # Unpack the class name and the three constructor arguments.
    my ($class, $erdb, $relationName, $directory) = @_;
    # Refuse to continue unless the load directory really exists.
    Confess("Load directory \"$directory\" not found.") unless -d $directory;
    # The load file lives in the load directory and is named after the relation.
    my $fileName = "$directory/$relationName.dtx";
    # (Re)create the target table. The second argument passes the DBMS's
    # pre-index flag through to CreateTable.
    $erdb->CreateTable($relationName, $FIG_Config::preIndex);
    # Open a fresh load file for output.
    my $fileHandle = Open(undef, ">$fileName");
    # Assemble the object's state. Note that the ERDB object itself is kept
    # under the "dbh" key.
    my %state = (
        dbh       => $erdb,
        fh        => $fileHandle,
        fileName  => $fileName,
        relName   => $relationName,
        fileSize  => 0,
        lineCount => 0,
        stats     => Stats->new(),
        primary   => $erdb->_IsPrimary($relationName),
    );
    # Bless the state hash into the requested class and hand it back.
    return bless \%state, $class;
}

=head3 Put

C<< $erload->Put($field1, $field2, ..., $fieldN); >>

Write a line of data to the load file. This may also cause the load file to be closed
and data read into the table.

=over 4

=item field1, field2, ..., fieldN

List of field values to be put into the data line. The field values must be in the
order shown in the documentation for the table. Internal tabs and
new-lines will automatically be escaped before the data line is formatted.

=back

=cut
#: Return Type ;
sub Put {
    # Peel off the ERDBLoad instance; what remains in @_ is the field list.
    my $self = shift;
    # Escape each incoming field value in turn.
    my @fields;
    for my $value (@_) {
        push @fields, Tracer::Escape($value);
    }
    # Primary relations carry an extra trailing new-record field.
    push @fields, '0' if $self->{primary};
    # Build the tab-delimited, newline-terminated data line.
    my $record = join("\t", @fields) . "\n";
    my $recordSize = length $record;
    # If adding this line would push the load file past the size limit,
    # load what we have so far before writing.
    $self->Flush() if $self->{fileSize} + $recordSize > 200000000;
    # Append the record to the (possibly just re-opened) load file.
    print { $self->{fh} } $record;
    # Keep the running byte and line totals current.
    $self->{fileSize} += $recordSize;
    $self->{lineCount} ++;
}

=head3 Flush

C<< $erload->Flush(); >>

Load all the data currently in the load file into the database. This clears the load
file and re-opens it.

=cut
#: Return Type ;
sub Flush {
    # Get this object instance.
    my $self = shift;
    # Push everything currently in the load file into the table...
    $self->_FlushData;
    # ...then start over with an empty load file ready for more data.
    $self->_ReOpen;
}

=head3 Finish

C<< my $stats = $erload->Finish(); >>

Finish loading the table. This closes the load file and loads its contents into the database.
It also creates the indexes if the DBMS uses post-indexing.

=over 4

=item RETURN

Returns a statistics object describing what happened during the load and containing any
error messages.

=back

=cut

# Finish the load: push the remaining load-file data into the table, build
# the indexes when the DBMS is post-indexed, delete the load file, and
# return the statistics object accumulated during the load.
sub Finish {
    # Get this object instance.
    my ($self) = @_;
    # Flush out the data in the load file.
    $self->_FlushData();
    # If this is a post-index DBMS, create the indexes. The constructor keeps
    # the ERDB object under the "dbh" key (no "erdb" key is ever set), so that
    # is the member the index request must go through.
    if (! $FIG_Config::preIndex) {
        $self->{dbh}->CreateIndex($self->RelName);
    }
    # Delete the load file. This is best-effort: a failure to delete does
    # not affect the loaded data, so the result is not checked.
    unlink $self->{fileName};
    # Return the statistics object.
    return $self->{stats};
}

=head3 RelName

C<< my $name = $erload->RelName; >>

Name of the relation being loaded by this object.

=cut

sub RelName {
    # Simple read-only accessor: hand back the relation name stored
    # on this object.
    my $self = shift;
    return $self->{relName};
}

=head2 Internal Methods

=head3 _ReOpen

Re-open the load file.

This is an instance method.

=cut

sub _ReOpen {
    # Get this instance.
    my $self = shift;
    # Re-use the existing filehandle, opening the load file for output
    # (the ">" mode truncates whatever was there).
    Open($self->{fh}, ">$self->{fileName}");
    # The file is now empty, so both running totals go back to zero.
    $self->{fileSize} = $self->{lineCount} = 0;
}

=head3 _FlushData

Close the load file and load all its data into the table.

This is an instance method.

=cut

sub _FlushData {
    # Get this instance and the name of the relation it is loading.
    my $self = shift;
    my $table = $self->RelName;
    Trace("Flushing data to table $table.") if T(2);
    # The load file must be closed before the loader reads it.
    close $self->{fh};
    # Pick up the statistics object and the low-level database handle that
    # sits inside the ERDB object.
    my $tally = $self->{stats};
    my $kernel = $self->{dbh}->{_dbh};
    # Wrap the load in a transaction. This is for speed rather than
    # integrity: it makes the slow load process faster.
    $kernel->begin_tran();
    # Attempt the load, trapping any exception so it can be reported below.
    my $loaded;
    eval {
        Trace("Loading file into relation $table.") if T(3);
        $loaded = $kernel->load_table(file => $self->{fileName}, tbl => $table);
    };
    if (defined $loaded) {
        # Success: trace and accumulate the record and byte counts.
        my ($records, $bytes) = @{$self}{qw(lineCount fileSize)};
        Trace("$records records ($bytes bytes) loaded into $table.") if T(2);
        $tally->Add("records", $records);
        $tally->Add("bytes", $bytes);
    } else {
        # Failure: build a message, including the trapped error text when
        # there is one, and record it in the statistics object.
        my $problem = "Table load failed for $table";
        $problem .= $@ ? ": $@" : ".";
        $tally->AddMessage($problem);
        Trace($problem) if T(1);
    }
    # Close the database transaction.
    $kernel->commit_tran();
}

1;


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3