[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

View of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.8 - (download) (as text) (annotate)
Fri Jan 13 06:59:28 2006 UTC (14 years ago) by parrello
Branch: MAIN
Changes since 1.7: +23 -14 lines
Converted to allow for a load-only mode that loads from pre-generated files.

#!/usr/bin/perl -w

package ERDBLoad;

    use strict;
    use Tracer;
    use PageBuilder;
    use ERDB;
    use Stats;

=head1 ERDB Table Load Utility Object

=head2 Introduction

This object is designed to assist with creating the load file for an ERDB
data relation. The user constructs the object by specifying an ERDB object
and a relation name. This creates the load file for the relevant relation. The client
then passes in data lines which are written to a file, and calls
L</Finish> to close the file and get the statistics.

This module makes use of the internal ERDB method C<_IsPrimary>.



=head2 Public Methods

=head3 new

C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory, $loadOnly); >>

Begin loading an ERDB relation.

=over 4

=item erdb

ERDB object representing the target database.

=item relationName

Name of the relation being loaded.

=item directory

Name of the directory to use for the load files, WITHOUT a trailing slash.

=item loadOnly

TRUE if the data is to be loaded from an existing file, FALSE if a file is
to be created.

=back

=cut


sub new {
    # Constructor: prepares the load file for one relation and returns a
    # blessed ERDBLoad object.
    #
    # Parameters:
    #   $erdb          ERDB object for the target database.
    #   $relationName  name of the relation being loaded.
    #   $directory     directory for the load files, without a trailing slash.
    #   $loadOnly      TRUE to reuse an existing load file, FALSE to create one.
    #
    # Confess, Trace, T and Open are not defined in this file; they are
    # presumably exported by Tracer -- verify against that module.
    my ($class, $erdb, $relationName, $directory, $loadOnly) = @_;
    # Validate the directory name.
    if (! -d $directory) {
        Confess("Load directory \"$directory\" not found.");
    }
    # Determine the name for this relation's load file.
    my $fileName = "$directory/$relationName.dtx";
    # Declare the file handle variable.
    my $fileHandle;
    # Determine whether or not this is a primary relation, normalized to 1/0.
    my $primary = ($erdb->_IsPrimary($relationName) ? 1 : 0);
    # Check to see if this is a load-only or a generate-and-load.
    if ($loadOnly) {
        Trace("Relation $relationName will be loaded from $fileName.") if T(2);
        # No file is opened in load-only mode; the empty string marks that
        # state for Put and Finish.
        $fileHandle = "";
    } else {
        # If this is an entity relation, pipe the output through sort to
        # remove records with duplicate keys (first tab-delimited field).
        # The key is given as -k1,1 because the obsolete "+0 -1" form is
        # rejected by current POSIX/GNU sort. Any other relation is written
        # straight to the load file.
        my $fileString = ($erdb->IsEntity($relationName) ?
                            "| sort -k1,1 -u -t \"\t\" >$fileName" :
                            ">$fileName");
        # Open the output file and remember its handle.
        $fileHandle = Open(undef, $fileString);
        Trace("Relation $relationName load file created with primary flag $primary.") if T(2);
    }
    # Create the $erload object.
    my $retVal = {
                  dbh => $erdb,
                  fh => $fileHandle,
                  fileName => $fileName,
                  relName => $relationName,
                  fileSize => 0,
                  lineCount => 0,
                  stats => Stats->new(),
                  primary => $primary
                 };
    # Bless and return it.
    bless $retVal, $class;
    return $retVal;
}

=head3 Put

C<< $erload->Put($field1, $field2, ..., $fieldN); >>

Write a line of data to the load file. This may also cause the load file to be closed
and data read into the table.

=over 4

=item field1, field2, ..., fieldN

List of field values to be put into the data line. The field values must be in the
order shown in the documentation for the table. Internal tabs and
new-lines will automatically be escaped before the data line is formatted.

=back

=cut

#: Return Type ;
sub Put {
    # Writes one record to the load file and updates the size counters.
    #
    # Parameters: the field values for the record, in table order.
    # Returns nothing.
    #
    # Get the ERDBLoad instance.
    my $self = shift @_;
    # In load-only mode the constructor stores an empty string instead of
    # a file handle, so there is nothing to write to. Skip the record and
    # leave the counters untouched.
    my $fh = $self->{fh};
    return if ! $fh;
    # Run through the list of field values, escaping them.
    my @fields = map { Tracer::Escape($_) } @_;
    # If this is a primary relation, append the new-record field.
    if ($self->{primary}) {
        push @fields, '0';
    }
    # Form a data line from the fields.
    my $line = join("\t", @fields) . "\n";
    # Write the new record to the load file.
    print {$fh} $line;
    # Update the statistics with the length and count of what was written.
    $self->{fileSize} += length $line;
    $self->{lineCount}++;
    return;
}

=head3 Add

C<< $erload->Add($statName); >>

Increment the specified statistic.

=over 4

=item statName

Name of the statistic to increment.

=back

=cut

#: Return Type ;
sub Add {
    # Increments a named statistic on this loader's statistics object.
    #
    # Parameters:
    #   $statName  name of the statistic to increment.
    # Returns nothing.
    #
    # Get the parameters.
    my ($self, $statName) = @_;
    # Increment the statistic by delegating to the Stats object created
    # in the constructor.
    $self->{stats}->Add($statName);
    return;
}

=head3 Finish

C<< my $stats = $erload->Finish(); >>

Finish loading the table. This closes the load file and loads its contents into the database.
It also creates the indexes if the DBMS uses post-indexing.

=over 4

=item RETURN

Returns a statistics object describing what happened during the load and containing any
error messages.

=back

=cut


sub Finish {
    # Closes the load file, if one is open, and returns the statistics
    # object held by this loader.
    #
    # Get this object instance.
    my ($self) = @_;
    # In load-only mode the constructor stores an empty string instead of
    # a handle, so only close when a file was actually opened.
    my $fh = $self->{fh};
    if ($fh) {
        # The handle may be a pipe to sort, in which case a failed close
        # is the only sign that the sort step failed. Report it without
        # aborting the caller.
        close $fh
            or warn "Error closing load file $self->{fileName}: errno=\"$!\", status=$?\n";
    }
    # Return the statistics object.
    return $self->{stats};
}

=head3 RelName

C<< my $name = $erload->RelName; >>

Name of the relation being loaded by this object.

=cut

sub RelName {
    # Accessor: returns the name of the relation being loaded, as passed
    # to the constructor.
    #
    # Get the object instance.
    my ($self) = @_;
    # Return the relation name.
    return $self->{relName};
}

# A module must end with a true value so that "use"/"require" succeeds.
1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3