[Bio] / Sprout / AttrDBRefresh.pl Repository:
ViewVC logotype

View of /Sprout/AttrDBRefresh.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Wed Nov 29 20:28:52 2006 UTC (13 years, 4 months ago) by parrello
Branch: MAIN
Changes since 1.4: +66 -34 lines
Converted to another attribute storage strategy.

#!/usr/bin/perl -w

=head1 AttrDBRefresh

This script performs useful functions on the custom attributes database.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is C<3>.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item phone

Phone number to message when the script is complete.

=item migrate

If specified, then in addition to refreshing the objects, the actual attributes
will be migrated. If this option is specified on a system that is using the new
attribute system, it will have no effect, but it will take a very long time
doing it.

=item initializeAndClear

If specified, then the tables in the attribute database are dropped and re-created.

=item load

If specified, the name of a file containing attribute data to be loaded into the
system. The file is presumed to be tab-delimited. The first column must be the
object ID, the second the attribute key name, and the remaining columns the
attribute values. All attribute keys mentioned in the file will be erased before
loading.

=item backup

If specified, the name of a file into which all the attribute data should be
dumped. The output file will be tab-delimited, with the first column containing
object IDs, the second attribute key names, and the remaining columns the
attribute values.

=back

=cut

use strict;
use Tracer;
use DocUtils;
use TestUtils;
use Cwd;
use File::Copy;
use File::Path;
use CustomAttributes;
use ERDBLoad;
use FIG;

# Describe the script-specific command-line options. Each entry maps an option
# name to a [default value, help text] pair.
my %optionSpecs = (
    trace              => [3, "trace level"],
    initializeAndClear => ["", "if specified, the tables of the attribute database will be re-created"],
    migrate            => ["", "if specified, attribute data will be migrated along with the object IDs"],
    phone              => ["", "phone number (international format) to call when load finishes"],
    load               => ["", "file from which to load attribute data"],
    backup             => ["", "file to which attribute data should be dumped"],
);
# Parse the command line. The first argument lists the tracing categories,
# the third is the (empty) positional-parameter description.
my ($options, @parameters) = StandardSetup(['CustomAttributes', 'FIG'],
                                           \%optionSpecs,
                                           "",
                                           @ARGV);
# This will describe how the run ended ("error" or "no error"); it is used in
# the optional phone message below.
my $rtype;
# Run all the real work inside an EVAL so that any failure is trapped and reported.
eval {
    # Force the FIG object to use the old attribute system, so that the old
    # attributes are not read through the new one.
    $FIG_Config::attrOld = 1;
    # Create the FIG object.
    my $fig = FIG->new();
    # Connect to the attribute database.
    Trace("Connecting to attribute database.") if T(2);
    my $ca = CustomAttributes->new();
    # A load and a backup in the same run is refused outright.
    Confess("Please do not specify LOAD and BACKUP at the same time.")
        if $options->{load} && $options->{backup};
    # Re-create the tables if requested.
    if ($options->{initializeAndClear}) {
        $ca->CreateTables();
        Trace("Tables recreated.") if T(2);
    }
    # Migrate the attribute data if requested.
    if ($options->{migrate}) {
        Trace("Migrating attribute data.") if T(2);
        my $migrateStats = MigrateAttributes($ca, $fig);
        Trace("Migration statistics:\n" . $migrateStats->Show()) if T(2);
    }
    # Load attribute data from a file if requested.
    my $loadFileName = $options->{load};
    if ($loadFileName) {
        if (-f $loadFileName) {
            # The file exists, so hand it to the attribute database.
            Trace("Loading attribute data from $loadFileName.") if T(2);
            my $loadStats = $ca->LoadAttributesFrom($loadFileName);
            Trace("Load statistics:\n" . $loadStats->Show()) if T(2);
        } else {
            # Missing, or present but not a plain file.
            Confess("Cannot load: file \"$loadFileName\" is not found or not a file.");
        }
    }
    # Dump the attribute data to a file if requested.
    my $backupFileName = $options->{backup};
    if ($backupFileName) {
        Trace("Backing up attribute database.") if T(2);
        my $backupStats = $ca->BackupAllAttributes($backupFileName);
        Trace("Backup statistics:\n" . $backupStats->Show()) if T(2);
    }
    Trace("Processing complete.") if T(2);
};
# Record and trace how the run ended.
if (! $@) {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
} else {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
}
# Send the completion message if a phone number was supplied.
my $phone = $options->{phone};
if ($phone) {
    my $msgID = Tracer::SendSMS($phone, "RefreshAttrDB terminated with $rtype.");
    # A false message ID is treated as a failure to send.
    my $outcome = ($msgID ? "Phone message sent with ID $msgID."
                          : "Phone message not sent.");
    Trace($outcome) if T(2);
}

=head3 MigrateAttributes

C<< my $stats = MigrateAttributes($ca, $fig); >>

Migrate all the attribute data from the specified FIG instance. This is a long, slow
method used to convert the old attribute data to the new system. Only attribute
keys that are already in the database will be loaded, and they will completely
replace the existing values for those keys. Therefore, it is very important that the
FIG instance not be connected to the attribute database.

=over 4

=item ca

B<CustomAttributes> object used to access the attribute database.

=item fig

A FIG object that can be used to retrieve attributes for migration purposes.

=item RETURN

Returns a statistical object for the load process.

=back

=cut

# Copy the values of every attribute key known to the attribute database
# from the FIG object into the HasValueFor table, via an ERDBLoad loader.
#
# Parameters:
#   $ca   CustomAttributes object; supplies the key list, the value
#         splitter string ($ca->{splitter}) and the load target.
#   $fig  FIG object whose get_attributes method supplies the old values.
#
# Returns a Stats object holding the "keysIn" counter (one per key
# processed) plus whatever statistics the loader's FinishAndLoad reports.
sub MigrateAttributes {
    # Get the parameters.
    my ($ca, $fig) = @_;
    # Create the return value.
    my $retVal = Stats->new('keysIn');
    # Create a loader for the value table. Its load file goes in the FIG
    # temporary directory.
    my $hasValueFor = ERDBLoad->new($ca, 'HasValueFor', $FIG_Config::temp);
    # The separator used to pack multiple values into one string is the same
    # for every row, so fetch it once instead of once per row.
    my $splitter = $ca->{splitter};
    # Get a list of all our attribute keys.
    my @keys = $ca->GetFlat(['AttributeKey'], "", [], 'AttributeKey(id)');
    # Loop through them, building the load file. Only keys returned by the
    # query above are processed.
    for my $key (@keys) {
        Trace("Migrating key $key.") if T(3);
        $retVal->Add(keysIn => 1);
        # Get all the values of the specified key. The first argument is
        # undefined so that the request is not restricted to one object.
        my @oldValues = $fig->get_attributes(undef, $key);
        my $count = scalar(@oldValues);
        Trace("$count values found for $key in source system.") if T(3);
        # Loop through the rows.
        for my $row (@oldValues) {
            # Each row is a list reference. The first element is the object
            # ID and the second is skipped (presumably the key name we
            # already have -- TODO confirm against FIG::get_attributes);
            # everything after that is a value.
            my ($id, undef, @values) = @{$row};
            # Pack the values into a single string.
            my $valueString = join($splitter, @values);
            # Queue the record for loading.
            $hasValueFor->Put($key, $id, $valueString);
        }
    }
    # Close and finish the load to upload the data.
    Trace("Closing value table.") if T(2);
    my $hvfStats = $hasValueFor->FinishAndLoad();
    Trace("Statistics from value table load:\n" . $hvfStats->Show()) if T(2);
    # Merge the loader's statistics into our own.
    $retVal->Accumulate($hvfStats);
    # Return the statistics object.
    return $retVal;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3