[Bio] / Sprout / AttrDBRefresh.pl Repository:
ViewVC logotype

View of /Sprout/AttrDBRefresh.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.8 - (download) (as text) (annotate)
Tue Jan 9 01:49:08 2007 UTC (13 years, 2 months ago) by parrello
Branch: MAIN
Changes since 1.7: +11 -1 lines
Added support for the new text search index for attributes.

#!/usr/bin/perl -w

=head1 AttrDBRefresh

This script performs useful functions on the custom attributes database.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is C<3>.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item phone

Phone number to message when the script is complete.

=item migrate

If specified, the attributes will be migrated from the old FIG system to the
new attribute system.

=item preserve

If specified, a comma-delimited list of attributes that should not be migrated.
The values of these attributes in the new system will be preserved.

=item initializeAndClear

If specified, then the tables in the attribute database are dropped and re-created.

=item load

If specified, the name of a file containing attribute data to be loaded into the
system. The file is presumed to be tab-delimited. The first column must be the
object ID, the second the attribute key name, and the remaining columns the
attribute values. All attribute keys mentioned in the file will be erased before
the new values are loaded.

=item loadKey

If specified, the name of a tab-delimited file containing attribute key data. For each key,
there is a pair of lines. The first line contains the ID, data type, and description
of the key. The second line contains the marker C<#GROUPS> followed by zero or more
group names. The attribute will be connected to all the specified groups.

=item backup

If specified, the name of a file into which all the attribute data should be
dumped. The file itself will receive the attribute data in the format expected
by C<load>. A second file, consisting of the same file name with the suffix C<.key>,
will contain the attribute key data in the format expected by C<loadKey>.

=item hh_copy

If specified, the attributes used by the collections system will be converted from the
old system to the new one. In the old system, these attributes are assigned either to
a genome or to the word "Subsystem". If they are attached to the word "Subsystem", then
the value is the subsystem name; otherwise, the value is ignored. In the new attribute
system, the attribute key is always "collection" and the value is the collection name.
Rather than attaching attributes to the word "Subsystem", we attach them to the
subsystem itself.

=item re_index

If specified, the text search index will be rebuilt.

=back

=cut

use strict;
use Tracer;
use DocUtils;
use TestUtils;
use Cwd;
use File::Copy;
use File::Path;
use CustomAttributes;
use ERDBLoad;
use FIG;

# Get the command-line options and parameters.
# NOTE(review): the end of this call was missing. It is closed here on the
# assumption that Tracer::StandardSetup takes (\@categories, \%options,
# $parmHelp, @ARGV) -- confirm against Tracer.pm before relying on it.
my ($options, @parameters) = StandardSetup([qw(CustomAttributes FIG) ],
                                           {
                                              trace => [3, "trace level"],
                                              initializeAndClear => ["", "if specified, the tables of the attribute database will be re-created"],
                                              migrate => ["", "if specified, attribute data will be migrated along with the object IDs"],
                                              preserve => ["", "comma-delimited list of attributes to be preserved during migration"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                              load => ["", "file from which to load attribute data"],
                                              loadKey => ["", "file from which to load attribute key data"],
                                              backup => ["", "file to which attribute data should be dumped"],
                                              hh_copy => ["", "if specified, the attributes used by the collections system will be converted"],
                                              re_index => ["", "if specified, the text search index will be rebuilt"]
                                           },
                                           "",
                                           @ARGV);
# Set a variable to contain return type information. It is only used for the
# optional phone message at the end of the script.
my $rtype;
# Insure we catch errors. Every requested operation runs inside this EVAL so
# that a failure in any one of them is traced instead of killing the script
# before the completion message can be sent.
eval {
    # Insure we don't use the new attribute system for accessing the old attributes.
    $FIG_Config::attrOld = 1;
    # Get the FIG object.
    my $fig = FIG->new();
    # Get the attribute database.
    Trace("Connecting to attribute database.") if T(2);
    my $ca = CustomAttributes->new();
    # Process according to the options selected. The options are independent:
    # each one that is specified is executed, in the order shown here, so a
    # backup always happens before anything that modifies the database.
    if ($options->{backup}) {
        # Back up the attributes to the specified file.
        my $backupFileName = $options->{backup};
        Trace("Backing up attribute data.") if T(2);
        my $stats = $ca->BackupAllAttributes($backupFileName);
        Trace("Attribute backup statistics:\n" . $stats->Show()) if T(2);
        # The key definitions go into a companion file with a ".key" suffix.
        Trace("Backing up key data.") if T(2);
        $stats = $ca->BackupKeys("$backupFileName.key");
        Trace("Key backup statistics:\n" . $stats->Show()) if T(2);
    }
    if ($options->{initializeAndClear}) {
        # Create the tables.
        # TODO(review): the statement that actually re-creates the tables
        # appears to be missing here (presumably a call such as
        # $ca->CreateTables() -- confirm against the repository).
        Trace("Tables recreated.") if T(2);
    }
    if ($options->{migrate}) {
        # Migrate the data.
        Trace("Migrating attribute data.") if T(2);
        my $stats = MigrateAttributes($ca, $fig, $options->{preserve});
        Trace("Migration statistics:\n" . $stats->Show()) if T(2);
    }
    if ($options->{hh_copy}) {
        # Migrate the collections keys to the new system.
        Trace("Migrating collections data.") if T(2);
        my $stats = MigrateCollections($ca, $fig);
        Trace("Collection statistics:\n" . $stats->Show()) if T(2);
    }
    if ($options->{loadKey}) {
        # We want to load the attribute key data from the specified file, but
        # first we need to verify that the file exists.
        my $loadFileName = $options->{loadKey};
        if (! -f $loadFileName) {
            Confess("Cannot load keys: file \"$loadFileName\" is not found or not a file.");
        } else {
            Trace("Loading key data from $loadFileName.") if T(2);
            my $stats = $ca->RestoreKeys($loadFileName);
            Trace("Load statistics:\n" . $stats->Show()) if T(2);
        }
    }
    if ($options->{load}) {
        # We want to load the attribute data from the specified file, but
        # first we need to verify that the file exists.
        my $loadFileName = $options->{load};
        if (! -f $loadFileName) {
            Confess("Cannot load: file \"$loadFileName\" is not found or not a file.");
        } else {
            Trace("Loading attribute data from $loadFileName.") if T(2);
            my $stats = $ca->LoadAttributesFrom($loadFileName);
            Trace("Load statistics:\n" . $stats->Show()) if T(2);
        }
    }
    if ($options->{re_index}) {
        # We want to rebuild the text search index.
        # TODO(review): the statement that actually builds the index appears
        # to be missing here (presumably a call such as
        # $ca->CreateSearchIndex('HasValueFor') -- confirm against the
        # repository).
        Trace("Search index created on \"HasValueFor\" table.");
    }
    Trace("Processing complete.") if T(2);
};
# Check $@ immediately, before anything else can clobber it.
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Notify the operator by phone if a number was supplied.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "RefreshAttrDB terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

=head3 MigrateAttributes

C<< my $stats = MigrateAttributes($ca, $fig, $preserve); >>

Migrate all the attributes data from the specified FIG instance. This is a long, slow
method used to convert the old attribute data to the new system. Only attribute
keys that are already in the database will be loaded, and they will completely
replace the existing values for those keys. Therefore, it is very important that the
FIG instance not be connected to the attribute database.

=over 4

=item ca

B<CustomAttributes> object used to access the attribute database.

=item fig

A FIG object that can be used to retrieve attributes for migration purposes.

=item preserve (optional)

A comma-delimited list of attributes that are not to be migrated.

=item RETURN

Returns a statistics object for the load process.

=back

=cut

sub MigrateAttributes {
    # Migrate attribute values from the old FIG system into a HasValueFor
    # load file and load that file into the attribute database.
    #
    #   $ca        CustomAttributes object for the target database
    #   $fig       FIG object from which the old values are read
    #   $preserve  optional comma-delimited list of keys NOT to migrate;
    #              their current values are copied into the load file instead
    #
    # Returns a Stats object in which "keysIn" counts the migrated keys.

    # Get the parameters.
    my ($ca, $fig, $preserve) = @_;
    # Create the return value.
    my $retVal = Stats->new('keysIn');
    # Create a loader for the value table.
    my $hasValueFor = ERDBLoad->new($ca, 'HasValueFor', $FIG_Config::temp);
    # Get a list of the attributes we're to preserve. Surrounding white space
    # and empty entries are discarded so that a value such as " a, b," still
    # names exactly the keys "a" and "b".
    my %preservedKeys = ();
    if (defined $preserve) {
        my $keyList = $preserve;
        $keyList =~ s/^\s+//;
        $keyList =~ s/\s+$//;
        %preservedKeys = map { $_ => 1 } grep { length $_ } split /\s*,\s*/, $keyList;
    }
    # Put the preserved keys into the load file. Their values come from the
    # NEW system, so the load puts them back unchanged.
    for my $key (keys %preservedKeys) {
        Trace("Preserving key $key.") if T(3);
        my @newValues = $ca->GetAttributes(undef, $key);
        Trace(scalar(@newValues) . " values of $key will be preserved.");
        # Put the values into the load file.
        PutValue($hasValueFor, $ca, @newValues);
    }
    # Get a list of all our attribute keys.
    my @allKeys = $ca->GetFlat(['AttributeKey'], "", [], 'AttributeKey(id)');
    # Delete the preserved keys.
    my @keys = grep { ! $preservedKeys{$_} } @allKeys;
    # Loop through the rest, building the load files.
    for my $key (@keys) {
        Trace("Migrating key $key.") if T(3);
        $retVal->Add(keysIn => 1);
        # Get all the values of the specified key from the OLD system.
        my @oldValues = $fig->get_attributes(undef, $key);
        my $count = scalar(@oldValues);
        Trace("$count values found for $key in source system.") if T(3);
        # Put the values into the load file.
        PutValue($hasValueFor, $ca, @oldValues);
    }
    # Close and finish the loads to upload the data.
    Trace("Closing value table.") if T(2);
    my $hvfStats = $hasValueFor->FinishAndLoad();
    Trace("Statistics from value table load:\n" . $hvfStats->Show()) if T(2);
    # Merge the statistics.
    # TODO(review): no merge is actually performed here; the statement that
    # folds $hvfStats into $retVal appears to be missing (presumably something
    # like $retVal->Accumulate($hvfStats) -- confirm against the repository).
    # Return the statistics object.
    return $retVal;
}

=head3 PutValue

C<< PutValue($hasValueFor, $ca, @values); >>

Put the values from an attribute value list into a HasValueFor load file.

=over 4

=item hasValueFor

Load object for the HasValueFor table.

=item ca

A CustomAttribute object. We get the splitter value from it.

=item values

A list of tuples, each consisting of an object ID, a key name, and one or more values.

=back

=cut

sub PutValue {
    # Write a list of attribute tuples to the HasValueFor load file.
    #
    #   $hasValueFor  load object; its Put method is called once per tuple
    #   $ca           CustomAttributes object; its {splitter} member is the
    #                 delimiter used to join multiple values into one string
    #   @values       list of array references, each holding an object ID,
    #                 a key name, and the attribute's value(s)
    #
    # Returns nothing.

    # Get the parameters.
    my ($hasValueFor, $ca, @values) = @_;
    # Loop through the value rows.
    for my $row (@values) {
        # Get this row's data. The row is copied, so the caller's tuple is
        # never modified.
        my ($id, $key, @rowValues) = @{$row};
        # Format the values into a single delimited string.
        my $valueString = join($ca->{splitter}, @rowValues);
        # Add the value. Note that the key precedes the object ID in the
        # output record, the reverse of the order in the incoming tuple.
        $hasValueFor->Put($key, $id, $valueString);
    }
    return;
}

=head3 MigrateCollections

C<< my $stats = MigrateCollections($ca, $fig); >>

This method copies the collection data from the specified FIG object and stores it as values
of the C<collection> attribute in the specified custom attribute database.

=over 4

=item ca

Custom attribute database into which the collections are to be stored.

=item fig

FIG object from which the collection attributes are to be harvested.

=item RETURN

Returns a statistics object with information about the migration.

=back

=cut

sub MigrateCollections {
    # Copy the collection attributes from the old FIG system into the
    # "collection" attribute of the custom attribute database.
    #
    #   $ca   CustomAttributes object that receives the new values
    #   $fig  FIG object from which the old collection attributes are read
    #
    # Returns a Stats object: "collection" counts the collections processed
    # and each collection name counts the rows found for that collection.

    # Get the parameters.
    my ($ca, $fig) = @_;
    # Declare the return variable.
    my $retVal = Stats->new();
    # Get the collection names.
    my @collections = qw(higher_plants eukaryotic_ps nonoxygenic_ps hundred_hundred functional_coupling_paper ecoli_essentiality_paper);
    # Erase the current collection data.
    # TODO(review): nothing is erased here; the statement that removes the
    # existing "collection" values appears to be missing (presumably a call
    # such as $ca->EraseAttribute('collection') -- confirm against the
    # repository). Without it, re-running the migration adds to whatever
    # values are already present.
    # Loop through the collection attributes.
    for my $cname (@collections) {
        $retVal->Add(collection => 1);
        # Get this attribute from the old system.
        my @rows = $fig->get_attributes(undef, $cname);
        # Loop through its values.
        for my $row (@rows) {
            $retVal->Add($cname => 1);
            # Determine the object key. A row attached to the literal word
            # "Subsystem" carries the real subsystem name in its value column;
            # any other row is attached directly to the object in column 0.
            my $objectID = ($row->[0] eq 'Subsystem' ? $row->[2] : $row->[0]);
            $ca->AddAttribute($objectID, 'collection', $cname);
        }
    }
    # Return the statistics.
    return $retVal;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3