[Bio] / Sprout / AttrKeyConvert.pl Repository:
ViewVC logotype

View of /Sprout/AttrKeyConvert.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Sep 3 20:23:31 2008 UTC (10 years, 11 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, rast_rel_2009_05_18, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, HEAD
New script to convert old-format attribute dumps to the new format.

#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

use strict;
use Tracer;
use Stats;

=head1 AttrKeyConvert Script

    AttrKeyConvert [options] <inFile> <outFile>

Convert an old attribute key dump to the new format

=head2 Introduction

The new attribute system adds the ability to store a specified keyword's values
in a separate table, which requires a new field in the B<AttributeKey> table.
This script takes a key dump created in the old attribute system and modifies it
so that it can be used to load keys into the new system.

=head2 Positional Parameters

=over 4

=item inFile

Name of the file containing the old attribute key dump. If this parameter is
not specified, the standard input will be used as the input and the output will
be to the standard output.

=item outFile 

Name of the file to be created with the attribute key dump in the new format.
If this parameter is not specified, the standard output will be used as the
output. When this happens, ensure the tracing output is not also directed
to the standard output by using the default trace value (C<2->) or specifying
a minus sign in your trace value override (e.g., C<3-> instead of C<3>).

=back

=head2 Command-Line Options

=over 4

=item trace

Specifies the tracing level. The higher the tracing level, the more messages
will appear in the trace log. Use E to specify emergency tracing.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item help

Display this command's parameters and options.

=item warn

Create an event in the RSS feed when an error occurs.

=item phone

Phone number to message when the script is complete.

=item tableName

Name of the default table. This value is inserted as a new third column
in each data line of the input to create the output. The default is
C<HasValueFor>.

=back

=cut

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(CustomAttributes) ],
                                           {
                                              trace => ["2-", "tracing level"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                              tableName => ["HasValueFor", "name to be inserted as the new table name"]
                                           },
                                           "<inFile> <outFile>",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Insure we catch errors.
eval {
    # Get the names of the input and output files.
    my ($inFile, $outFile) = @parameters;
    # These variables will contain the input and output file handles.
    my ($ih, $oh);
    if (! defined $inFile) {
        # No parameters, so we pipe from the standard input to the standard
        # output.
        $ih = \*STDIN;
        $oh = \*STDOUT;
        Trace("Data piped from standard input to standard output.") if T(2);
    } elsif (! defined $outFile) {
        $ih = Open(undef, "<$inFile");
        $oh = \*STDOUT;
        Trace("Data from \"$inFile\" piped to standard output.") if T(2);
    } else {
        $ih = Open(undef, "<$inFile");
        $oh = Open(undef, ">$outFile");
    }
    # Create a statistics object to track our activity.
    my $stats = Stats->new(qw(input output groupLines errors dataLines));
    # Get the table name.
    my $tableName = $options->{tableName};
    Trace("Table name will be \"$tableName\".") if T(2);
    # Loop through the input.
    while (! eof $ih) {
        # Read this line and split it into sections.
        my @cols = Tracer::GetLine($ih);
        # Count the line and trace our progress.
        Trace($stats->Ask('input') . " input lines processed.") if T(3) && $stats->Check(input => 50);
        # Parse the line.
        if ($cols[0] =~ /^#GROUPS/) {
            # This is a group list. Spit it out the way we found it.
            $stats->Add(groupLines => 1);
            Tracer::PutLine($oh, \@cols);
            $stats->Add(output => 1);
        } elsif (scalar(@cols) != 3) {
            # This line has an invalid format. Complain about it.
            $stats->Add(errors => 1);
            my $lineCount = $stats->Ask('input');
            Trace("Error in line $lineCount (starts with \"$cols[0]\"): wrong number of columns (should be 3).") if T(0);
        } else {
            # We have a valid data line. Add the table name before the third column.
            $stats->Add(dataLines => 1);
            splice @cols, 2, 0, $tableName;
            Tracer::PutLine($oh, \@cols);
            $stats->Add(output => 1);
        }
    }
    # All done. Output the statistics.
    Trace("Statistics for this run:\n" . $stats->Show()) if T(2);
};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "AttrKeyConvert terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3