#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

use strict;
use Tracer;
use CustomAttributes;
use Stats;

=head1 EvCodeRefresh Script

    EvCodeRefresh [options] <filename>

Refresh evidence codes from a sequential file

=head2 Introduction

This script loads evidence codes from a tab-delimited file into the
B<IsEvidencedBy> table. The incoming file should contain two columns of
data-- a feature ID followed by an evidence code. For compatibility, there
may be an intervening column that is ignored.

=head2 Positional Parameters

=over 4

=item filename

Name of the file from which the evidence codes are to be loaded. The file must
be a tab-delimited file containing two or three columns. The first column must
contain a feature ID, and the last column an evidence code to be applied to that
feature. The evidence code will automatically be split into a class (the leading
run of lowercase letters) and a modifier (everything that follows the class).

=back

=head2 Command-Line Options

=over 4

=item trace

Specifies the tracing level. The higher the tracing level, the more messages
will appear in the trace log. Use C<E> to specify emergency tracing.

=item append

Normally, the existing evidence codes are erased before the data from the file
is loaded. If C<append> is specified, then the erase is suppressed, and the
existing codes are kept.

=item classes

If this option is specified, it should be the name of a tab-delimited file
containing the evidence classes. The evidence class table will be deleted and
reloaded from the file, which should be a valid ERDB load file for the
B<EvidenceClass> table.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item help

Display this command's parameters and options.

=item warn

Create an event in the RSS feed when an error occurs.

=item phone

Phone number to message when the script is complete.

=back

=cut

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(ERDB CustomAttributes) ],
                                           {
                                              trace => ["2", "tracing level"],
                                              append => ["", "if specified, the evidence codes will be appended to existing codes"],
                                              classes => ["", "evidence class file name (optional)"],
                                              phone => ["", "phone number (international format) to call when load finishes"]
                                           },
                                           "",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Insure we catch errors. Everything that can fail runs inside this block so
# that the completion message below always reports the outcome.
eval {
    # Get the attributes database.
    my $attr = CustomAttributes->new();
    # Check for a class file.
    if ($options->{classes}) {
        # We have one. Load it into the evidence class table.
        Trace("Loading evidence classes from $options->{classes}.") if T(2);
        $attr->LoadTable($options->{classes}, 'EvidenceClass', truncate => 1);
        Trace("Evidence classes loaded.") if T(2);
    }
    # Verify that the input file exists.
    my $fileName = $parameters[0];
    if (! $fileName) {
        Confess("No input file name specified.");
    } else {
        # Now we convert the input file into a load file. First, we open it.
        my $ih = Open(undef, "<$fileName");
        # We'll count the number of codes in here.
        my $stats = Stats->new();
        # Create the load file. We sort it to speed up the load.
        my $loadFileName = "$FIG_Config::temp/IsEvidencedBy$$.dtx";
        my $oh = Open(undef, "| sort >$loadFileName");
        # Finally, we use this hash to track all the evidence classes.
        my %classes = ();
        Trace("Reading evidence codes.") if T(3);
        # Loop through the input file, writing load records.
        while (! eof $ih) {
            # Read the input record.
            my @fields = Tracer::GetLine($ih);
            my $line = $.;
            Trace("$line input lines processed.") if T(3) && ($. % 10000 == 0);
            # Insure it's valid. Only two or three columns are acceptable, so
            # the index of the last field must be 1 or 2.
            my $last = $#fields;
            if ($last >= 3 || $last < 1) {
                Trace("Record $line in input file has incorrect number of columns.") if T(3);
                $stats->Add(errors => 1);
            } else {
                # Get the feature ID and the code. The code is always the last
                # column, which lets us skip the optional middle column.
                my ($fid, $code) = @fields[0, $last];
                # Validate the feature ID and the evidence code.
                if (! ($fid =~ /^fig\|\d+/)) {
                    Trace("Record $line in input file has invalid feature ID \"$fid\".") if T(3);
                    $stats->Add(errors => 1);
                } elsif (! ($code =~ /^([a-z]+)(.*)/)) {
                    Trace("Record $line in input file has invalid evidence code \"$code\".") if T(3);
                    $stats->Add(errors => 1);
                } else {
                    # We have a valid input row. Produce the output line. Note that as a
                    # result of the pattern match that validated the evidence code, $1
                    # contains the class and $2 the modifier.
                    my ($class, $modifier) = ($1, $2);
                    Tracer::PutLine($oh, [$fid, $class, $modifier]);
                    # Count this as an output row and as a member of the specified class.
                    $stats->Add(rows => 1);
                    $stats->Add($class => 1);
                    $classes{$class} = 1;
                }
            }
        }
        Trace("Evidence codes reformatted.") if T(2);
        # Close the files. The output handle is a pipe to "sort", so the close
        # waits for the sort to finish and fails if it could not complete. We
        # must stop here in that case: continuing would erase the existing
        # evidence codes and replace them with a partial or empty load file.
        if (! close $oh) {
            Confess("Sort of load file $loadFileName failed: " .
                    ($! ? "$!" : "sort ended with wait status $?") . ".");
        }
        close $ih;
        Trace("Evidence codes will be loaded from $loadFileName.") if T(2);
        # Now we need to verify the incoming evidence codes against the known
        # evidence classes. We issue a message for every non-existent class. It's
        # not a serious error, but it's something the user should know.
        Trace("Verifying evidence classes.") if T(2);
        for my $class (keys %classes) {
            if (! $attr->Exists(EvidenceClass => $class)) {
                Trace("Evidence class \"$class\" not found in database!") if T(2);
                $stats->Add(bad_class => 1);
            }
        }
        # We are almost ready to reload the evidence codes. Set up the append option.
        # It's passed in as the truncate option flag on the load.
        my $truncate = ($options->{append} ? 0 : 1);
        # Now we load.
        Trace("Loading evidence codes.") if T(2);
        $attr->LoadTable($loadFileName, 'IsEvidencedBy', truncate => $truncate,
                         mode => 'concurrent');
        # Tell the user we're done, and show the statistics.
        Trace("Evidence codes loaded.\n" . $stats->Show()) if T(2);
    }
};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Notify the operator by phone, if requested, whether or not the load worked.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "EvCodeRefresh terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

1;