Parent Directory
|
Revision Log
Removed obsolete use clauses.
#!/usr/bin/perl -w

=head1 Drug Cleaner

Clean up a flat file with PEGs in it.

This script runs through a tab-delimited text file, removing duplicate entries
and entries for features not in the Sprout database. The positional parameters
should be the names of the files to clean. Each file is rewritten in place: the
cleaned records go to a temporary file (the original name with C<.tmp~>
appended), which is then renamed over the original.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item macFile

If specified, the file is presumed to be in Macintosh format.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item col

Column in the input file that contains feature IDs. The default is C<6>. The
column count is 1-based, so the value must be a whole number of at least 1.

=item phone

Phone number to message when the script is complete.

=back

=cut

use strict;
use Tracer;
use Cwd;
use File::Copy;
use File::Path;
use FIG;
use SFXlate;
use Stats;

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              col => ["6", "1-based index of the column containing feature IDs"],
                                              trace => ["2", "trace level"],
                                              macFile => ["", "If specified, the file is presumed to be in macintosh format."],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "<fileName1> <fileName2> ... ",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Insure we catch errors.
eval {
    # Get a sprout object.
    my $sprout = SFXlate->new_sprout_only();
    # Validate the column number. It is 1-based, so 0 must be rejected: after the
    # conversion below it would become -1, which Perl treats as "last field".
    my $col = $options->{col};
    if ($col !~ /^\d+$/ || $col < 1) {
        Confess("Invalid column number \"$col\".");
    }
    # Convert to the 0-based index used to subscript the field list.
    $col--;
    # Check for macintosh format. If the input file is from the MAC, we use "\r"
    # instead of "\n" as the input record separator. The change is localized to
    # this eval block so that it cannot leak into the completion code below.
    local $/ = ($options->{macFile} ? "\r" : $/);
    # This list collects the names of files we could not safely replace.
    my @failedFiles;
    # Loop through the files.
    for my $fileName (@parameters) {
        Trace("Processing $fileName.") if T(2);
        # Create a backup file name.
        my $tempFile = "$fileName.tmp~";
        # Create a hash of features. We will skip any feature whose ID is
        # already in the hash.
        my %fids;
        # Open the two files.
        # NOTE(review): Open comes from Tracer; presumably it throws on failure -- confirm.
        my $inh = Open(undef, "<$fileName");
        my $outh = Open(undef, ">$tempFile");
        # Get a statistics object.
        my $stats = Stats->new();
        # Loop through the input file.
        while (! eof $inh) {
            # Get the current record.
            my @fields = Tracer::GetLine($inh);
            $stats->Add(input => 1);
            # Pull out the feature ID.
            my $fid = $fields[$col];
            # Figure out what to do with this record.
            if (! $fid) {
                # No feature ID, so this record is considered a bad line and skipped.
                $stats->Add(badline => 1);
            } elsif (! exists $fids{$fid}) {
                # Here we are seeing this feature for the first time. Make sure we
                # don't process it again.
                $fids{$fid} = 1;
                # Now, find out if this feature exists.
                if ($sprout->Exists('Feature', $fid)) {
                    # It does, so write it out.
                    Tracer::PutLine($outh, \@fields);
                    $stats->Add(output => 1);
                } else {
                    Trace("Feature $fid not found.") if T(3);
                    $stats->Add(notFound => 1);
                }
            } else {
                $stats->Add(duplicate => 1);
            }
        }
        # Display the statistics.
        Trace("Statistics for $fileName:\n" . $stats->Show()) if T(2);
        # Close the files. Buffered write errors (disk full, quota) only surface
        # when the output handle is closed, so that close must be checked before
        # we let the temporary file replace the original.
        close $inh;
        if (! close $outh) {
            # Capture the error text first, since the trace call could change $!.
            my $error = "$!";
            Trace("Could not finish writing $tempFile: $error") if T(0);
            # Discard the incomplete copy and leave the original file untouched.
            unlink $tempFile;
            push @failedFiles, $fileName;
        } elsif (! rename($tempFile, $fileName)) {
            my $error = "$!";
            Trace("Could not rename $tempFile to $fileName: $error") if T(0);
            push @failedFiles, $fileName;
        }
    }
    # Every file has been attempted. If any could not be replaced, fail now so
    # that the completion status does not claim success.
    if (@failedFiles) {
        Confess("Could not replace the following files: " . join(", ", @failedFiles));
    }
    Trace("Processing complete.") if T(2);
};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Send the completion message if the user asked for one.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "Drug Cleaner terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

1;
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |