[Bio] / Sprout / DupCheck.pl Repository:
ViewVC logotype

View of /Sprout/DupCheck.pl

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Aug 16 20:32:25 2005 UTC (14 years, 3 months ago) by parrello
Branch: MAIN
Added a utility to check for duplicate keys in TBL files.

#!/usr/bin/perl -w

=head1 DupCheck

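Find duplicate keys in a sorted, tab-delimited file. The first parameter is the number of
leading fields that make up the key; the second is the name of the file to examine. Keys
that match exactly and keys that differ only in case are both reported.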

The currently-supported command-line options are as follows.

=over 4

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2.

=back

=cut

use strict;
use Tracer;
use DocUtils;
use TestUtils;
use Cwd;
use File::Copy;
use File::Path;

# Get the command-line options. The only supported option is "trace",
# which defaults to 2.
my ($options, @parameters) = Tracer::ParseCommand({ trace => 2 }, @ARGV);
# Set up tracing.
my $traceLevel = $options->{trace};
TSetup("$traceLevel errors Tracer DocUtils ERDB", "TEXT");
# Get the positional parameters: the number of leading fields that form the
# key, and the name of the file to examine.
my ($fldCount, $fileName) = @parameters;
# Fail early with a usage message instead of trying to open an undefined
# file name or slicing the fields with a meaningless count.
if (! defined $fldCount || $fldCount !~ /^\d+$/ || $fldCount < 1 || ! defined $fileName) {
    die "Usage: DupCheck.pl <fldCount> <fileName>\n";
}
# NOTE(review): Open comes from Tracer; presumably it throws if the file
# cannot be opened -- confirm.
Open(\*INFILE, "<$fileName");
# Loop through the file. GetKey is the ONLY reader of INFILE: the loop is
# driven by eof() so that every record is examined exactly once. (Reading a
# line in the loop condition and then calling GetKey would consume two
# records per iteration and skip half of the file.)
my $lineCount = 0;
my $oldKey;
until (eof(INFILE)) {
    # Get the current line's key. A record that yields no key is treated
    # as having an empty key so the comparisons below never see undef.
    my $key = GetKey($fldCount);
    $key = '' unless defined $key;
    # Count this line.
    $lineCount++;
    # Compare it to the previous key. The first record has no predecessor.
    if (defined $oldKey) {
        if ($key eq $oldKey) {
            print "Duplicate key at line $lineCount: $key\n";
        } elsif (lc $key eq lc $oldKey) {
            print "Case-duplicate key at line $lineCount: $key\n";
        }
    }
    $oldKey = $key;
}
close INFILE;
print "$lineCount lines read.\n";

# Get the key of the next record in INFILE.
#
# The record is split on tabs (whitespace surrounding each tab is discarded)
# and the first $fldCount fields are joined back together with single tabs
# to form the key.
#
# Parameters:
#   $fldCount   Number of leading fields that make up the key.
#
# Returns the key string, or undef if INFILE is at end-of-file. A blank
# line yields an empty key; a line with fewer than $fldCount fields yields
# a key built from the fields that are present.
sub GetKey {
    my ($fldCount) = @_;
    my $line = <INFILE>;
    my $retVal;
    # Test for definedness, not truth: a final line consisting of "0" with
    # no trailing newline is false but is still a real record.
    if (defined $line) {
        chomp $line;
        my @fields = split /\s*\t\s*/, $line;
        # Clamp the slice to the fields actually present so short lines do
        # not contribute undefined values to the key.
        my $lastIdx = $fldCount - 1;
        $lastIdx = $#fields if $lastIdx > $#fields;
        # Join explicitly: assigning a slice to a scalar would keep only
        # its last element rather than the whole key.
        $retVal = join("\t", @fields[0 .. $lastIdx]);
    }
    return $retVal;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3