[Bio] / Sprout / FindDuplicate.pl Repository:
ViewVC logotype

View of /Sprout/FindDuplicate.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Sun Dec 9 01:36:05 2012 UTC (6 years, 10 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Added support for nullable floats and integers.

use strict;

use Tracer;
use Stats;

=head1 Find Duplicate Field Values

This script searches a specific column of a tab-delimited file looking for
duplicate values. The default column is the first (1). Use the B<c> option to
specify a different column. The input file name is specified as a positional
parameter. If no input file name is specified, the input is taken from the
standard input.

=cut

# Parse the command line. The only script-specific option is "c", the 1-based
# number of the column to check. The single positional parameter is the
# optional input file name.
my ($options, $fileName) = StandardSetup([], { c => [1, "column to check (1-based)"]},
        "<fileName>", @ARGV);
# Validate the column number before using it. Without this check a value of 0
# would turn into array index -1 and silently select the LAST field of every
# line; a non-numeric value would numify to 0 with the same result, and a
# negative value would count backward from the end of the line.
my $colNumber = $options->{c};
if (! defined $colNumber || $colNumber !~ /^\d+$/ || $colNumber < 1) {
    my $shown = (defined $colNumber ? $colNumber : '');
    die "Invalid column number \"$shown\": the c option must be a positive integer.\n";
}
# Convert from a 1-based column number to a 0-based array index.
my $col = $colNumber - 1;
# Create a statistics object for the lines, duplicates, and blanks counters.
my $stats = Stats->new(qw(lines duplicates blanks));
# Open the input file. If there is no input file name, we use a hyphen to get
# STDIN.
if (! $fileName) {
    $fileName = "-";
    Trace("Reading from standard input.") if T(2);
} else {
    Trace("Reading from $fileName.") if T(2);
}
my $ih = Open(undef, "<$fileName");
# Every non-blank key value seen so far is a key of this hash.
my %values;
# Loop through the input file, one record per iteration.
while (! eof $ih) {
    # Get the key column from this record.
    my @fields = Tracer::GetLine($ih);
    $stats->Add(lines => 1);
    my $key = $fields[$col];
    # Lines that are too short to have the key column, or whose key column is
    # empty, are counted as blanks and otherwise ignored.
    if (! defined $key || $key eq '') {
        $stats->Add(blanks => 1);
        next;
    }
    # Here we have a field value. If it has been seen before, report it.
    # Note that the first "." in "$.." is part of the $. line-number
    # variable and the second is the sentence's period.
    if (exists $values{$key}) {
        Trace("Duplicate value \"$key\" found in line $..") if T(1);
        $stats->Add(duplicates => 1);
    }
    # Record this key value.
    $values{$key} = 1;
}
# Display the statistics.
Trace("All done:\n" . $stats->Show()) if T(2);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3