[Bio] / Sprout / DupCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/DupCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 : parrello 1.2 =head1 Duplicate Key Check
4 :    
5 :     C<DupCheck> [I<options>] I<keySize> I<fileName>
6 : parrello 1.1
7 :     Find duplicate keys in a sorted file. The first parameter is the number of fields in
8 : parrello 1.2 the key, the second is the name of the file to examine. The goal is to be able to
9 :     determine exactly where in the file the duplicate keys exist.
10 : parrello 1.1
11 :     The currently-supported command-line options are as follows.
12 :    
13 :     =over 4
14 :    
15 :     =item trace
16 :    
17 :     Numeric trace level. A higher trace level causes more messages to appear. The
18 :     default trace level is 2.
19 :    
20 :     =back
21 :    
22 :     =cut
23 :    
24 :     use strict;
25 :     use Tracer;
26 :     use DocUtils;
27 :     use TestUtils;
28 :     use Cwd;
29 :     use File::Copy;
30 :     use File::Path;
31 :    
# Get the command-line options. The only option given a default here is
# "trace", which defaults to 2.
my ($options, @parameters) = Tracer::ParseCommand({ trace => 2 }, @ARGV);
# Set up tracing.
my $traceLevel = $options->{trace};
TSetup("$traceLevel errors Tracer DocUtils ERDB", "TEXT");
# Get the parameters: the number of fields in the key and the file to examine.
my ($fldCount, $fileName) = @parameters;
# Fail early with a usage message rather than producing meaningless output
# when a parameter is missing or the key size is not a positive integer.
if (! defined $fldCount || $fldCount !~ /^[1-9]\d*$/ || ! defined $fileName) {
    die "Usage: DupCheck [options] keySize fileName (keySize must be a positive integer)\n";
}
# NOTE(review): Open comes from Tracer; presumably it reports a failure to
# open the file itself -- confirm against the Tracer module.
Open(\*INFILE, "<$fileName");
# Walk through the file one record at a time. Every record is read exactly
# once, by GetKey, so each record is compared against the one directly
# before it and $lineCount is always the 1-based number of the current line.
my $oldKey;
my $lineCount = 0;
until (eof(INFILE)) {
    # Get the current line's key and count the line.
    my $key = GetKey($fldCount);
    $lineCount++;
    # Compare it to the previous key. The first record has no predecessor,
    # and a record for which no key could be extracted is never reported.
    if (defined $key && defined $oldKey) {
        if ($key eq $oldKey) {
            print "Duplicate key at line $lineCount: $key\n";
        } elsif (lc $key eq lc $oldKey) {
            print "Case-duplicate key at line $lineCount: $key\n";
        }
    }
    $oldKey = $key;
}
close(INFILE);
print "$lineCount lines read.\n";
58 :    
# Get the key of the next record.
#
# Reads one line from the global INFILE handle, splits it into fields on
# tabs (white space around each tab is discarded), and returns the first
# $fldCount fields joined by single tab characters.
#
# Parameter: $fldCount -- number of leading fields that make up the key.
# Returns:   the key string, or undef when INFILE is at end-of-file. A record
#            with fewer than $fldCount fields yields a key built from the
#            fields that are present (an empty string for a blank line).
sub GetKey {
    my ($fldCount) = @_;
    my $line = <INFILE>;
    my $retVal;
    # Test for definedness, not truth: a final line consisting of "0" with
    # no trailing newline is false but is still a real record.
    if (defined $line) {
        chomp $line;
        my @fields = split /\s*\t\s*/, $line;
        # Never slice past the fields actually present, so short records do
        # not put undefined values into the key.
        my $last = $fldCount - 1;
        $last = $#fields if $last > $#fields;
        # Join the key fields into one string. Assigning the slice directly
        # to a scalar would keep only the last field of the slice.
        $retVal = join("\t", @fields[0 .. $last]);
    }
    return $retVal;
}
71 :    
72 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3