[Bio] / Sprout / DupCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/DupCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 : parrello 1.2 =head1 Duplicate Key Check
4 :    
5 :     C<DupCheck> [I<options>] I<keySize> I<fileName>
6 : parrello 1.1
7 :     Find duplicate keys in a sorted file. The first parameter is the number of fields in
8 : parrello 1.2 the key, the second is the name of the file to examine. The goal is to be able to
9 :     determine exactly where in the file the duplicate keys exist.
10 : parrello 1.1
11 :     The currently-supported command-line options are as follows.
12 :    
13 :     =over 4
14 :    
15 :     =item trace
16 :    
17 :     Numeric trace level. A higher trace level causes more messages to appear. The
18 :     default trace level is 2.
19 :    
20 :     =back
21 :    
22 :     =cut
23 :    
24 :     use strict;
25 :     use Tracer;
26 :     use Cwd;
27 :     use File::Copy;
28 :     use File::Path;
29 :    
# Get the command-line options.
my ($options, @parameters) = Tracer::ParseCommand({ trace => 2 }, @ARGV);
# Set up tracing.
my $traceLevel = $options->{trace};
TSetup("$traceLevel errors Tracer DocUtils ERDB", "TEXT");
# Get the parameters: the number of fields in the key and the name of the
# file to examine.
my ($fldCount, $fileName) = @parameters;
# Fail early with a usage message rather than trying to open an undefined
# file name or slicing the fields with a meaningless key size.
if (! defined $fldCount || $fldCount !~ /^\d+$/ || $fldCount < 1 || ! defined $fileName) {
    die "Usage: DupCheck [options] keySize fileName\n";
}
# NOTE(review): Open is presumably exported by Tracer and presumably reports
# an open failure itself -- confirm against Tracer.pm.
Open(\*INFILE, "<$fileName");
# The key of the previous record; undefined until the first record is read.
my $oldKey;
# Number of lines read so far. This is also the line number of the record
# whose key is currently in $key.
my $lineCount = 0;
# Loop through the file. GetKey reads one line from INFILE per call, so it
# must be the only reader: pulling a line in the loop condition as well would
# silently discard every other record and throw the line numbers off.
until (eof(INFILE)) {
    # Get the current line's key and count the line.
    my $key = GetKey($fldCount);
    $lineCount++;
    # Compare it to the old key. The first record has nothing to compare to.
    if (defined $oldKey && defined $key) {
        if ($key eq $oldKey) {
            print "Duplicate key at line $lineCount: $key\n";
        } elsif (lc($key) eq lc($oldKey)) {
            print "Case-duplicate key at line $lineCount: $key\n";
        }
    }
    $oldKey = $key;
}
close(INFILE);
print "$lineCount lines read.\n";
56 :    
# Get the key of the next record.
#
# Reads the next line from the global INFILE handle, splits it into
# tab-delimited fields (white space around each tab is discarded), and
# returns the first $fldCount fields joined by tabs.
#
# Parameters:
#   $fldCount   number of leading fields that make up the key
#
# Returns the key string, or undef when INFILE is at end-of-file. A record
# with fewer than $fldCount fields is padded with empty key fields.
sub GetKey {
    my ($fldCount) = @_;
    my $line = <INFILE>;
    my $retVal;
    # Test for definedness, not truth: a final unterminated line containing
    # only "0" is a real record, not end-of-file.
    if (defined $line) {
        chomp $line;
        my @fields = split /\s*\t\s*/, $line;
        # An array slice assigned to a scalar yields only its LAST element,
        # so the key fields must be joined explicitly to compare whole keys.
        # Missing fields come back as undef; map them to empty strings.
        my @keyFields = map { defined $_ ? $_ : '' } @fields[0 .. $fldCount - 1];
        $retVal = join("\t", @keyFields);
    }
    return $retVal;
}
69 :    
70 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3