[Bio] / Sprout / DupCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/DupCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 DupCheck
4 :    
5 :     Find duplicate keys in a sorted, tab-delimited file. The first parameter is the number of
6 :     leading fields that make up the key; the second is the name of the file to examine.
7 :    
8 :     The currently-supported command-line options are as follows.
9 :    
10 :     =over 4
11 :    
12 :     =item trace
13 :    
14 :     Numeric trace level. A higher trace level causes more messages to appear. The
15 :     default trace level is 2.
16 :    
17 :     =back
18 :    
19 :     =cut
20 :    
21 :     use strict;
22 :     use Tracer;
23 :     use DocUtils;
24 :     use TestUtils;
25 :     use Cwd;
26 :     use File::Copy;
27 :     use File::Path;
28 :    
# Get the command-line options. The hash passed in supplies the default for
# each option (trace level 2); whatever is left over comes back as the
# positional parameters.
my ($options, @parameters) = Tracer::ParseCommand({ trace => 2 }, @ARGV);
# Set up tracing.
my $traceLevel = $options->{trace};
TSetup("$traceLevel errors Tracer DocUtils ERDB", "TEXT");
# Get the parameters: the number of key fields and the name of the input file.
my ($fldCount, $fileName) = @parameters;
# A missing file name or a non-positive field count would silently produce
# empty keys, so reject them up front.
die "Usage: DupCheck.pl [options] fieldCount fileName\n"
    unless defined $fileName
        && defined $fldCount
        && $fldCount =~ /^[1-9]\d*$/;
Open(\*INFILE, "<$fileName");
# Loop through the file. GetKey is the ONLY reader of INFILE: reading a line
# in the loop condition as well would throw away every other record, so the
# loop is driven by an end-of-file test instead.
my $lineCount = 0;
my $oldKey;
until (eof(INFILE)) {
    # Get the current line's key and count the line.
    my $key = GetKey($fldCount);
    $lineCount++;
    # Compare it to the previous key. There is nothing to compare on the first
    # line, or when a record yielded no key at all.
    if (defined $key && defined $oldKey) {
        if ($key eq $oldKey) {
            print "Duplicate key at line $lineCount: $key\n";
        } elsif (lc $key eq lc $oldKey) {
            print "Case-duplicate key at line $lineCount: $key\n";
        }
    }
    $oldKey = $key;
}
close(INFILE);
print "$lineCount lines read.\n";
55 :    
# Get the key of the next record in INFILE.
#
# Reads one line from the global INFILE handle, splits it into tab-delimited
# fields (white space around each tab is discarded), and returns the first
# $fldCount fields joined by single tabs. A line with fewer than $fldCount
# fields contributes only the fields it has; a blank line yields an empty key.
#
# Parameter: $fldCount - number of leading fields that form the key.
# Returns:   the key string, or undef when the file is exhausted.
sub GetKey {
    my ($fldCount) = @_;
    my $line = <INFILE>;
    my $retVal;
    # Test for definedness, not truth: a final line consisting of "0" with no
    # trailing newline is a real record, not end-of-file.
    if (defined $line) {
        chomp $line;
        my @fields = split /\s*\t\s*/, $line;
        # Clamp the slice so a short line does not put undefs into the key.
        my $last = $fldCount - 1;
        $last = $#fields if $last > $#fields;
        # Assigning a slice straight to a scalar keeps only its LAST element,
        # so the key fields must be joined explicitly.
        $retVal = join("\t", @fields[0 .. $last]);
    }
    return $retVal;
}
68 :    
69 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3