[Bio] / Sprout / DrugClean.pl Repository:
ViewVC logotype

Annotation of /Sprout/DrugClean.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

#!/usr/bin/perl -w

=head1 Drug Cleaner

Clean up a flat file with PEGs in it.

This script runs through a tab-delimited text file, removing duplicate entries, entries
with no feature ID, and entries for features not in the Sprout database. The positional
parameters should be the names of the files to clean. Each file is rewritten in place:
the cleaned data is written to a temporary file (the original name with C<.tmp~>
appended), which then replaces the original. If the temporary file cannot be
completely written, the original file is left untouched.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will go directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item macFile

If specified, the file is presumed to be in Macintosh format.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item col

Column in the input file that contains feature IDs. The default is C<6>. The
column count is 1-based, so the value must be a positive integer.

=item phone

Phone number to message when the script is complete.

=back

=cut

use strict;
use Tracer;
use Cwd;
use File::Copy;
use File::Path;
use FIG;
use SFXlate;
use Stats;

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              col => ["6", "1-based index of the column containing feature IDs"],
                                              trace => ["2", "trace level"],
                                              macFile => ["", "If specified, the file is presumed to be in macintosh format."],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "<fileName1> <fileName2> ... ",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Ensure we catch errors.
eval {
    # Get a sprout object.
    my $sprout = SFXlate->new_sprout_only();
    # Get the column containing feature IDs. The option is 1-based, so 0 is not
    # a legal value: left unchecked it would become index -1 and silently select
    # the LAST column of every record.
    my $col = $options->{col};
    if ($col !~ /^\d+$/ || $col < 1) {
        Confess("Invalid column number \"$col\".");
    } else {
        # Convert to a 0-based array index.
        $col--;
        # Check for macintosh format. If the input file is from the MAC, we use
        # "\r" instead of "\n" as the input record separator. This will not affect
        # output, so we'll be converting the file to Unix as part of the cleaning.
        # The change is localized so that it is undone when this block exits and
        # cannot leak into the reporting code at the end of the script.
        local $/ = ($options->{macFile} ? "\r" : $/);
        # Loop through the files.
        for my $fileName (@parameters) {
            Trace("Processing $fileName.") if T(2);
            # Create a backup file name.
            my $tempFile = "$fileName.tmp~";
            # Create a hash of features. We will skip any feature whose ID is already in the hash.
            my %fids;
            # Open the two files.
            my $inh = Open(undef, "<$fileName");
            my $outh = Open(undef, ">$tempFile");
            # Get a statistics object.
            my $stats = Stats->new();
            # Loop through the input file.
            while (! eof $inh) {
                # Get the current record.
                my @fields = Tracer::GetLine($inh);
                $stats->Add(input => 1);
                # Pull out the feature ID.
                my $fid = $fields[$col];
                # Figure out what to do with this record.
                if (! $fid) {
                    # No feature ID, so this record is considered a bad line and skipped.
                    $stats->Add(badline => 1);
                } elsif (! exists $fids{$fid}) {
                    # Here we are seeing this feature for the first time. Make sure we
                    # don't process it again.
                    $fids{$fid} = 1;
                    # Now, find out if this feature exists.
                    if ($sprout->Exists('Feature', $fid)) {
                        # It does, so write it out.
                        Tracer::PutLine($outh, \@fields);
                        $stats->Add(output => 1);
                    } else {
                        Trace("Feature $fid not found.") if T(3);
                        $stats->Add(notFound => 1);
                    }
                } else {
                    $stats->Add(duplicate => 1);
                }
            }
            # Display the statistics.
            Trace("Statistics for $fileName:\n" . $stats->Show()) if T(2);
            # Close the files. Buffered write errors (e.g. a full disk) only surface
            # when the output handle is closed, so the result must be checked before
            # the original file is replaced.
            close $inh;
            if (! close $outh) {
                # The cleaned copy is incomplete. Keep the original and discard the copy.
                Trace("Could not close $tempFile: $!. $fileName was left unchanged.") if T(0);
                unlink $tempFile;
            } else {
                # Kill the old file and rename the new one.
                my $okFlag = rename($tempFile, $fileName);
                if (! $okFlag) {
                    Trace("Could not rename $tempFile to $fileName: $!") if T(0);
                }
            }
        }
    }
    Trace("Processing complete.") if T(2);
};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Send the completion message, if a phone number was supplied.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "Drug Cleaner terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3