[Bio] / FigKernelPackages / ScriptUtils.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/ScriptUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (view) (download) (as text)

1 : parrello 1.1 #
2 :     # Copyright (c) 2003-2015 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 :     package ScriptUtils;
19 :    
20 :     use strict;
21 :     use warnings;
22 :     use Getopt::Long::Descriptive;
23 :    
24 :    
25 :     =head1 Script Utilities
26 :    
27 :     This package contains utilities that are useful in coding SEEDtk command-line scripts.
28 :    
29 :     =head2 Special Methods
30 :    
31 :     =head3 WarnHandler
32 :    
33 :     The Perl EPIC debugger does not handle warnings properly. This method fixes the problem.
34 :     It is hooked into the warning signal if the environment variable STK_TYPE is not set.
35 :     That variable is set by the various C<user-env> scripts, which are more or less required
36 :     in the non-debugging environments. If it is executed accidentally, it does no harm.
37 :    
38 :     =cut
39 :    
40 :     sub WarnHandler {
41 :     print STDERR @_ ;
42 :     }
43 :     if (! $ENV{STK_TYPE}) {
44 :     $SIG{'__WARN__'} = 'WarnHandler';
45 :     }
46 :    
47 :    
48 :     =head2 Public Methods
49 :    
50 :     =head3 IH
51 :    
52 :     my $ih = ScriptUtils::IH($fileName);
53 :    
54 :     Get the input file handle. If the parameter is undefined or empty, the
55 :     standard input will be used. Otherwise the file will be opened and an
56 :     error thrown if the open fails. When debugging in Eclipse, the
57 :     standard input is not available, so this method provides a cheap way for
58 :     the input to ber overridden from the command line. This method provides a
59 :     compact way of insuring this is possible. For example, if the script has
60 :     two positional parameters, and the last is an optional filename, you
61 :     would code
62 :    
63 :     my $ih = ScriptUtils::IH($ARGV[1]);
64 :    
65 :     If the C<-i> option contains the input file name, you would code
66 :    
67 :     my $ih = ScriptUtils::IH($opt->i);
68 :    
69 :     =over 4
70 :    
71 :     =item fileName
72 :    
73 :     Name of the file to open for input. If the name is empty or omitted, the
74 :     standard input will be returned.
75 :    
76 :     =item RETURN
77 :    
78 :     Returns an open file handle for the script input.
79 :    
80 :     =back
81 :    
82 :     =cut
83 :    
84 :     sub IH {
85 :     # Get the parameters.
86 :     my ($fileName) = @_;
87 :     # Declare the return variable.
88 :     my $retVal;
89 :     if (! $fileName) {
90 :     # Here we have the standard input.
91 :     $retVal = \*STDIN;
92 :     } else {
93 :     # Here we have a real file name.
94 :     open($retVal, "<$fileName") ||
95 :     die "Could not open input file $fileName: $!";
96 :     }
97 :     # Return the open handle.
98 :     return $retVal;
99 :     }
100 :    
101 :    
102 :     =head3 ih_options
103 :    
104 :     my @opt_specs = ScriptUtils::ih_options();
105 :    
106 :     These are the command-line options for specifying a standard input file.
107 :    
108 :     =over 4
109 :    
110 :     =item input
111 :    
112 :     Name of the main input file. If omitted and an input file is required, the standard
113 :     input is used.
114 :    
115 :     =back
116 :    
117 :     This method returns the specifications for these command-line options in a form
118 :     that can be used in the L<ScriptUtils/Opts> method.
119 :    
120 :     =cut
121 :    
122 :     sub ih_options {
123 :     return (
124 :     ["input|i=s", "name of the input file (if not the standard input)"]
125 :     );
126 :     }
127 :    
128 :    
129 :     =head2 Command-Line Option Methods
130 :    
131 :     =head3 Opts
132 :    
133 :     my $opt = ScriptUtils::Opts($parmComment, @options);
134 :    
135 :     Parse the command line using L<Getopt::Long::Descriptive>. This method automatically handles
136 :     the C<help> option and dies if the command parse fails.
137 :    
138 :     =over 4
139 :    
140 :     =item parmComment
141 :    
142 :     A string that describes the positional parameters for display in the usage statement.
143 :    
144 :     =item options
145 :    
146 :     A list of options such as are expected by L<Getopt::Long::Descriptive>.
147 :    
148 :     =item RETURN
149 :    
150 :     Returns the options object. Every command-line option's value may be retrieved using a method
151 :     on this object.
152 :    
153 :     =back
154 :    
155 :     =cut
156 :    
157 :     sub Opts {
158 :     # Get the parameters.
159 :     my ($parmComment, @options) = @_;
160 :     # Parse the command line.
161 :     my ($retVal, $usage) = describe_options('%c %o ' . $parmComment, @options,
162 :     [ "help|h", "display usage information", { shortcircuit => 1}]);
163 :     # The above method dies if the options are invalid. Check here for the HELP option.
164 :     if ($retVal->help) {
165 :     print $usage->text;
166 :     exit;
167 :     }
168 :     return $retVal;
169 :     }
170 :    
171 : parrello 1.4 =head3 get_col
172 :    
173 :     my @values = ScriptUtils::get_col($ih, $col);
174 :    
175 :     Read from the specified tab-delimited input stream and extract the values from the specified column.
176 :     An undefined or zero value for the column index will retrieve the last column in each row.
177 :    
178 :     =over 4
179 :    
180 :     =item ih
181 :    
182 :     Open input handle for a tab-delimited file.
183 :    
184 :     =item col
185 :    
186 :     Index (1-based) of the desired column. A zero or undefined value may be used to specified the last column.
187 :    
188 :     =item RETURN
189 :    
190 :     Returns a list of the values retrieved.
191 :    
192 :     =back
193 :    
194 :     =cut
195 :    
196 :     sub get_col {
197 :     my ($ih, $col) = @_;
198 :     my @retVal;
199 :     while (! eof $ih) {
200 :     my $line = <$ih>;
201 :     $line =~ s/\r?\n$//;
202 :     my @flds = split /\t/, $line;
203 :     if ($col) {
204 :     push @retVal, $flds[$col - 1];
205 :     } else {
206 :     push @retVal, pop @flds;
207 :     }
208 :     }
209 :     return @retVal;
210 :     }
211 :    
212 : parrello 1.5 =head3 read_col
213 :    
214 :     my $value = ScriptUtils::get_col($ih, $col);
215 :    
216 :     Read from the specified tab-delimited input stream and extract the value from the specified column
217 :     of the next record. An undefined or zero value for the column index will retrieve the last column.
218 :    
219 :     =over 4
220 :    
221 :     =item ih
222 :    
223 :     Open input handle for a tab-delimited file.
224 :    
225 :     =item col
226 :    
227 :     Index (1-based) of the desired column. A zero or undefined value may be used to specified the last column.
228 :    
229 :     =item RETURN
230 :    
231 :     Returns the value retrieved.
232 :    
233 :     =back
234 :    
235 :     =cut
236 :    
237 :     sub read_col {
238 :     my ($ih, $col) = @_;
239 :     my $retVal;
240 :     $col //= 0;
241 :     my $line = <$ih>;
242 :     $line =~ s/\r?\n$//;
243 :     my @flds = split /\t/, $line;
244 :     $retVal = $flds[$col - 1];
245 :     return $retVal;
246 :     }
247 :    
248 : parrello 1.7 =head3 get_line
249 :    
250 :     my @fields = ScriptUtils::get_line($ih);
251 :    
252 :     Return all the columns from the next input line.
253 :    
254 :     =over 4
255 :    
256 :     =item ih
257 :    
258 :     Open input file handle.
259 :    
260 :     =item RETURN
261 :    
262 :     Returns a list of all the column entries in the next input line.
263 :    
264 :     =back
265 :    
266 :     =cut
267 :    
268 :     sub get_line {
269 :     my ($ih) = @_;
270 :     my $line = <$ih>;
271 :     $line =~ s/\r?\n$//;
272 :     return split /\t/, $line;
273 :     }
274 :    
275 : parrello 1.4 =head3 get_couplets
276 :    
277 :     my @couplets = ScriptUtils::get_couplets($ih, $col, $batchSize);
278 :    
279 :     Read from the specified tab-delimited input stream and extract the values from the specified column.
280 :     An undefined or zero value for the column index will retrieve the last column in each row.
281 :    
282 :     =over 4
283 :    
284 :     =item ih
285 :    
286 :     Open input handle for a tab-delimited file.
287 :    
288 :     =item col
289 :    
290 :     Index (1-based) of the desired column. A zero or undefined value may be used to specified the last column.
291 :    
292 :     =item batchSize (optional)
293 :    
294 :     If specified, only a limited number of rows will be returned. The specified value is the number of rows.
295 :     This parameter is used to divide the input into batches for performance or parallelism reasons.
296 :    
297 :     =item RETURN
298 :    
299 :     Returns a list of 2-tuples. Each 2-tuple will consist of (0) the value from the input column and (1) the
300 :     original row as a list reference.
301 :    
302 :     =back
303 :    
304 :     =cut
305 :    
306 :     sub get_couplets {
307 :     my ($ih, $col, $batchSize) = @_;
308 :     # This will count the number of rows processed.
309 :     my $count = 0;
310 :     # We will stop when the count equals the batch size. It will never equal -1. Note that a batch
311 :     # size of 0 also counts as unlimited.
312 :     $batchSize ||= -1;
313 :     # This will be the return list.
314 :     my @retVal;
315 :     # Loop until done.
316 :     while (! eof $ih && $count != $batchSize) {
317 :     my $line = <$ih>;
318 :     my @flds = split /\t/, $line;
319 :     # Only proceed if the line is nonblank.
320 :     if (@flds) {
321 : parrello 1.6 # Fix the last column.
322 :     $flds[$#flds] =~ s/[\r\n]+$//;
323 : parrello 1.4 # Extract the desired column.
324 :     my $value;
325 :     if ($col) {
326 :     $value = $flds[$col - 1];
327 :     } else {
328 :     $value = $flds[$#flds];
329 :     }
330 :     # Store and count the result.
331 :     push @retVal, [$value, \@flds];
332 :     $count++;
333 :     }
334 :     }
335 :     return @retVal;
336 :     }
337 : parrello 1.3
338 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3