[Bio] / FigKernelPackages / ScriptThing.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/ScriptThing.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     #!/usr/bin/perl -w
5 :     #
6 :     # This is a SAS Component.
7 :     #
8 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
9 :     # for Interpretations of Genomes. All Rights Reserved.
10 :     #
11 :     # This file is part of the SEED Toolkit.
12 :     #
13 :     # The SEED Toolkit is free software. You can redistribute
14 :     # it and/or modify it under the terms of the SEED Toolkit
15 :     # Public License.
16 :     #
17 :     # You should have received a copy of the SEED Toolkit Public License
18 :     # along with this program; if not write to the University of Chicago
19 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
20 :     # Genomes at veronika@thefig.info or download a copy from
21 :     # http://www.theseed.org/LICENSE.TXT.
22 :     #
23 :    
24 :     package ScriptThing;
25 :    
26 :     =head1 Script Utilities Package
27 :    
28 :     This is a simple package containing utility methods of use to the server scripts.
29 :    
30 :     =head2 Public Methods
31 :    
32 :     =head3 GetBatch
33 :    
34 :     my @lines = ScriptThing::GetBatch($ih, $size);
35 :    
36 :     Get a batch of work to do. The specified input stream will be read, and a
37 :     list of IDs pulled out, along with the contents of the input lines on
38 : parrello 1.6 which the IDs were found. The input stream can be an open file handle or a
39 :     list of singleton values to return.
40 : parrello 1.1
41 :     =over 4
42 :    
43 :     =item ih
44 :    
45 : parrello 1.6 Open input file handle, or alternatively a reference to a list of values to return.
46 :     If a list is specified, the items will be removed from the list as they are returned.
47 : parrello 1.1
48 :     =item size (optional)
49 :    
50 :     Maximum permissible batch size. If omitted, the default is C<1000>.
51 :    
52 : parrello 1.3 =item column (optional)
53 :    
54 :     Index (1-based) of the column containing the IDs. The default is the last
55 :     column.
56 :    
57 : parrello 1.1 =item RETURN
58 :    
59 :     Returns a list of 2-tuples; each 2-tuple consists of an ID followed by the text
60 :     of the input line containing the ID (with the trailing new-line removed).
61 :    
62 :     =back
63 :    
64 :     =cut
65 :    
66 :     sub GetBatch {
67 :     # Get the parameters.
68 : parrello 1.3 my ($ih, $size, $column) = @_;
69 : parrello 1.1 # Declare the return variable.
70 :     my @retVal;
71 :     # Compute the batch size.
72 :     my $linesLeft = $size || 1000;
73 : parrello 1.6 # Determine the mode in which we're operating.
74 :     if (ref $ih eq 'ARRAY') {
75 :     # Here we have a list reference. Loop through it until we run out or fill
76 :     # the batch.
77 :     while ($linesLeft-- > 0 && @$ih > 0) {
78 :     # Get the next list entry.
79 :     my $id = shift @$ih;
80 :     # Put it in the return list as the desired ID and the line it appeared on.
81 :     push @retVal, [$id, $id];
82 :     }
83 :     } else {
84 :     # Loop through the input until we run out or fill the batch.
85 :     while ($linesLeft-- > 0 && ! eof $ih) {
86 :     # Get the next input line.
87 :     my $line = <$ih>;
88 :     chomp $line;
89 : parrello 1.8 # Only proceed if it's nonblank.
90 :     if ($line =~ /\S/) {
91 :     # We'll put our desired column in here.
92 :     my $id = GetColumn($line, $column);
93 :     # Put it in the return list.
94 :     push @retVal, [$id, $line];
95 :     }
96 : parrello 1.6 }
97 : parrello 1.5 }
98 :     # Return the result.
99 :     return @retVal;
100 :     }
101 :    
102 :     =head3 GetList
103 :    
104 :     my @list = ScriptThing::GetList($ih, $column);
105 :    
106 :     Extract a list of data items from a tab-delimited file. Unlike L</GetBatch>,
107 :     this method reads the entire file, and it only returns the column of interest
108 :     instead of tuples containing the original data lines.
109 :    
110 :     =over 4
111 :    
112 :     =item ih
113 :    
114 :     Open file handle for the input.
115 :    
116 :     =item column (optional)
117 :    
118 :     Index (1-based) of the column containing the IDs. The default is the last
119 :     column.
120 :    
121 :     =item RETURN
122 :    
123 :     Returns a list containing the contents of the desired column for every record
124 :     in the input stream.
125 :    
126 :     =back
127 :    
128 :     =cut
129 :    
130 :     sub GetList {
131 :     # Get the parameters.
132 :     my ($ih, $column) = @_;
133 :     # Declare the return variable.
134 :     my @retVal;
135 :     # Loop through the input.
136 :     while (! eof $ih) {
137 :     # Get the next input line.
138 :     my $line = <$ih>;
139 :     chomp $line;
140 :     # We'll put our desired column in here.
141 :     my $id = GetColumn($line, $column);
142 :     # Put it in the return list.
143 :     push @retVal, $id;
144 :     }
145 :     # Return the list.
146 :     return @retVal;
147 :     }
148 :    
149 :     =head3 GetColumn
150 :    
151 :     my $id = ScriptThing::GetColumn($line, $column);
152 :    
153 :     Get the specified column from a tab-delimited input line.
154 :    
155 :     =over 4
156 :    
157 :     =item line
158 :    
159 :     A tab-delimited line of text.
160 :    
161 :     =item column
162 :    
163 :     The index (1-based) of the column whose value is desired. If undefined or 0,
164 :     then the last column will be extracted.
165 :    
166 :     =item RETURN
167 :    
168 :     Returns the value of the desired column. Note that if it is the last column, no
169 :     trimming of new-line characters will take place.
170 :    
171 :     =back
172 :    
173 :     =cut
174 :    
175 :     sub GetColumn {
176 :     # Get the parameters.
177 :     my ($line, $column) = @_;
178 :     # Declare the return variable.
179 :     my $retVal;
180 :     # Are we looking for a specific column or the last one?
181 :     if ($column) {
182 :     # We want a specific column.
183 :     my @cols = split /\t/, $line;
184 :     $retVal = $cols[$column - 1];
185 :     } else {
186 :     # We want the last column.
187 :     if ($line =~ /.*\t(.+)$/) {
188 :     $retVal = $1;
189 : parrello 1.1 } else {
190 : parrello 1.5 $retVal = $line;
191 : parrello 1.1 }
192 :     }
193 :     # Return the result.
194 : parrello 1.5 return $retVal;
195 : parrello 1.1 }
196 :    
197 :     =head3 CommentHash
198 :    
199 : parrello 1.7 my %hash = ScriptThing::CommentHash(\@tuples, $column);
200 : parrello 1.1
201 :     Convert the 2-tuples returned by L</GetBatch> to a comment hash for
202 :     FASTA-based methods. The return hash will map each incoming ID to a
203 :     string containing the fields from the corresponding line.
204 :    
205 :     =over 4
206 :    
207 :     =item tuples
208 :    
209 :     Reference to a list of 2-tuples. Each 2-tuple contains an ID followed by a
210 :     tab-delimited input line (without the new-line character).
211 :    
212 : parrello 1.7 =item column
213 :    
214 :     Index (1-based) of the column containing the ID value. The default is the last
215 :     column.
216 :    
217 : parrello 1.1 =item RETURN
218 :    
219 :     Returns a hash mapping each incoming ID to the text from its input line.
220 :    
221 :     =back
222 :    
223 :     =cut
224 :    
225 :     sub CommentHash {
226 :     # Get the parameters.
227 : parrello 1.7 my ($tuples, $column) = @_;
228 : parrello 1.1 # Declare the return variable.
229 :     my %retVal;
230 :     # Loop through the tuples.
231 :     for my $tuple (@$tuples) {
232 :     # Get the ID and line.
233 :     my ($id, $line) = @$tuple;
234 :     # Split the line and pop off the ID.
235 :     my @fields = split /\t/, $line;
236 : parrello 1.7 if (! $column) {
237 :     pop @fields;
238 :     } else {
239 :     splice @fields, $column - 1, 1;
240 :     }
241 : parrello 1.1 # Rejoin the fields with spaces in between to form the result.
242 :     $retVal{$id} = join(" ", @fields);
243 :     }
244 :     # Return the result.
245 :     return %retVal;
246 :     }
247 :    
248 :    
249 : parrello 1.2 =head3 AdjustStdin
250 :    
251 :     AdjustStdin();
252 :    
253 :     Check the environment for a STDIN variable, and if present, open the
254 :     named file as STDIN. This is a debugging hack that allows the scripts to
255 :     be run easily inside a symbolic debugger.
256 :    
257 :     =cut
258 :    
259 :     sub AdjustStdin {
260 :     # Check for the environment variable.
261 :     my $file = $ENV{STDIN};
262 :     if ($file) {
263 :     # We found it, so open STDIN using the specified file name.
264 :     open STDIN, "<$file" || die $!;
265 :     }
266 :     }
267 :    
268 :    
269 : parrello 1.1
270 :    
271 :    
272 :    
273 :    
274 :     1;
275 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3