[Bio] / FigKernelPackages / LogReader.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/LogReader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package LogReader;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :    
25 :     =head1 Log File Reader
26 :    
27 :     =head2 Introduction
28 :    
29 :     The log reader contains information about a log file currently being read. A log file
30 :     contains two types of data lines.
31 :    
32 :     =over 4
33 :    
34 :     =item Formatted Lines
35 :    
36 :     I<Formatted lines> consist of one or more columns of data followed by a free-form string,
37 :     which is treated as the last column. Each data column is enclosed in square brackets and
38 :     separated from neighboring columns by zero or more spaces.
39 :    
40 :     =item Free-Form Lines
41 :    
42 :     I<Free-form lines> consist of a single string.
43 :    
44 :     =back
45 :    
46 :     A I<record> is defined as a formatted line followed by zero or more free-form lines.
47 :     The log file is processed one record at a time.
48 :    
49 :     In most formatted records, the first column is a time stamp. When trying to decipher
50 :     a log, the time stamps are critical. Unfortunately, in the case of the
51 :     error log, not all of the software components that write to it put in a time stamp.
52 :     When this happens, we put in an undefined value for the time stamp. Note also
53 :     that internally, a time stamp is stored as a number of seconds since the epoch.
54 :    
55 :     Reading a record from a log file involves pulling in multiple lines of text, so
56 :     we don't know whether or not we have the last line of text in a record until we've
57 :     read the first line of the next record. For this reason, the log file reader
58 :     keeps the next line in an internal buffer. If a record has no time stamp, we will read
59 :     ahead so we can interpolate a time. As a result, there may be an extensive list of
60 :     full records buffered in this object as well.
61 :    
62 :     The fields in this object are as follows.
63 :    
64 :     =over 4
65 :    
66 :     =item fh
67 :    
68 :     Input file handle.
69 :    
70 :     =item columnCount
71 :    
72 :     Number of columns in each record. The first column is always a time stamp and the
73 :     last is always a free-form string. The useful information here is how many middle
74 :     columns we expect.
75 :    
76 :     =item buffer
77 :    
78 :     A line of data from the file.
79 :    
80 :     =item tell
81 :    
82 :     The seek location of the line of data in the buffer.
83 :    
84 :     =item fileSize
85 :    
86 :     File size in bytes at the time of the open.
87 :    
88 :     =item stop
89 :    
90 :     Position in the file at which the reader should stop when reading ahead.
91 :    
92 :     =back
93 :    
94 :     =head2 Constants and Globals
95 :    
96 :     =over 4
97 :    
98 :     =item SEEK_SET
99 :    
100 :     Constant value used to tell C<seek> to position from the start of the file.
101 :    
102 :     =cut
103 :    
# Whence value passed to seek() to position relative to the start of the file.
use constant { SEEK_SET => 0 };
105 :    
106 :     =item FRAGMENT
107 :    
108 :     Time string to use for a record fragment.
109 :    
110 :     =cut
111 :    
# Marker placed in the time-stamp column of a record fragment.
use constant { FRAGMENT => 'Fragment' };
113 :    
114 :     =item NO_TIME
115 :    
116 :     Time string to use for an unknown time.
117 :    
118 :     =cut
119 :    
# Marker placed in the time-stamp column when a record has no usable time.
use constant { NO_TIME => '(none)' };
121 :    
122 :     =back
123 :    
124 :     =head2 Public Methods
125 :    
126 :     =head3 new
127 :    
128 :     C<< my $logrdr = LogReader->new($fileName, %options); >>
129 :    
130 :     Construct a new LogReader object.
131 :    
132 :     =over 4
133 :    
134 :     =item fileName
135 :    
136 :     Name of the log file to open.
137 :    
138 :     =item options
139 :    
140 :     Hash containing options.
141 :    
142 :     =back
143 :    
144 :     The permissible options are as follows.
145 :    
146 :     =over 4
147 :    
148 :     =item columnCount
149 :    
150 :     Number of columns expected in each record, including the last column that contains a free-form string.
151 :     The default is C<5>.
152 :    
153 :     =back
154 :    
155 :     =cut
156 :    
sub new {
    # Construct a reader for the named log file.
    #   $fileName  name of the log file to open
    #   %options   columnCount => number of columns per record (default 5)
    # Returns the blessed reader object.
    my ($class, $fileName, %options) = @_;
    # Open the file for input. Open is not defined in this file; it is
    # presumably exported by Tracer, so its failure behavior is decided there.
    my $fh = Open(undef, "<$fileName");
    # The size at open time doubles as the default stop point.
    my $fileSize = -s $fh;
    # Prime the buffer with the first line so that GetRecord works on the
    # default whole-file region without a prior SetRegion call. The buffered
    # line starts at offset 0, which is what "tell" must describe.
    my $firstLine = <$fh>;
    $firstLine = '' if ! defined $firstLine;
    # Assemble the object.
    my $retVal = {
        columnCount => ($options{columnCount} || 5),
        fh          => $fh,
        fileSize    => $fileSize,
        stop        => $fileSize,
        buffer      => $firstLine,
        tell        => 0,
        # Denote we have no record in memory.
        record      => undef,
    };
    # Bless and return the object.
    bless $retVal, $class;
    return $retVal;
}
180 :    
181 :     =head3 GetRecord
182 :    
183 :     my $record = $logrdr->GetRecord();
184 :    
185 :     Return the record at the current position and advance the file position
186 :     past it. The record will be a reference to a list of columns. The first
187 :     column will be the display-formatted time stamp. The last column will be
188 :     free-form text. The intervening columns will contain full strings if they
189 :     were present in the record, and empty strings if they were not. If we've
190 :     reached the end of the file, an undefined value will be returned.
191 :    
192 :     =cut
193 :    
sub GetRecord {
    # Return the record that starts with the buffered line, as a reference
    # to a list of exactly columnCount columns (time stamp first, free-form
    # text last), and read ahead to the first line of the next record.
    # Returns undef when the file is closed or there is no buffered line.
    my ($self) = @_;
    # Declare the return variable.
    my $retVal;
    # The buffer should hold the first line of the record. It is undefined
    # if the last physical read hit end-of-file.
    my $buffer = $self->{buffer};
    # Only proceed if the file is still open and we have a line to work on.
    if (defined $self->{fh} && defined $buffer) {
        # We'll put the output columns in here.
        my @cols;
        if (substr($buffer, 0, 1) ne '[') {
            # Not a formatted line: treat it as a fragment, which gets a
            # special time stamp value.
            @cols = (FRAGMENT, $buffer);
        } else {
            # Peel bracketed columns off the FRONT of the line only. Brackets
            # that appear later, inside the free-form text, are part of the
            # message and must not be turned into columns.
            my $rest = $buffer;
            while ($rest =~ s/^\s*\[(.+?)\]//) {
                my $column = $1;
                # Columns containing nothing but white space are discarded.
                push @cols, $column if $column =~ /\S/;
            }
            # Whatever is left is the free-form text (kept only if non-blank).
            push @cols, $rest if $rest =~ /\S/;
            # Check for a time stamp in the first column.
            my $time;
            $time = Tracer::ParseDate($cols[0]) if @cols;
            # If the first column is not a time stamp, jam one in. Otherwise,
            # replace it with a re-formatted time value.
            if (! defined $time) {
                unshift @cols, NO_TIME;
            } else {
                $cols[0] = Tracer::DisplayTime($time);
            }
        }
        # Now we need to normalize the number of columns.
        my $columnCount = $self->{columnCount};
        while (scalar(@cols) > $columnCount) {
            # Too many columns, so merge the last column with its predecessor.
            my $lastCol = pop @cols;
            $cols[$#cols] .= " " . $lastCol;
        }
        while (scalar(@cols) < $columnCount) {
            # Too few columns, so add an empty one before the last one.
            my $lastCol = pop @cols;
            push @cols, '', $lastCol;
        }
        # Slurp in the free-form lines that follow. Stop at end-of-region,
        # at an undefined read (physical end-of-file), or at the first line
        # of the next record, which stays in the buffer for the next call.
        while ($self->_ReadLine()) {
            my $line = $self->{buffer};
            last if ! defined $line;
            last if _Formatted($line);
            # This is a free-form line. Add it to the last column.
            $cols[$#cols] .= $line;
        }
        # Store the record found as the result.
        $retVal = \@cols;
    }
    # Return the result.
    return $retVal;
}
264 :    
265 :     =head3 FragmentString
266 :    
267 :     my $marker = LogReader::FragmentString();
268 :    
269 :     Return the string used to mark a record as a fragment.
270 :    
271 :     =cut
272 :    
sub FragmentString {
    # Expose the marker stored in the time-stamp column of fragment records.
    my $marker = FRAGMENT;
    return $marker;
}
276 :    
277 :     =head3 AtEnd
278 :    
279 :     my $flag = $logrdr->AtEnd();
280 :    
281 :     Return TRUE if we're at the end of the section to be displayed, else FALSE.
282 :     The section is set by L</SetRegion>. If no section has been specified,
283 :     then the default extends from the beginning of the file to the end of the
284 :     file at the time it was opened.
285 :    
286 :     =cut
287 :    
sub AtEnd {
    # Report whether the current position has reached the stop point. As a
    # side effect, the first call that detects the end closes the file.
    my ($self) = @_;
    # We are at the end once the tracked position reaches the stop offset.
    my $atEnd = ($self->{tell} >= $self->{stop});
    # Nothing else to do unless we are at the end with a handle still open.
    return $atEnd unless ($atEnd && defined $self->{fh});
    # Close the handle as a precaution, so the file is not accidentally
    # held locked for an extended period.
    close $self->{fh};
    $self->{fh} = undef;
    # Clear the buffer. This ensures that "_ReadLine" doesn't update the
    # tell value any further.
    $self->{buffer} = '';
    # Return the flag.
    return $atEnd;
}
305 :    
306 :     =head3 FileSize
307 :    
308 :     my $bytes = $logrdr->FileSize();
309 :    
310 :     Return the total number of bytes in the log file.
311 :    
312 :     =cut
313 :    
sub FileSize {
    # Accessor for the byte count stored in the "fileSize" field.
    my $self = shift;
    return $self->{fileSize};
}
320 :    
321 :     =head3 SetRegion
322 :    
323 :     $logrdr->SetRegion($start, $end);
324 :    
325 :     Set up to read the specified section of the log file.
326 :    
327 :     =over 4
328 :    
329 :     =item start
330 :    
331 :     Offset to the place where the reading should start.
332 :    
333 :     =item end
334 :    
335 :     Offset to the place where the reading should stop. Note that the
336 :     read operations may extend past this point if it is in the middle
337 :     of a line of text.
338 :    
339 :     =back
340 :    
341 :     =cut
342 :    
sub SetRegion {
    # Position the reader at offset $start and arrange for it to stop at
    # offset $end. The line found at $start is loaded into the buffer.
    #   $start  offset at which reading starts (default 0)
    #   $end    offset at which reading stops (default: size at open time)
    # Dies if the file has already been closed or the seek fails.
    my ($self, $start, $end) = @_;
    # Apply the defaults, which describe the whole file.
    $start = 0 if ! defined $start;
    $end = $self->{fileSize} if ! defined $end;
    # Get the file handle. AtEnd closes and clears it once the end of a
    # region is reached, after which the file cannot be repositioned.
    my $fh = $self->{fh};
    die "LogReader::SetRegion called after the log file was closed"
        if ! defined $fh;
    # Position the file at the specified start point.
    $self->{tell} = $start;
    seek($fh, $start, SEEK_SET)
        or die "LogReader::SetRegion could not seek to $start: $!";
    Trace("SetRegion from $start to $end. Tell is " . tell($fh) . ".") if T(3);
    # Read the first line into the buffer. A read at end-of-file yields
    # undef; store an empty string instead so length() stays well-defined.
    my $line = <$fh>;
    $self->{buffer} = (defined $line ? $line : '');
    Trace(length($self->{buffer}) . " bytes in first buffer.") if T(3);
    # Save the end point.
    $self->{stop} = $end;
}
358 :    
359 :    
360 :     =head2 Private Methods
361 :    
362 :     =head3 _Formatted
363 :    
364 :     my $flag = _Formatted($line);
365 :    
366 :     Returns TRUE if the specified line is formatted, FALSE if it is
367 :     free-form. A formatted line contains one or more columns of data at the
368 :     beginning that are enclosed in square brackets and separated by spaces.
369 :    
370 :     =over 4
371 :    
372 :     =item line
373 :    
374 :     Line of input to examine.
375 :    
376 :     =item RETURN
377 :    
378 :     Returns TRUE if the line is formatted, else FALSE.
379 :    
380 :     =back
381 :    
382 :     =cut
383 :    
sub _Formatted {
    # Return 1 if the line starts with a bracketed column followed by white
    # space (i.e. it begins a new record), else 0.
    my ($line) = @_;
    # An undefined line (for example, a read at end-of-file) is never
    # formatted. Checking here avoids an uninitialized-value warning.
    return 0 if ! defined $line;
    # We're formatted if the line opens with at least one column.
    return ($line =~ /^\[.+?\]\s/ ? 1 : 0);
}
397 :    
398 :     =head3 _ReadLine
399 :    
400 :     my $flag = $logrdr->_ReadLine();
401 :    
402 :     Read the next line of data into the buffer. Return TRUE if successful,
403 :     FALSE if we are at the end of the currently-selected region. If we are
404 :     at the end of the region, the file will be closed automatically.
405 :    
406 :     =cut
407 :    
sub _ReadLine {
    # Advance past the buffered line and read the next one into the buffer.
    # Returns TRUE if a line was read, FALSE if the end of the region (or
    # the physical end of the file) was reached. In the FALSE case the file
    # has been closed.
    my ($self) = @_;
    # Update the location: skip over the line currently in the buffer.
    my $current = $self->{buffer};
    $self->{tell} += length($current) if defined $current;
    # Check for end-of-section. AtEnd closes the file if we are there.
    my $retVal = ! $self->AtEnd();
    # If we're not at end-of-section, read the next line.
    if ($retVal) {
        Trace("Reading line at $self->{tell}.") if T(3);
        my $fh = $self->{fh};
        my $line;
        $line = <$fh> if defined $fh;
        if (defined $line) {
            $self->{buffer} = $line;
            Trace(length($self->{buffer}) . " bytes read at $self->{tell}.") if T(3);
        } else {
            # The file ran out before the stop point (for example, the stop
            # offset is beyond the real end of the file). Without this
            # branch the position would never advance and callers would
            # loop forever. End the region here so AtEnd reports TRUE.
            close $fh if defined $fh;
            $self->{fh} = undef;
            $self->{buffer} = '';
            $self->{stop} = $self->{tell};
            $retVal = 0;
        }
    }
    # Return the result.
    return $retVal;
}
425 :    
426 :     1;
427 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3