[Bio] / FigKernelScripts / Packager.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/Packager.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 Packager
4 :    
5 :     Package all the files in a directory tree for transport or recreate the directory
6 :     tree from the packaged files.
7 :    
8 :     This method takes as input three positional parameters: a command, the name of the
9 :     root for the directory tree and the name of the package files. If the command is
10 :     C<pack>, the files are packaged into one or more text files. If the command is
11 :     C<unpack>, the files are unpackaged from the text files. If the command is
12 :     C<scan>, the files are examined to determine the file types.
13 :    
14 :     File and directory names should be specified using Unix conventions, with a
15 :     forward slash (C</>) instead of a backslash (C<\>).
16 :    
17 :     File names ending in a tilde (C<~>) or C<.bak> will be ignored when scanning or
18 :     packaging. These files are presumed to be temporary backups.
19 :    
20 :     The process of packaging is straightforward. We write out the name of the input file,
21 :     its type (text or binary), the file data itself, and an MD5 digest. Each file is
22 :     compressed using the PERL C<Compress::Zlib> libary.
23 :    
24 :     This is by no means the optimal way to package files, but it is simple to do and
25 :     can be customized for this project's special problems (such as incompatible file
26 :     names).
27 :    
28 :     It is important that the packager understand the difference between text files
29 :     and binary files. The line end characters in text files need to be translated
30 :     to the target platform's characters, while binary files must be moved without
31 :     translation. The file's extension is used to determine whether it is binary or
32 :     text. When packaging, we will look for a file called C<extensions.txt> in the
33 :     root directory of the tree. This file will have a list of binary file extensions,
34 :     one per line. The <scan> command can be used to create this file. If the file
35 :     is not found when packaging, it is presumed all files are text files.
36 :    
37 :     When packaging, line-end characters inside text files are converted to a single
38 :     line feed (C<\n>). When unpackaging, the line feeds are converted to the line-ending
39 :     character of the current platform.
40 :    
41 :     =head2 Temporary Files
42 :    
43 :     Temporary files in the directory tree are skipped when scanning or packaging. A file
44 :     is considered temporary if its name ends in a tilde (C<~>) or pound sign C<#>,
45 :     or it has an extension of C<.bak> or C<.tgz>.
46 :    
47 :     =cut
48 :    
49 :     use strict;
50 :     use Cwd 'abs_path';
51 :     use File::Copy;
52 :     use File::Path;
53 :     use Fcntl qw(:DEFAULT :seek);
54 :     use POSIX;
55 :     use Compress::Zlib;
56 :     use Digest::MD5;
57 :    
58 :     =head2 Constants
59 :    
60 :     =head3 CHUNK_SIZE
61 :    
62 :     Number of bytes to use when reading a survey chunk during file scans.
63 :     We take three chunks from the file of this size. Any binary character
64 :     (ASCII code > 127) indicates a binary file. A binary file is also
65 :     presumed if the number of non-whitespace control characters is above
66 :     a certain percentage.
67 :    
68 :     =cut
69 :     my $CHUNK_SIZE = 1024;
70 :    
71 :     =head3 SURVEY_SIZE
72 :    
73 :     Number of bytes in a file survey. A file survey consists of three chunks.
74 :    
75 :     =cut
76 :    
77 :     my $SURVEY_SIZE = $CHUNK_SIZE * 3;
78 :    
79 :     =head3 NON_WHITE
80 :    
81 :     Fraction of the non-white control characters in the survey data that
82 :     would classify the file as binary.
83 :    
84 :     =cut
85 :    
86 :     my $NON_WHITE = 0.25;
87 :    
88 :     =head3 MAX_PACKAGE_SIZE
89 :    
90 :     Maximum legal size for a package file.
91 :    
92 :     =cut
93 :    
94 :     my $MAX_PACKAGE_SIZE = 0x7FFFFFFF;
95 :    
96 :     =head3 NL_LEN
97 :    
98 :     Number of characters in a line-end sequence.
99 :    
100 :     =cut
101 :    
102 :     my $NL_LEN = length($/);
103 :    
104 :     =head3 PACK_BLOCK
105 :    
106 :     Number of characters in a package block.
107 :    
108 :     =cut
109 :    
110 :     my $PACK_BLOCK = 4096;
111 :    
112 :     # OPEN CODE
113 :    
114 :     # Get the command, the file directory name, and the package file name.
115 :     my ($command, $treeDirectory, $packageFile) = @ARGV;
116 :     # Get the absolute path for the directory tree.
117 :     my $absDirectory = abs_path($treeDirectory);
118 :     # Trim off any trailing slash.
119 :     if ($absDirectory =~ m!/$!) {
120 :     chop $absDirectory;
121 :     }
122 :     # Process the command.
123 :     $command = lc $command;
124 :     if ($command eq 'scan') {
125 :     ScanTree($absDirectory);
126 :     } elsif ($command eq 'pack') {
127 :     PackTree($absDirectory, $packageFile);
128 :     } elsif ($command eq 'unpack') {
129 :     UnPackTree($absDirectory, $packageFile);
130 :     } else {
131 :     print "Invalid command \"$command\".\n";
132 :     }
133 :     print "Processing complete.\n";
134 :    
135 :     =head2 Methods
136 :    
137 :     =head3 ScanTree
138 :    
139 :     C<< ScanTree($directory); >>
140 :    
141 :     Scan all files in the specified directory and create an C<extensions.txt> file
142 :     listing the extensions of files that are binary.
143 :    
144 :     =cut
145 :    
146 :     sub ScanTree {
147 :     my ($directory) = @_;
148 :     # Create a hash of file extensions. The first time a file with a particular
149 :     # extension is found, it is scanned for binary characters. If such characters
150 :     # are found, the file is added to the hash with a value of "b"; otherwise it is
151 :     # added with a value of "t".
152 :     my %extensions = ();
153 :     # Create a directory object.
154 :     my $dirObject = OpenDirectory($directory);
155 :     # Loop through the files in the tree, scanning each one.
156 :     while (my $fileName = GetFileName($dirObject)) {
157 :     # Get the file name relative to the root and the file extension.
158 :     my $relName = $dirObject->{relName};
159 :     my $suffix = $dirObject->{suffix};
160 :     # Only proceed if this is a new file type.
161 :     if (exists $extensions{$suffix}) {
162 :     print "Skipping $relName.\n";
163 :     } else {
164 :     # Get the file size.
165 :     my $size = -s $fileName;
166 :     # If the file is empty, skip it.
167 :     if ($size == 0) {
168 :     print "File $relName is empty: ignored.\n";
169 :     } else {
170 :     # Here the file is worth scanning.
171 :     print "Scanning $relName($size) for type of \"$suffix\"... ";
172 :     # Determine the type of the file.
173 :     my $type = ScanFile($fileName, $size);
174 :     print "$type\n";
175 :     # Add the extension to the hash.
176 :     $extensions{$suffix} = substr($type, 0, 1);
177 :     }
178 :     }
179 :     }
180 :     # Now we write the extension file.
181 :     print "Creating extensions file.\n";
182 :     (open EXTFILE, ">$directory/extensions.txt") ||
183 :     die "Could not open extensions file: $!";
184 :     my $count = 0;
185 :     for my $ext (keys %extensions) {
186 :     if ($extensions{$ext} eq "b") {
187 :     (print EXTFILE "$ext\n") ||
188 :     die "Error writing extensions file: $!";
189 :     $count++;
190 :     }
191 :     }
192 :     close EXTFILE;
193 :     print "$count binary extensions found.\n";
194 :     print InSummary($dirObject);
195 :     }
196 :    
197 :     =head3 ScanFile
198 :    
199 :     C<< my $type = ScanFile($fileName, $size); >>
200 :    
201 :     Survey the specified file of the specified size to determine whether it is
202 :     text or binary. A file is binary if it has a lot of control characters or
203 :     any character whose ASCII value is greater than 127.
204 :    
205 :     =cut
206 :    
207 :     sub ScanFile {
208 :     my ($fileName, $size) = @_;
209 :     # Open the file for binary input.
210 :     (sysopen INFILE, $fileName, O_RDONLY + O_BINARY) ||
211 :     die "Could not scan $fileName: $!";
212 :     # Survey the file. We take chunks from the beginning, end, and middle.
213 :     # if the file has fewer than three chunks of characters, we read the
214 :     # whole thing.
215 :     my $survey = "";
216 :     if ($size <= $SURVEY_SIZE) {
217 :     (sysread INFILE, $survey, $SURVEY_SIZE) ||
218 :     die "Error reading $fileName: $!";
219 :     } else {
220 :     (sysread INFILE, $survey, $CHUNK_SIZE) ||
221 :     die "Error reading start of $fileName: $!";
222 :     my $position = int($size - $CHUNK_SIZE / 2);
223 :     (sysseek INFILE, $position, SEEK_SET) ||
224 :     die "Error moving to middle of $fileName: $!";
225 :     (sysread INFILE, $survey, $CHUNK_SIZE, $CHUNK_SIZE) ||
226 :     die "Error reading middle of $fileName: $!";
227 :     (sysseek INFILE, -$CHUNK_SIZE, SEEK_END) ||
228 :     die "Error moving to end of $fileName: $!";
229 :     (sysread INFILE, $survey, 2*$CHUNK_SIZE, $CHUNK_SIZE) ||
230 :     die "Error reading end of $fileName: $!";
231 :     }
232 :     close INFILE;
233 :     my $surveyLen = length $survey;
234 :     # Now $survey contains a bunch of bytes from the file. We run through them
235 :     # counting non-white control characters and looking for binary characters.
236 :     # First, we assume the file is text until we learn otherwise.
237 :     my $retVal = "text";
238 :     # This variable will contain the number of non-white control characters
239 :     # required to prove the file is binary.
240 :     my $controls = ceil($NON_WHITE * $surveyLen);
241 :     # Here we loop through the survey characters.
242 :     while ($retVal eq "text" && (my $chr = chop $survey)) {
243 :     # Process according to the character type.
244 :     if ($chr gt chr(127)) {
245 :     # Here we have a true binary character.
246 :     $retVal = "binary";
247 :     } elsif ($chr lt " " && $chr !~ /\s/) {
248 :     # Here we have a non-white control character.
249 :     $controls--;
250 :     if (! $controls) { $retVal = "binary"; }
251 :     }
252 :     }
253 :     # Return the file type.
254 :     return $retVal;
255 :     }
256 :    
257 :     =head3 OpenDirectory
258 :    
259 :     C<< my $dirObject = OpenDirectory($directoryName); >>
260 :    
261 :     This method returns an object that can be used to find all the files in a directory
262 :     tree. The incoming parameter is the directory tree name, properly cleaned and with
263 :     the trailing slash removed.
264 :    
265 :     =cut
266 :    
267 :     sub OpenDirectory {
268 :     my ($directoryName) = @_;
269 :     return {
270 :     # The root name length enables us to separate the relative file name from
271 :     # the absolute file name.
272 :     rootLen => length($directoryName) + 1,
273 :     # This is a stack of the files and directories still to be processed.
274 :     stack => [$directoryName],
275 :     # These are counters for various file types.
276 :     tempCount => 0,
277 :     dirCount => 0,
278 :     foundCount => 0
279 :     };
280 :     }
281 :    
282 :     =head3 GetFileName
283 :    
284 :     C<< my $fileName = GetFileName($dirObject); >>
285 :    
286 :     This method returns the next file name for a directory tree. The incoming parameter is
287 :     a directory object returned by OpenDirectory. If all the files have been processed, the
288 :     method returns an undefined value.
289 :    
290 :     =cut
291 :    
292 :     sub GetFileName {
293 :     my ($dirObject) = @_;
294 :     # Loop until we find a file name or empty the stack.
295 :     my $retVal;
296 :     my $done = 0;
297 :     while (! $done) {
298 :     # Get the next file name.
299 :     my $nextName = pop @{$dirObject->{stack}};
300 :     if (! $nextName) {
301 :     # The stack is empty, so we're done.
302 :     $done = 1;
303 :     } elsif (! -d $nextName) {
304 :     # Here we've found a real file. Now we need to check for a temporary file name.
305 :     if ($nextName =~ /(~|#|\.bak|\.tgz)$/) {
306 :     # We have a temporary file name, so we count it and keep going.
307 :     $dirObject->{tempCount}++;
308 :     } else {
309 :     # Here we have a non-temporary name, so we're done.
310 :     $done = 1;
311 :     $retVal = $nextName;
312 :     $dirObject->{foundCount}++;
313 :     # Compute the file's relative name and extension, then stuff them in
314 :     # the object.
315 :     $dirObject->{relName} = substr $nextName, $dirObject->{rootLen};
316 :     my ($path, $suffix) = NameParse($nextName);
317 :     $dirObject->{suffix} = lc $suffix;
318 :     }
319 :     } else {
320 :     # Here we have a directory. We need to push all its members on the stack.
321 :     # Note that file names beginning with a period are ignored.
322 :     opendir NEXTDIR, $nextName;
323 :     my @files = grep { $_ =~ /^[^.]/ } readdir NEXTDIR;
324 :     closedir NEXTDIR;
325 :     push @{$dirObject->{stack}}, map { "$nextName/$_" } @files;
326 :     $dirObject->{dirCount}++;
327 :     }
328 :     }
329 :     return $retVal;
330 :     }
331 :    
332 :     =head3 InSummary
333 :    
334 :     C<< print InSummary($dirObject); >>
335 :    
336 :     Return a summary of the files processed by a directory object.
337 :    
338 :     =cut
339 :    
340 :     sub InSummary {
341 :     my ($dirObject) = @_;
342 :     my $dirCount = $dirObject->{dirCount};
343 :     my $tempCount = $dirObject->{tempCount};
344 :     my $foundCount = $dirObject->{foundCount};
345 :     return "$foundCount files found, $tempCount temporary files ignored, $dirCount directories processed.\n";
346 :     }
347 :    
348 :     =head3 NameParse
349 :    
350 :     C<< my ($path, $suffix) = NameParse($fileName); >>
351 :    
352 :     Separate the directory path and suffix out of a file name. Note that if the suffix
353 :     is entirely numeric, we return a suffix of "#".
354 :    
355 :     =cut
356 :    
357 :     sub NameParse {
358 :     my ($fileName) = @_;
359 :     # Split the file into path pieces.
360 :     my @pieces = split /\//, $fileName;
361 :     # Peel off the last piece.
362 :     my $baseName = pop @pieces;
363 :     # Form the path from everything remaining.
364 :     my $path = join "/", @pieces;
365 :     # Split the extension off the base name.
366 :     my $suffix = '';
367 :     if ($baseName =~ /\.\d+$/) {
368 :     $suffix = '#';
369 :     } elsif ($baseName =~ /\.([^.]+)$/) {
370 :     $suffix = $1;
371 :     }
372 :     # Return the results.
373 :     return ($path, $suffix);
374 :     }
375 :    
376 :     =head3 StartFile
377 :    
378 :     C<< StartFile($mode, $fileName); >>
379 :    
380 :     Open the specified file for compression input. The file handle used is CLEARFILE.
381 :    
382 :     =cut
383 :    
384 :     sub StartFile {
385 :     my ($mode, $fileName) = @_;
386 :     # Determine the type of file.
387 :     if ($mode eq 't') {
388 :     # Here we have a text file.
389 :     (open CLEARFILE, "<$fileName") ||
390 :     die "Could not open text input file $fileName: $!";
391 :     } else {
392 :     # Here we have a binary file.
393 :     (sysopen CLEARFILE, $fileName, O_RDONLY | O_BINARY) ||
394 :     die "Could not open binary input file $fileName: $!";
395 :     }
396 :     }
397 :    
398 :     =head3 GetCharacters
399 :    
400 :     C<< my $characters = GetCharacters($mode); >>
401 :    
402 :     Get some characters to compress from the current input file. If we have reached
403 :     end-of-file, returns an empty list.
404 :    
405 :     =cut
406 :    
407 :     sub GetCharacters {
408 :     my ($mode) = @_;
409 :     # Declare the return variable.
410 :     my $retVal;
411 :     # Determine the type of file.
412 :     if ($mode eq 't') {
413 :     # Here we have a text file. We read a line of text. Since it's open as
414 :     # a text file, the line-end will be translated for us.
415 :     $retVal = <CLEARFILE>;
416 :     if (! defined $retVal) {
417 :     # Here we either have end-of-file or a file error.
418 :     if ($?) {
419 :     die "Text input error: $!";
420 :     } else {
421 :     # If it's end-of-file, we close the file.
422 :     close CLEARFILE;
423 :     }
424 :     }
425 :     } else {
426 :     # Here we have a binary file. We read a fixed-length text chunk.
427 :     my $textLine;
428 :     my $result = sysread CLEARFILE, $textLine, $CHUNK_SIZE;
429 :     if (! defined $result) {
430 :     # Here we have a file error.
431 :     die "Binary input error: $!";
432 :     } elsif ($result == 0) {
433 :     # Here we have end-of-file, so we close the file.
434 :     close CLEARFILE;
435 :     } else {
436 :     # Here we have data to return.
437 :     $retVal = $textLine;
438 :     }
439 :     }
440 :     return $retVal;
441 :     }
442 :    
443 :     =head3 GetExtensions
444 :    
445 :     C<< my %extHash = GetExtensions($directory); >>
446 :    
447 :     Create a hash containing the extensions of the binary files in the specified
448 :     directory tree.
449 :    
450 :     =cut
451 :    
452 :     sub GetExtensions {
453 :     my ($directory) = @_;
454 :     # Declare the return variable.
455 :     my %retVal = ();
456 :     # Check for the extensions file.
457 :     my $fileName = "$directory/extensions.txt";
458 :     if (! -e $fileName) {
459 :     print "No extensions file found. All files are treated as text.\n";
460 :     } else {
461 :     # The file exists, so we read it into the hash.
462 :     (open EXTFILE, "<$fileName") ||
463 :     die "Could not open extensions file: $!";
464 :     while (my $line = <EXTFILE>) {
465 :     chomp $line;
466 :     $retVal{$line} = 'b';
467 :     }
468 :     }
469 :     # Return the hash.
470 :     return %retVal;
471 :     }
472 :    
473 :     =head3 StartPackaging
474 :    
475 :     C<< my $packObject = StartPackaging($packageFileName); >>
476 :    
477 :     Prepare to create the package file.
478 :    
479 :     =cut
480 :    
481 :     sub StartPackaging {
482 :     my ($packageFileName) = @_;
483 :     # Open the package file.
484 :     (sysopen PACKFILE, "$packageFileName", O_WRONLY | O_CREAT | O_TRUNC | O_BINARY) ||
485 :     die "Could not open package file $packageFileName: $!";
486 :     # Return an object telling us how we're doing.
487 :     return {
488 :     packName => $packageFileName,
489 :     fileSize => 0,
490 :     buffer => '',
491 :     bufferSize => 0,
492 :     outCount => 0
493 :     };
494 :     }
495 :    
496 :     =head3 WritePackageLine
497 :    
498 :     C<< WritePackageLine($packObject, $line); >>
499 :    
500 :     Write a line of text data to the package file. A text line is preceded by a space character
501 :     and is terminated by a line feed character.
502 :    
503 :     =cut
504 :    
505 :     sub WritePackageLine {
506 :     my ($packObject, $line) = @_;
507 :     # Write the line to the file.
508 :     (print PACKFILE " $line\n") ||
509 :     die "Error writing to package file: $!";
510 :     # Update the file size.
511 :     $packObject->{fileSize} += length($line) + 2;
512 :     }
513 :    
514 :     =head3 FinishPackaging
515 :    
516 :     C<< FinishPackaging($packObject); >>
517 :    
518 :     Finish the packaging process. This basically closes the package file.
519 :    
520 :     =cut
521 :    
522 :     sub FinishPackaging {
523 :     close PACKFILE;
524 :     }
525 :    
526 :     =head3 StartCompressedData
527 :    
528 :     C<< StartCompressedData($packObject, $compTable); >>
529 :    
530 :     Initialize for writing compressed file data. Compressed file data is output in fixed-
531 :     length blocks preceded by a C<b> character. (This distinguishes them from text lines,
532 :     which begin with a space.) The last block may be short. It is preceded by an C<e>
533 :     character and the next four bytes contain the length in decimal. (So, for example,
534 :     C<e0012> would indicate a 12-byte final block.)
535 :    
536 :     =cut
537 :    
538 :     sub StartCompressedData {
539 :     my ($packObject, $compTable) = @_;
540 :     # Initialize the buffer data and the output count.
541 :     $packObject->{buffer} = '';
542 :     $packObject->{bufferLen} = 0;
543 :     $packObject->{outCount} = 0;
544 :     # Remember the compression table.
545 :     $packObject->{compTable} = $compTable;
546 :     # Create the checksum.
547 :     $packObject->{digest} = Digest::MD5->new();
548 :     }
549 :    
550 :     =head3 WriteCompressedData
551 :    
552 :     C<< WriteCompressedData($packObject, $data, $status); >>
553 :    
554 :     Add a block of data to the compressed output. The data is accumulated until we fill
555 :     an entire block, at which time it is written to the file with the appropriate marker.
556 :    
557 :     =cut
558 :    
559 :     sub WriteCompressedData {
560 :     my ($packObject, $data, $status) = @_;
561 :     # Check the status code to see if there's an error.
562 :     if ($status != Z_OK) {
563 :     die "Compression error: " . $packObject->{comptable}->msg();
564 :     } else {
565 :     # Get the amount of data we have.
566 :     my $len = length($data);
567 :     # Find out how much room is in the buffer.
568 :     my $room = $PACK_BLOCK - $packObject->{bufferLen};
569 :     # Now we put the data into blocks. At all times, $residual
570 :     # will be the data not transmitted, $room the room left in
571 :     # the block, and $len the length of the residual.
572 :     my $residual = $data;
573 :     while ($room < $len) {
574 :     # Put as much data into the buffer as will fit.
575 :     $packObject->{buffer} .= substr($residual, 0, $room);
576 :     # Remove the data from the residual.
577 :     $residual = substr($residual, $room);
578 :     $len -= $room;
579 :     # Write the buffer.
580 :     print PACKFILE "b" . $packObject->{buffer};
581 :     $packObject->{outCount} += $PACK_BLOCK + 1;
582 :     # Denote that the buffer is now empty.
583 :     $packObject->{bufferLen} = 0;
584 :     $packObject->{buffer} = '';
585 :     $room = $PACK_BLOCK;
586 :     }
587 :     # Put the remaining text into the buffer.
588 :     $packObject->{buffer} .= $residual;
589 :     $packObject->{bufferLen} += $len;
590 :     }
591 :     }
592 :    
593 :     =head3 WriteEndMark
594 :    
595 :     C<< my $outCount = WriteEndMark($packObject); >>
596 :    
597 :     Write out the last block of a compressed data stream and return the total number of bytes
598 :     in the stream.
599 :    
600 :     =cut
601 :    
602 :     sub WriteEndMark {
603 :     my ($packObject) = @_;
604 :     # Get the number of bytes in the buffer.
605 :     my $len = $packObject->{bufferLen};
606 :     # Format the prefix.
607 :     my $prefix = "$len";
608 :     while (length($prefix) < 4) {
609 :     $prefix = "0$prefix";
610 :     }
611 :     # Insert the checksum.
612 :     $prefix .= $packObject->{digest}->hexdigest;
613 :     # Write the prefix and the buffer content.
614 :     print PACKFILE "e$prefix" . $packObject->{buffer};
615 :     # Compute the total length output.
616 :     my $retVal = $len + 37 + $packObject->{outCount};
617 :     # Add it to the package output counter.
618 :     $packObject->{fileSize} += $retVal;
619 :     # Return the total length output.
620 :     return $retVal;
621 :     }
622 :    
623 :     =head3 PackTree
624 :    
625 :     C<< PackTree($directory, $packageFileName); >>
626 :    
627 :     Package all files in a directory tree into a package file.
628 :    
629 :     The C<extensions.txt> file in the root of the directory tree is used to determine
630 :     which file extensions indicate binary files.
631 :    
632 :     =cut
633 :    
634 :     sub PackTree {
635 :     my ($directory, $packageFileName) = @_;
636 :     # Get the extensions hash. This is used to separate text files
637 :     # from binary files.
638 :     my %extHash = GetExtensions($directory);
639 :     # Set up to output to the package files.
640 :     my $packObject = StartPackaging($packageFileName);
641 :     # Create a directory object.
642 :     my $dirObject = OpenDirectory($directory);
643 :     # Create file type counters.
644 :     my %fileCounters = ( t => 0, b => 0 );
645 :     # Loop through the files in the tree, packaging each one.
646 :     while (my $fileName = GetFileName($dirObject)) {
647 :     # Decide whether this file is text or binary. We begin by separating
648 :     # the file name into its path and extension.
649 :     my ($path, $suffix) = NameParse($fileName);
650 :     # Check the extension hash for the suffix.
651 :     my $mode = (exists $extHash{$suffix} ? "b" : "t");
652 :     # Output the relative file name.
653 :     my $relName = $dirObject->{relName};
654 :     WritePackageLine($packObject, "$mode $relName");
655 :     # Update the appropriate file counter.
656 :     $fileCounters{$mode}++;
657 :     # Now we compress the file.
658 :     PackFile($fileName, $packObject, $mode);
659 :     }
660 :     # Close the package output stream.
661 :     FinishPackaging($packObject);
662 :     # Tell the user how much package file we have.
663 :     print $packObject->{fileSize} . " bytes written to package file.\n";
664 :     # Display the text/binary split.
665 :     print "$fileCounters{t} text files, $fileCounters{b} binary files compressed.\n";
666 :     # Write the summary of files processed.
667 :     print InSummary($dirObject);
668 :     }
669 :    
670 :     =head3 PackFile
671 :    
672 :     C<< PackFile($fileName, $packObject, $mode); >>
673 :    
674 :     Compress the specified file to the specified package file in the specified mode.
675 :    
676 :     =cut
677 :    
678 :     sub PackFile {
679 :     my ($fileName, $packObject, $mode) = @_;
680 :     print "Packaging $fileName ($mode).\n";
681 :     # Create a compression object.
682 :     my $compTable = deflateInit();
683 :     # Initialize for reading the input file.
684 :     StartFile($mode, $fileName);
685 :     # Initialize for writing the output.
686 :     StartCompressedData($packObject, $compTable);
687 :     # Loop through the input.
688 :     while (my $chars = GetCharacters($mode)) {
689 :     # Add the data to the checksum.
690 :     $packObject->{digest}->add($chars);
691 :     # Compress this data.
692 :     my ($output, $status) = $compTable->deflate($chars);
693 :     # Write it to the output.
694 :     WriteCompressedData($packObject, $output, $status);
695 :     }
696 :     # Flush out any remaining data.
697 :     my ($output, $status) = $compTable->flush();
698 :     WriteCompressedData($packObject, $output, $status);
699 :     # Terminate the file.
700 :     my $outCount = WriteEndMark($packObject);
701 :     # Display the compression ratio.
702 :     my $fileSize = $compTable->total_in();
703 :     my $ratio = ($fileSize ? int($outCount * 100 / $fileSize + 0.5) : 0);
704 :     print "$fileSize characters in, $outCount characters out ($ratio%)\n";
705 :     }
706 :    
707 :     =head3 UnPackTree
708 :    
709 :     C<< UnPackTree($directory, $packageFile); >>
710 :    
711 :     Read file data from a package file and put it into a directory tree.
712 :    
713 :     The package file consists of a series of compressed files. The package file contains
714 :     three types of records. A text record begins with a space and is terminated by a
715 :     new-line character (C<\n>). A block record has a fixed length and begins with a C<b>.
716 :     An end record begins with an C<e> and a four-digit length, followed by data of that
717 :     length. A file contains a text record with the file type (C<t> for text and C<b> for
718 :     binary) and its name relative to the root of the tree, zero or more block records, and
719 :     one end record.
720 :    
721 :     To prevent file system compatibility problems, the name is automatically cleaned
722 :     before the file is created: all spaces and colons are converted to underscores.
723 :    
724 :     Text files are open in text mode and binary files in binary mode. As a result, the
725 :     line-end characters in a text file are automatically translated to the line-end
726 :     character of the target operating system.
727 :    
728 :     =cut
729 :    
730 :     sub UnPackTree {
731 :     my ($directory, $packageFile) = @_;
732 :     # Open the package file for input.
733 :     my $packObject = StartUnPacking($packageFile);
734 :     # Create some file counters.
735 :     my %fileCounters = ( b => 0, t => 0 );
736 :     # Loop through the package one file at a time.
737 :     while (my $fileData = GetTextRecord($packObject)) {
738 :     # Extract the file name and mode.
739 :     my $mode = substr($fileData,0,1);
740 :     my $fileName = substr($fileData,2);
741 :     print "Unpacking $fileName ($mode).\n";
742 :     # Create the decompression object.
743 :     my $compObject = inflateInit();
744 :     # Create the output file.
745 :     my $fileObject = CreateFile($fileName, $mode, $directory, $compObject);
746 :     # Loop through the package file, de-compressing the data.
747 :     while (my $block = GetBlock($packObject)) {
748 :     # Decompress the current block.
749 :     my ($data, $status) = $compObject->inflate($block);
750 :     # Write the decompressed data.
751 :     WriteData($fileObject, $data, $status);
752 :     }
753 :     # Verify the checksums.
754 :     my ($expected, $found) = ($packObject->{digest}, $fileObject->{digest}->hexdigest);
755 :     if ($expected ne $found) {
756 :     # Here we have a mismatch.
757 :     die "Invalid checksum found unpacking file $fileName: expected $expected, found $found.";
758 :     }
759 :     # Count this file.
760 :     $fileCounters{$mode}++;
761 :     # Close it.
762 :     close CLEARFILE;
763 :     }
764 :     # Close the package file.
765 :     close PACKFILE;
766 :     # Tell the user what we did.
767 :     print "$fileCounters{t} text files unpacked, $fileCounters{b} binary files unpacked.\n";
768 :     print $packObject->{inCounter} . " package file bytes read.\n";
769 :     }
770 :    
771 :     =head3 StartUnPacking
772 :    
773 :     C<< my $packObject = StartUnPacking($packageFile); >>
774 :    
775 :     Prepare to read from a package file.
776 :    
777 :     =cut
778 :    
779 :     sub StartUnPacking {
780 :     my ($packageFile) = @_;
781 :     # Open the package file for binary input.
782 :     (sysopen PACKFILE, $packageFile, O_RDONLY | O_BINARY) ||
783 :     die "Could not open $packageFile: $!";
784 :     # Return the package file input object.
785 :     return {
786 :     blockEnd => 0,
787 :     inCounter => 0
788 :     }
789 :     }
790 :    
791 :     =head3 GetTextRecord
792 :    
793 :     C<< my $data = GetTextRecord($packObject); >>
794 :    
795 :     Read a text record from the package file. The text record begins with a space and
796 :     ends with a new-line character.
797 :    
798 :     =cut
799 :    
800 :     sub GetTextRecord {
801 :     my ($packObject) = @_;
802 :     # Declare the return variable.
803 :     my $retVal;
804 :     # Read the next character.
805 :     my $buffer;
806 :     my $rv = read PACKFILE, $buffer, 1;
807 :     # Check to insure we read a space.
808 :     if (! defined $rv) {
809 :     # Here the read failed.
810 :     die "Error reading package file: $!";
811 :     } elsif ($rv == 0) {
812 :     # Here we've reached end-of-file. We'll return an undefined value
813 :     # to the caller.
814 :     } elsif ($buffer ne " ") {
815 :     # Here we didn't find a space: the file is corrupt.
816 :     die "Text record not found when expected in package file.";
817 :     } else {
818 :     # Here we have a real, live text record. We loop until we find a new-line.
819 :     $retVal = "";
820 :     $rv = read PACKFILE, $buffer, 1;
821 :     while (defined $rv && $rv == 1 && $buffer ne "\n") {
822 :     $retVal .= $buffer;
823 :     $rv = read PACKFILE, $buffer, 1;
824 :     }
825 :     # Check for error conditions.
826 :     if (! defined $rv) {
827 :     die "Error reading package file: $!";
828 :     } elsif ($rv == 0) {
829 :     die "End-of-file inside text record in package file.";
830 :     }
831 :     # Update the input counter.
832 :     $packObject->{inCounter} += length($retVal) + 2;
833 :     # Denote we haven't read an end block.
834 :     $packObject->{blockEnd} = 0;
835 :     }
836 :     return $retVal;
837 :     }
838 :    
839 :     =head3 GetBlock
840 :    
841 :     C<< my $block = GetBlock($packObject); >>
842 :    
843 :     Read a block of compressed data from the package file. If the end of the compressed data
844 :     has been reached, will return an undefined value.
845 :    
846 :     =cut
847 :    
848 :     sub GetBlock {
849 :     my ($packObject) = @_;
850 :     # Declare the return value.
851 :     my $retVal;
852 :     # Only proceed if we have NOT reached the end block.
853 :     if (! $packObject->{blockEnd}) {
854 :     # Read the indicator character.
855 :     my $buffer;
856 :     my $rv = read PACKFILE, $buffer, 1;
857 :     # Check for errors.
858 :     if (! defined $rv) {
859 :     die "Error reading package block mark: $!";
860 :     } elsif ($rv == 0) {
861 :     die "End-of-file read where block mark expected in package file.";
862 :     } elsif ($buffer eq 'b') {
863 :     # Here we have a normal block. We read it directly into the
864 :     # return variable.
865 :     $rv = read PACKFILE, $retVal, $PACK_BLOCK;
866 :     # Check for errors.
867 :     if (! defined $rv) {
868 :     die "Error reading package block data: $!";
869 :     } elsif ($rv < $PACK_BLOCK) {
870 :     die "Unexpected end-of-file inside package file block.";
871 :     } else {
872 :     # Record the bytes read.
873 :     $packObject->{inCounter} += $PACK_BLOCK + 1;
874 :     }
875 :     } elsif ($buffer eq 'e') {
876 :     # Here we have an end block. We need to read the block length
877 :     # and the checksum.
878 :     $rv = read PACKFILE, $buffer, 36;
879 :     # Check for errors.
880 :     if (! defined $rv) {
881 :     die "Error reading end block length: $!";
882 :     } elsif ($rv < 36) {
883 :     die "Unexpected end-of-file reading end block length and checksum.";
884 :     } else {
885 :     # Get the length and the checksum.
886 :     my $lengthCode = substr $buffer, 0, 4;
887 :     $packObject->{digest} = substr $buffer, 4, 32;
888 :     # Check for an empty block.
889 :     if ($lengthCode == 0) {
890 :     # Here the end block is empty. We will return an undefined value,
891 :     # indicating end-of-file. Record the 36 bytes read.
892 :     $packObject->{inCounter} += 36;
893 :     } else {
894 :     # Here we have a valid block length, so we read the block.
895 :     $rv = read PACKFILE, $retVal, $lengthCode;
896 :     # Check for errors.
897 :     if (! defined $rv) {
898 :     die "Error reading end block: $!";
899 :     } elsif ($rv < $lengthCode) {
900 :     die "Unexpected end-of-file inside end block.";
901 :     } else {
902 :     # Denote the next read will return end-of-file.
903 :     $packObject->{blockEnd} = 1;
904 :     # Record the bytes read.
905 :     $packObject->{inCounter} += $lengthCode + 37;
906 :     }
907 :     }
908 :     }
909 :     } else {
910 :     # Here we have an invalid block mark.
911 :     die "Invalid block mark read in package file.";
912 :     }
913 :     }
914 :     return $retVal;
915 :     }
916 :    
917 :     =head3 CreateFile
918 :    
919 :     C<< my $fileObject = CreateFile($fileName, $mode, $directory, $compObject); >>
920 :    
921 :     Create a file into which packaged data can be unpacked.
922 :    
923 :     =cut
924 :    
925 :     sub CreateFile {
926 :     my ($fileName, $mode, $directory, $compObject) = @_;
927 :     # Clear the file name.
928 :     $fileName =~ tr/: /__/;
929 :     # Add the file name to the directory to get the fully-qualified file name.
930 :     my $fullName = "$directory/$fileName";
931 :     # Extract the path.
932 :     my ($path, $suffix) = NameParse($fullName);
933 :     # Insure the path exists.
934 :     if (! -d $path) {
935 :     mkpath($path);
936 :     }
937 :     # Check for the directory problem.
938 :     if (-d $fullName) {
939 :     # Here a directory has the same name as the file. We append a suffix
940 :     # to work around the problem.
941 :     $fullName .= ".$mode";
942 :     }
943 :     # Open the file in the appropriate mode.
944 :     if ($mode eq 't') {
945 :     (open CLEARFILE, ">$fullName") ||
946 :     die "Could not open text file $fullName: $!";
947 :     } else {
948 :     (sysopen CLEARFILE, $fullName, O_WRONLY | O_CREAT | O_BINARY) ||
949 :     die "Could not open binary file $fullName: $!";
950 :     }
951 :     # Create a file manipulation object to return.
952 :     return {
953 :     mode => $mode,
954 :     compObject => $compObject,
955 :     digest => Digest::MD5->new()
956 :     };
957 :     }
958 :    
959 :     =head3 WriteData
960 :    
961 :     C<< WriteData($fileObject, $data, $status); >>
962 :    
963 :     Write a block of data to the output file.
964 :    
965 :     =cut
966 :    
967 :     sub WriteData {
968 :     my ($fileObject, $data, $status) = @_;
969 :     # Check the status code from the compressor.
970 :     if ($status != Z_OK && $status != Z_STREAM_END) {
971 :     die "Error in decompression: " . $fileObject->{compObject}->msg();
972 :     } else {
973 :     # Here we're okay, so we can write the data.
974 :     print CLEARFILE $data;
975 :     # Add it to the checksum.
976 :     $fileObject->{digest}->add($data);
977 :     }
978 :     }
979 :    
980 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3