[Bio] / FigKernelScripts / Packager.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/Packager.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 Packager
4 :    
5 : parrello 1.3 C<Packager> [I<options>] C<scan>|C<pack>|C<unpack> I<directoryRoot> I<packageFile>
6 :    
7 : parrello 1.1 Package all the files in a directory tree for transport or recreate the directory
8 :     tree from the packaged files.
9 :    
10 : parrello 1.3 This method takes as input three positional parameters.
11 :    
12 :     =over 4
13 :    
14 :     =item command
15 :    
16 :     C<pack> to package the files into a single package file, <unpack> to restore
17 :     the files from the package file, or C<scan> to scan the files to determine
18 :     file types.
19 :    
20 :     =item directoryRoot
21 :    
22 :     Root directory of the directory tree. If scanning or packing, all files in this
23 :     tree will be packaged. If unpacking, the files are unpacked into this tree.
24 :    
25 :     =item packageFile
26 :    
27 :     Name of the package file. If packing, this is the output file. If unpacking,
28 :     this is the input file.
29 :    
30 :     =back
31 : parrello 1.1
32 :     File and directory names should be specified using Unix conventions, with a
33 :     forward slash (C</>) instead of a backslash (C<\>).
34 :    
35 :     The process of packaging is straightforward. We write out the name of the input file,
36 :     its type (text or binary), the file data itself, and an MD5 digest. Each file is
37 :     compressed using the PERL C<Compress::Zlib> libary.
38 :    
39 :     This is by no means the optimal way to package files, but it is simple to do and
40 :     can be customized for this project's special problems (such as incompatible file
41 :     names).
42 :    
43 :     It is important that the packager understand the difference between text files
44 :     and binary files. The line end characters in text files need to be translated
45 :     to the target platform's characters, while binary files must be moved without
46 :     translation. The file's extension is used to determine whether it is binary or
47 :     text. When packaging, we will look for a file called C<extensions.txt> in the
48 :     root directory of the tree. This file will have a list of binary file extensions,
49 :     one per line. The <scan> command can be used to create this file. If the file
50 :     is not found when packaging, it is presumed all files are text files.
51 :    
52 :     When packaging, line-end characters inside text files are converted to a single
53 :     line feed (C<\n>). When unpackaging, the line feeds are converted to the line-ending
54 :     character of the current platform.
55 :    
56 :     =head2 Temporary Files
57 :    
58 :     Temporary files in the directory tree are skipped when scanning or packaging. A file
59 :     is considered temporary if its name ends in a tilde (C<~>) or pound sign C<#>,
60 : parrello 1.2 or it has an extension of C<.bak> or C<.tgz>. In addition, directories with names
61 :     ending in C<.old> will be skipped.
62 : parrello 1.1
63 :     =cut
64 :    
65 :     use strict;
66 :     use Cwd 'abs_path';
67 :     use File::Copy;
68 :     use File::Path;
69 :     use Fcntl qw(:DEFAULT :seek);
70 :     use POSIX;
71 :     use Compress::Zlib;
72 :     use Digest::MD5;
73 :    
74 :     =head2 Constants
75 :    
76 :     =head3 CHUNK_SIZE
77 :    
78 :     Number of bytes to use when reading a survey chunk during file scans.
79 :     We take three chunks from the file of this size. Any binary character
80 :     (ASCII code > 127) indicates a binary file. A binary file is also
81 :     presumed if the number of non-whitespace control characters is above
82 :     a certain percentage.
83 :    
84 :     =cut
85 :     my $CHUNK_SIZE = 1024;
86 :    
87 :     =head3 SURVEY_SIZE
88 :    
89 :     Number of bytes in a file survey. A file survey consists of three chunks.
90 :    
91 :     =cut
92 :    
93 :     my $SURVEY_SIZE = $CHUNK_SIZE * 3;
94 :    
95 :     =head3 NON_WHITE
96 :    
97 :     Fraction of the non-white control characters in the survey data that
98 :     would classify the file as binary.
99 :    
100 :     =cut
101 :    
102 :     my $NON_WHITE = 0.25;
103 :    
104 :     =head3 MAX_PACKAGE_SIZE
105 :    
106 :     Maximum legal size for a package file.
107 :    
108 :     =cut
109 :    
110 :     my $MAX_PACKAGE_SIZE = 0x7FFFFFFF;
111 :    
112 :     =head3 NL_LEN
113 :    
114 :     Number of characters in a line-end sequence.
115 :    
116 :     =cut
117 :    
118 :     my $NL_LEN = length($/);
119 :    
120 :     =head3 PACK_BLOCK
121 :    
122 :     Number of characters in a package block.
123 :    
124 :     =cut
125 :    
126 :     my $PACK_BLOCK = 4096;
127 :    
128 :     # OPEN CODE
129 :    
130 :     # Get the command, the file directory name, and the package file name.
131 :     my ($command, $treeDirectory, $packageFile) = @ARGV;
132 :     # Get the absolute path for the directory tree.
133 :     my $absDirectory = abs_path($treeDirectory);
134 :     # Trim off any trailing slash.
135 :     if ($absDirectory =~ m!/$!) {
136 :     chop $absDirectory;
137 :     }
138 :     # Process the command.
139 :     $command = lc $command;
140 :     if ($command eq 'scan') {
141 :     ScanTree($absDirectory);
142 :     } elsif ($command eq 'pack') {
143 :     PackTree($absDirectory, $packageFile);
144 :     } elsif ($command eq 'unpack') {
145 :     UnPackTree($absDirectory, $packageFile);
146 :     } else {
147 :     print "Invalid command \"$command\".\n";
148 :     }
149 :     print "Processing complete.\n";
150 :    
151 :     =head2 Methods
152 :    
153 :     =head3 ScanTree
154 :    
155 :     C<< ScanTree($directory); >>
156 :    
157 :     Scan all files in the specified directory and create an C<extensions.txt> file
158 :     listing the extensions of files that are binary.
159 :    
160 :     =cut
161 :    
162 :     sub ScanTree {
163 :     my ($directory) = @_;
164 :     # Create a hash of file extensions. The first time a file with a particular
165 :     # extension is found, it is scanned for binary characters. If such characters
166 :     # are found, the file is added to the hash with a value of "b"; otherwise it is
167 :     # added with a value of "t".
168 :     my %extensions = ();
169 :     # Create a directory object.
170 :     my $dirObject = OpenDirectory($directory);
171 :     # Loop through the files in the tree, scanning each one.
172 :     while (my $fileName = GetFileName($dirObject)) {
173 :     # Get the file name relative to the root and the file extension.
174 :     my $relName = $dirObject->{relName};
175 :     my $suffix = $dirObject->{suffix};
176 :     # Only proceed if this is a new file type.
177 :     if (exists $extensions{$suffix}) {
178 :     print "Skipping $relName.\n";
179 :     } else {
180 :     # Get the file size.
181 :     my $size = -s $fileName;
182 :     # If the file is empty, skip it.
183 :     if ($size == 0) {
184 :     print "File $relName is empty: ignored.\n";
185 :     } else {
186 :     # Here the file is worth scanning.
187 :     print "Scanning $relName($size) for type of \"$suffix\"... ";
188 :     # Determine the type of the file.
189 :     my $type = ScanFile($fileName, $size);
190 :     print "$type\n";
191 :     # Add the extension to the hash.
192 :     $extensions{$suffix} = substr($type, 0, 1);
193 :     }
194 :     }
195 :     }
196 :     # Now we write the extension file.
197 :     print "Creating extensions file.\n";
198 :     (open EXTFILE, ">$directory/extensions.txt") ||
199 :     die "Could not open extensions file: $!";
200 :     my $count = 0;
201 :     for my $ext (keys %extensions) {
202 :     if ($extensions{$ext} eq "b") {
203 :     (print EXTFILE "$ext\n") ||
204 :     die "Error writing extensions file: $!";
205 :     $count++;
206 :     }
207 :     }
208 :     close EXTFILE;
209 :     print "$count binary extensions found.\n";
210 :     print InSummary($dirObject);
211 :     }
212 :    
213 :     =head3 ScanFile
214 :    
215 :     C<< my $type = ScanFile($fileName, $size); >>
216 :    
217 :     Survey the specified file of the specified size to determine whether it is
218 :     text or binary. A file is binary if it has a lot of control characters or
219 :     any character whose ASCII value is greater than 127.
220 :    
221 :     =cut
222 :    
223 :     sub ScanFile {
224 :     my ($fileName, $size) = @_;
225 :     # Open the file for binary input.
226 :     (sysopen INFILE, $fileName, O_RDONLY + O_BINARY) ||
227 :     die "Could not scan $fileName: $!";
228 :     # Survey the file. We take chunks from the beginning, end, and middle.
229 :     # if the file has fewer than three chunks of characters, we read the
230 :     # whole thing.
231 :     my $survey = "";
232 :     if ($size <= $SURVEY_SIZE) {
233 :     (sysread INFILE, $survey, $SURVEY_SIZE) ||
234 :     die "Error reading $fileName: $!";
235 :     } else {
236 :     (sysread INFILE, $survey, $CHUNK_SIZE) ||
237 :     die "Error reading start of $fileName: $!";
238 :     my $position = int($size - $CHUNK_SIZE / 2);
239 :     (sysseek INFILE, $position, SEEK_SET) ||
240 :     die "Error moving to middle of $fileName: $!";
241 :     (sysread INFILE, $survey, $CHUNK_SIZE, $CHUNK_SIZE) ||
242 :     die "Error reading middle of $fileName: $!";
243 :     (sysseek INFILE, -$CHUNK_SIZE, SEEK_END) ||
244 :     die "Error moving to end of $fileName: $!";
245 :     (sysread INFILE, $survey, 2*$CHUNK_SIZE, $CHUNK_SIZE) ||
246 :     die "Error reading end of $fileName: $!";
247 :     }
248 :     close INFILE;
249 :     my $surveyLen = length $survey;
250 :     # Now $survey contains a bunch of bytes from the file. We run through them
251 :     # counting non-white control characters and looking for binary characters.
252 :     # First, we assume the file is text until we learn otherwise.
253 :     my $retVal = "text";
254 :     # This variable will contain the number of non-white control characters
255 :     # required to prove the file is binary.
256 :     my $controls = ceil($NON_WHITE * $surveyLen);
257 :     # Here we loop through the survey characters.
258 :     while ($retVal eq "text" && (my $chr = chop $survey)) {
259 :     # Process according to the character type.
260 :     if ($chr gt chr(127)) {
261 :     # Here we have a true binary character.
262 :     $retVal = "binary";
263 :     } elsif ($chr lt " " && $chr !~ /\s/) {
264 :     # Here we have a non-white control character.
265 :     $controls--;
266 :     if (! $controls) { $retVal = "binary"; }
267 :     }
268 :     }
269 :     # Return the file type.
270 :     return $retVal;
271 :     }
272 :    
273 :     =head3 OpenDirectory
274 :    
275 :     C<< my $dirObject = OpenDirectory($directoryName); >>
276 :    
277 :     This method returns an object that can be used to find all the files in a directory
278 :     tree. The incoming parameter is the directory tree name, properly cleaned and with
279 :     the trailing slash removed.
280 :    
281 :     =cut
282 :    
283 :     sub OpenDirectory {
284 :     my ($directoryName) = @_;
285 :     return {
286 :     # The root name length enables us to separate the relative file name from
287 :     # the absolute file name.
288 :     rootLen => length($directoryName) + 1,
289 :     # This is a stack of the files and directories still to be processed.
290 :     stack => [$directoryName],
291 :     # These are counters for various file types.
292 :     tempCount => 0,
293 :     dirCount => 0,
294 : parrello 1.2 dirSkip => 0,
295 : parrello 1.1 foundCount => 0
296 :     };
297 :     }
298 :    
299 :     =head3 GetFileName
300 :    
301 :     C<< my $fileName = GetFileName($dirObject); >>
302 :    
303 :     This method returns the next file name for a directory tree. The incoming parameter is
304 :     a directory object returned by OpenDirectory. If all the files have been processed, the
305 :     method returns an undefined value.
306 :    
307 :     =cut
308 :    
309 :     sub GetFileName {
310 :     my ($dirObject) = @_;
311 :     # Loop until we find a file name or empty the stack.
312 :     my $retVal;
313 :     my $done = 0;
314 :     while (! $done) {
315 :     # Get the next file name.
316 :     my $nextName = pop @{$dirObject->{stack}};
317 :     if (! $nextName) {
318 :     # The stack is empty, so we're done.
319 :     $done = 1;
320 :     } elsif (! -d $nextName) {
321 :     # Here we've found a real file. Now we need to check for a temporary file name.
322 :     if ($nextName =~ /(~|#|\.bak|\.tgz)$/) {
323 :     # We have a temporary file name, so we count it and keep going.
324 : parrello 1.2 $dirObject->{dirSkip}++;
325 : parrello 1.1 } else {
326 :     # Here we have a non-temporary name, so we're done.
327 :     $done = 1;
328 :     $retVal = $nextName;
329 :     $dirObject->{foundCount}++;
330 :     # Compute the file's relative name and extension, then stuff them in
331 :     # the object.
332 :     $dirObject->{relName} = substr $nextName, $dirObject->{rootLen};
333 :     my ($path, $suffix) = NameParse($nextName);
334 :     $dirObject->{suffix} = lc $suffix;
335 :     }
336 : parrello 1.2 } elsif ($nextName =~ /\.old$/) {
337 :     # Here we have a backup directory.
338 :     $dirObject->{dirSkip}++;
339 : parrello 1.1 } else {
340 :     # Here we have a directory. We need to push all its members on the stack.
341 :     # Note that file names beginning with a period are ignored.
342 :     opendir NEXTDIR, $nextName;
343 :     my @files = grep { $_ =~ /^[^.]/ } readdir NEXTDIR;
344 :     closedir NEXTDIR;
345 :     push @{$dirObject->{stack}}, map { "$nextName/$_" } @files;
346 :     $dirObject->{dirCount}++;
347 :     }
348 :     }
349 :     return $retVal;
350 :     }
351 :    
352 :     =head3 InSummary
353 :    
354 :     C<< print InSummary($dirObject); >>
355 :    
356 :     Return a summary of the files processed by a directory object.
357 :    
358 :     =cut
359 :    
360 :     sub InSummary {
361 :     my ($dirObject) = @_;
362 :     my $dirCount = $dirObject->{dirCount};
363 :     my $tempCount = $dirObject->{tempCount};
364 :     my $foundCount = $dirObject->{foundCount};
365 : parrello 1.2 my $dirSkip = $dirObject->{dirSkip};
366 :     return "$foundCount files found, $tempCount temporary files ignored, $dirCount directories processed, $dirSkip directories skipped.\n";
367 : parrello 1.1 }
368 :    
369 :     =head3 NameParse
370 :    
371 :     C<< my ($path, $suffix) = NameParse($fileName); >>
372 :    
373 :     Separate the directory path and suffix out of a file name. Note that if the suffix
374 :     is entirely numeric, we return a suffix of "#".
375 :    
376 :     =cut
377 :    
378 :     sub NameParse {
379 :     my ($fileName) = @_;
380 :     # Split the file into path pieces.
381 :     my @pieces = split /\//, $fileName;
382 :     # Peel off the last piece.
383 :     my $baseName = pop @pieces;
384 :     # Form the path from everything remaining.
385 :     my $path = join "/", @pieces;
386 :     # Split the extension off the base name.
387 :     my $suffix = '';
388 :     if ($baseName =~ /\.\d+$/) {
389 :     $suffix = '#';
390 :     } elsif ($baseName =~ /\.([^.]+)$/) {
391 :     $suffix = $1;
392 :     }
393 :     # Return the results.
394 :     return ($path, $suffix);
395 :     }
396 :    
397 :     =head3 StartFile
398 :    
399 :     C<< StartFile($mode, $fileName); >>
400 :    
401 :     Open the specified file for compression input. The file handle used is CLEARFILE.
402 :    
403 :     =cut
404 :    
405 :     sub StartFile {
406 :     my ($mode, $fileName) = @_;
407 :     # Determine the type of file.
408 :     if ($mode eq 't') {
409 :     # Here we have a text file.
410 :     (open CLEARFILE, "<$fileName") ||
411 :     die "Could not open text input file $fileName: $!";
412 :     } else {
413 :     # Here we have a binary file.
414 :     (sysopen CLEARFILE, $fileName, O_RDONLY | O_BINARY) ||
415 :     die "Could not open binary input file $fileName: $!";
416 :     }
417 :     }
418 :    
419 :     =head3 GetCharacters
420 :    
421 :     C<< my $characters = GetCharacters($mode); >>
422 :    
423 :     Get some characters to compress from the current input file. If we have reached
424 :     end-of-file, returns an empty list.
425 :    
426 :     =cut
427 :    
428 :     sub GetCharacters {
429 :     my ($mode) = @_;
430 :     # Declare the return variable.
431 :     my $retVal;
432 :     # Determine the type of file.
433 :     if ($mode eq 't') {
434 :     # Here we have a text file. We read a line of text. Since it's open as
435 :     # a text file, the line-end will be translated for us.
436 :     $retVal = <CLEARFILE>;
437 :     if (! defined $retVal) {
438 :     # Here we either have end-of-file or a file error.
439 :     if ($?) {
440 :     die "Text input error: $!";
441 :     } else {
442 :     # If it's end-of-file, we close the file.
443 :     close CLEARFILE;
444 :     }
445 :     }
446 :     } else {
447 :     # Here we have a binary file. We read a fixed-length text chunk.
448 :     my $textLine;
449 :     my $result = sysread CLEARFILE, $textLine, $CHUNK_SIZE;
450 :     if (! defined $result) {
451 :     # Here we have a file error.
452 :     die "Binary input error: $!";
453 :     } elsif ($result == 0) {
454 :     # Here we have end-of-file, so we close the file.
455 :     close CLEARFILE;
456 :     } else {
457 :     # Here we have data to return.
458 :     $retVal = $textLine;
459 :     }
460 :     }
461 :     return $retVal;
462 :     }
463 :    
464 :     =head3 GetExtensions
465 :    
466 :     C<< my %extHash = GetExtensions($directory); >>
467 :    
468 :     Create a hash containing the extensions of the binary files in the specified
469 :     directory tree.
470 :    
471 :     =cut
472 :    
473 :     sub GetExtensions {
474 :     my ($directory) = @_;
475 :     # Declare the return variable.
476 :     my %retVal = ();
477 :     # Check for the extensions file.
478 :     my $fileName = "$directory/extensions.txt";
479 :     if (! -e $fileName) {
480 :     print "No extensions file found. All files are treated as text.\n";
481 :     } else {
482 :     # The file exists, so we read it into the hash.
483 :     (open EXTFILE, "<$fileName") ||
484 :     die "Could not open extensions file: $!";
485 :     while (my $line = <EXTFILE>) {
486 :     chomp $line;
487 :     $retVal{$line} = 'b';
488 :     }
489 :     }
490 :     # Return the hash.
491 :     return %retVal;
492 :     }
493 :    
494 :     =head3 StartPackaging
495 :    
496 :     C<< my $packObject = StartPackaging($packageFileName); >>
497 :    
498 :     Prepare to create the package file.
499 :    
500 :     =cut
501 :    
502 :     sub StartPackaging {
503 :     my ($packageFileName) = @_;
504 :     # Open the package file.
505 :     (sysopen PACKFILE, "$packageFileName", O_WRONLY | O_CREAT | O_TRUNC | O_BINARY) ||
506 :     die "Could not open package file $packageFileName: $!";
507 :     # Return an object telling us how we're doing.
508 :     return {
509 :     packName => $packageFileName,
510 :     fileSize => 0,
511 :     buffer => '',
512 :     bufferSize => 0,
513 :     outCount => 0
514 :     };
515 :     }
516 :    
517 :     =head3 WritePackageLine
518 :    
519 :     C<< WritePackageLine($packObject, $line); >>
520 :    
521 :     Write a line of text data to the package file. A text line is preceded by a space character
522 :     and is terminated by a line feed character.
523 :    
524 :     =cut
525 :    
526 :     sub WritePackageLine {
527 :     my ($packObject, $line) = @_;
528 :     # Write the line to the file.
529 :     (print PACKFILE " $line\n") ||
530 :     die "Error writing to package file: $!";
531 :     # Update the file size.
532 :     $packObject->{fileSize} += length($line) + 2;
533 :     }
534 :    
535 :     =head3 FinishPackaging
536 :    
537 :     C<< FinishPackaging($packObject); >>
538 :    
539 :     Finish the packaging process. This basically closes the package file.
540 :    
541 :     =cut
542 :    
543 :     sub FinishPackaging {
544 :     close PACKFILE;
545 :     }
546 :    
547 :     =head3 StartCompressedData
548 :    
549 :     C<< StartCompressedData($packObject, $compTable); >>
550 :    
551 :     Initialize for writing compressed file data. Compressed file data is output in fixed-
552 :     length blocks preceded by a C<b> character. (This distinguishes them from text lines,
553 :     which begin with a space.) The last block may be short. It is preceded by an C<e>
554 :     character and the next four bytes contain the length in decimal. (So, for example,
555 :     C<e0012> would indicate a 12-byte final block.)
556 :    
557 :     =cut
558 :    
559 :     sub StartCompressedData {
560 :     my ($packObject, $compTable) = @_;
561 :     # Initialize the buffer data and the output count.
562 :     $packObject->{buffer} = '';
563 :     $packObject->{bufferLen} = 0;
564 :     $packObject->{outCount} = 0;
565 :     # Remember the compression table.
566 :     $packObject->{compTable} = $compTable;
567 :     # Create the checksum.
568 :     $packObject->{digest} = Digest::MD5->new();
569 :     }
570 :    
571 :     =head3 WriteCompressedData
572 :    
573 :     C<< WriteCompressedData($packObject, $data, $status); >>
574 :    
575 :     Add a block of data to the compressed output. The data is accumulated until we fill
576 :     an entire block, at which time it is written to the file with the appropriate marker.
577 :    
578 :     =cut
579 :    
580 :     sub WriteCompressedData {
581 :     my ($packObject, $data, $status) = @_;
582 :     # Check the status code to see if there's an error.
583 :     if ($status != Z_OK) {
584 :     die "Compression error: " . $packObject->{comptable}->msg();
585 :     } else {
586 :     # Get the amount of data we have.
587 :     my $len = length($data);
588 :     # Find out how much room is in the buffer.
589 :     my $room = $PACK_BLOCK - $packObject->{bufferLen};
590 :     # Now we put the data into blocks. At all times, $residual
591 :     # will be the data not transmitted, $room the room left in
592 :     # the block, and $len the length of the residual.
593 :     my $residual = $data;
594 :     while ($room < $len) {
595 :     # Put as much data into the buffer as will fit.
596 :     $packObject->{buffer} .= substr($residual, 0, $room);
597 :     # Remove the data from the residual.
598 :     $residual = substr($residual, $room);
599 :     $len -= $room;
600 :     # Write the buffer.
601 :     print PACKFILE "b" . $packObject->{buffer};
602 :     $packObject->{outCount} += $PACK_BLOCK + 1;
603 :     # Denote that the buffer is now empty.
604 :     $packObject->{bufferLen} = 0;
605 :     $packObject->{buffer} = '';
606 :     $room = $PACK_BLOCK;
607 :     }
608 :     # Put the remaining text into the buffer.
609 :     $packObject->{buffer} .= $residual;
610 :     $packObject->{bufferLen} += $len;
611 :     }
612 :     }
613 :    
614 :     =head3 WriteEndMark
615 :    
616 :     C<< my $outCount = WriteEndMark($packObject); >>
617 :    
618 :     Write out the last block of a compressed data stream and return the total number of bytes
619 :     in the stream.
620 :    
621 :     =cut
622 :    
623 :     sub WriteEndMark {
624 :     my ($packObject) = @_;
625 :     # Get the number of bytes in the buffer.
626 :     my $len = $packObject->{bufferLen};
627 :     # Format the prefix.
628 :     my $prefix = "$len";
629 :     while (length($prefix) < 4) {
630 :     $prefix = "0$prefix";
631 :     }
632 :     # Insert the checksum.
633 :     $prefix .= $packObject->{digest}->hexdigest;
634 :     # Write the prefix and the buffer content.
635 :     print PACKFILE "e$prefix" . $packObject->{buffer};
636 :     # Compute the total length output.
637 :     my $retVal = $len + 37 + $packObject->{outCount};
638 :     # Add it to the package output counter.
639 :     $packObject->{fileSize} += $retVal;
640 :     # Return the total length output.
641 :     return $retVal;
642 :     }
643 :    
644 :     =head3 PackTree
645 :    
646 :     C<< PackTree($directory, $packageFileName); >>
647 :    
648 :     Package all files in a directory tree into a package file.
649 :    
650 :     The C<extensions.txt> file in the root of the directory tree is used to determine
651 :     which file extensions indicate binary files.
652 :    
653 :     =cut
654 :    
655 :     sub PackTree {
656 :     my ($directory, $packageFileName) = @_;
657 :     # Get the extensions hash. This is used to separate text files
658 :     # from binary files.
659 :     my %extHash = GetExtensions($directory);
660 :     # Set up to output to the package files.
661 :     my $packObject = StartPackaging($packageFileName);
662 :     # Create a directory object.
663 :     my $dirObject = OpenDirectory($directory);
664 :     # Create file type counters.
665 :     my %fileCounters = ( t => 0, b => 0 );
666 :     # Loop through the files in the tree, packaging each one.
667 :     while (my $fileName = GetFileName($dirObject)) {
668 :     # Decide whether this file is text or binary. We begin by separating
669 :     # the file name into its path and extension.
670 :     my ($path, $suffix) = NameParse($fileName);
671 :     # Check the extension hash for the suffix.
672 :     my $mode = (exists $extHash{$suffix} ? "b" : "t");
673 :     # Output the relative file name.
674 :     my $relName = $dirObject->{relName};
675 :     WritePackageLine($packObject, "$mode $relName");
676 :     # Update the appropriate file counter.
677 :     $fileCounters{$mode}++;
678 :     # Now we compress the file.
679 :     PackFile($fileName, $packObject, $mode);
680 :     }
681 :     # Close the package output stream.
682 :     FinishPackaging($packObject);
683 :     # Tell the user how much package file we have.
684 :     print $packObject->{fileSize} . " bytes written to package file.\n";
685 :     # Display the text/binary split.
686 :     print "$fileCounters{t} text files, $fileCounters{b} binary files compressed.\n";
687 :     # Write the summary of files processed.
688 :     print InSummary($dirObject);
689 :     }
690 :    
691 :     =head3 PackFile
692 :    
693 :     C<< PackFile($fileName, $packObject, $mode); >>
694 :    
695 :     Compress the specified file to the specified package file in the specified mode.
696 :    
697 :     =cut
698 :    
699 :     sub PackFile {
700 :     my ($fileName, $packObject, $mode) = @_;
701 :     print "Packaging $fileName ($mode).\n";
702 :     # Create a compression object.
703 :     my $compTable = deflateInit();
704 :     # Initialize for reading the input file.
705 :     StartFile($mode, $fileName);
706 :     # Initialize for writing the output.
707 :     StartCompressedData($packObject, $compTable);
708 :     # Loop through the input.
709 :     while (my $chars = GetCharacters($mode)) {
710 :     # Add the data to the checksum.
711 :     $packObject->{digest}->add($chars);
712 :     # Compress this data.
713 :     my ($output, $status) = $compTable->deflate($chars);
714 :     # Write it to the output.
715 :     WriteCompressedData($packObject, $output, $status);
716 :     }
717 :     # Flush out any remaining data.
718 :     my ($output, $status) = $compTable->flush();
719 :     WriteCompressedData($packObject, $output, $status);
720 :     # Terminate the file.
721 :     my $outCount = WriteEndMark($packObject);
722 :     # Display the compression ratio.
723 :     my $fileSize = $compTable->total_in();
724 :     my $ratio = ($fileSize ? int($outCount * 100 / $fileSize + 0.5) : 0);
725 :     print "$fileSize characters in, $outCount characters out ($ratio%)\n";
726 :     }
727 :    
728 :     =head3 UnPackTree
729 :    
730 :     C<< UnPackTree($directory, $packageFile); >>
731 :    
732 :     Read file data from a package file and put it into a directory tree.
733 :    
734 :     The package file consists of a series of compressed files. The package file contains
735 :     three types of records. A text record begins with a space and is terminated by a
736 :     new-line character (C<\n>). A block record has a fixed length and begins with a C<b>.
737 :     An end record begins with an C<e> and a four-digit length, followed by data of that
738 :     length. A file contains a text record with the file type (C<t> for text and C<b> for
739 :     binary) and its name relative to the root of the tree, zero or more block records, and
740 :     one end record.
741 :    
742 :     To prevent file system compatibility problems, the name is automatically cleaned
743 :     before the file is created: all spaces and colons are converted to underscores.
744 :    
745 :     Text files are open in text mode and binary files in binary mode. As a result, the
746 :     line-end characters in a text file are automatically translated to the line-end
747 :     character of the target operating system.
748 :    
749 :     =cut
750 :    
751 :     sub UnPackTree {
752 :     my ($directory, $packageFile) = @_;
753 :     # Open the package file for input.
754 :     my $packObject = StartUnPacking($packageFile);
755 :     # Create some file counters.
756 :     my %fileCounters = ( b => 0, t => 0 );
757 :     # Loop through the package one file at a time.
758 :     while (my $fileData = GetTextRecord($packObject)) {
759 :     # Extract the file name and mode.
760 :     my $mode = substr($fileData,0,1);
761 :     my $fileName = substr($fileData,2);
762 :     print "Unpacking $fileName ($mode).\n";
763 :     # Create the decompression object.
764 :     my $compObject = inflateInit();
765 :     # Create the output file.
766 :     my $fileObject = CreateFile($fileName, $mode, $directory, $compObject);
767 :     # Loop through the package file, de-compressing the data.
768 :     while (my $block = GetBlock($packObject)) {
769 :     # Decompress the current block.
770 :     my ($data, $status) = $compObject->inflate($block);
771 :     # Write the decompressed data.
772 :     WriteData($fileObject, $data, $status);
773 :     }
774 :     # Verify the checksums.
775 :     my ($expected, $found) = ($packObject->{digest}, $fileObject->{digest}->hexdigest);
776 :     if ($expected ne $found) {
777 :     # Here we have a mismatch.
778 :     die "Invalid checksum found unpacking file $fileName: expected $expected, found $found.";
779 :     }
780 :     # Count this file.
781 :     $fileCounters{$mode}++;
782 :     # Close it.
783 :     close CLEARFILE;
784 :     }
785 :     # Close the package file.
786 :     close PACKFILE;
787 :     # Tell the user what we did.
788 :     print "$fileCounters{t} text files unpacked, $fileCounters{b} binary files unpacked.\n";
789 :     print $packObject->{inCounter} . " package file bytes read.\n";
790 :     }
791 :    
792 :     =head3 StartUnPacking
793 :    
794 :     C<< my $packObject = StartUnPacking($packageFile); >>
795 :    
796 :     Prepare to read from a package file.
797 :    
798 :     =cut
799 :    
800 :     sub StartUnPacking {
801 :     my ($packageFile) = @_;
802 :     # Open the package file for binary input.
803 :     (sysopen PACKFILE, $packageFile, O_RDONLY | O_BINARY) ||
804 :     die "Could not open $packageFile: $!";
805 :     # Return the package file input object.
806 :     return {
807 :     blockEnd => 0,
808 :     inCounter => 0
809 :     }
810 :     }
811 :    
812 :     =head3 GetTextRecord
813 :    
814 :     C<< my $data = GetTextRecord($packObject); >>
815 :    
816 :     Read a text record from the package file. The text record begins with a space and
817 :     ends with a new-line character.
818 :    
819 :     =cut
820 :    
821 :     sub GetTextRecord {
822 :     my ($packObject) = @_;
823 :     # Declare the return variable.
824 :     my $retVal;
825 :     # Read the next character.
826 :     my $buffer;
827 :     my $rv = read PACKFILE, $buffer, 1;
828 :     # Check to insure we read a space.
829 :     if (! defined $rv) {
830 :     # Here the read failed.
831 :     die "Error reading package file: $!";
832 :     } elsif ($rv == 0) {
833 :     # Here we've reached end-of-file. We'll return an undefined value
834 :     # to the caller.
835 :     } elsif ($buffer ne " ") {
836 :     # Here we didn't find a space: the file is corrupt.
837 :     die "Text record not found when expected in package file.";
838 :     } else {
839 :     # Here we have a real, live text record. We loop until we find a new-line.
840 :     $retVal = "";
841 :     $rv = read PACKFILE, $buffer, 1;
842 :     while (defined $rv && $rv == 1 && $buffer ne "\n") {
843 :     $retVal .= $buffer;
844 :     $rv = read PACKFILE, $buffer, 1;
845 :     }
846 :     # Check for error conditions.
847 :     if (! defined $rv) {
848 :     die "Error reading package file: $!";
849 :     } elsif ($rv == 0) {
850 :     die "End-of-file inside text record in package file.";
851 :     }
852 :     # Update the input counter.
853 :     $packObject->{inCounter} += length($retVal) + 2;
854 :     # Denote we haven't read an end block.
855 :     $packObject->{blockEnd} = 0;
856 :     }
857 :     return $retVal;
858 :     }
859 :    
860 :     =head3 GetBlock
861 :    
862 :     C<< my $block = GetBlock($packObject); >>
863 :    
864 :     Read a block of compressed data from the package file. If the end of the compressed data
865 :     has been reached, will return an undefined value.
866 :    
867 :     =cut
868 :    
869 :     sub GetBlock {
870 :     my ($packObject) = @_;
871 :     # Declare the return value.
872 :     my $retVal;
873 :     # Only proceed if we have NOT reached the end block.
874 :     if (! $packObject->{blockEnd}) {
875 :     # Read the indicator character.
876 :     my $buffer;
877 :     my $rv = read PACKFILE, $buffer, 1;
878 :     # Check for errors.
879 :     if (! defined $rv) {
880 :     die "Error reading package block mark: $!";
881 :     } elsif ($rv == 0) {
882 :     die "End-of-file read where block mark expected in package file.";
883 :     } elsif ($buffer eq 'b') {
884 :     # Here we have a normal block. We read it directly into the
885 :     # return variable.
886 :     $rv = read PACKFILE, $retVal, $PACK_BLOCK;
887 :     # Check for errors.
888 :     if (! defined $rv) {
889 :     die "Error reading package block data: $!";
890 :     } elsif ($rv < $PACK_BLOCK) {
891 :     die "Unexpected end-of-file inside package file block.";
892 :     } else {
893 :     # Record the bytes read.
894 :     $packObject->{inCounter} += $PACK_BLOCK + 1;
895 :     }
896 :     } elsif ($buffer eq 'e') {
897 :     # Here we have an end block. We need to read the block length
898 :     # and the checksum.
899 :     $rv = read PACKFILE, $buffer, 36;
900 :     # Check for errors.
901 :     if (! defined $rv) {
902 :     die "Error reading end block length: $!";
903 :     } elsif ($rv < 36) {
904 :     die "Unexpected end-of-file reading end block length and checksum.";
905 :     } else {
906 :     # Get the length and the checksum.
907 :     my $lengthCode = substr $buffer, 0, 4;
908 :     $packObject->{digest} = substr $buffer, 4, 32;
909 :     # Check for an empty block.
910 :     if ($lengthCode == 0) {
911 :     # Here the end block is empty. We will return an undefined value,
912 :     # indicating end-of-file. Record the 36 bytes read.
913 :     $packObject->{inCounter} += 36;
914 :     } else {
915 :     # Here we have a valid block length, so we read the block.
916 :     $rv = read PACKFILE, $retVal, $lengthCode;
917 :     # Check for errors.
918 :     if (! defined $rv) {
919 :     die "Error reading end block: $!";
920 :     } elsif ($rv < $lengthCode) {
921 :     die "Unexpected end-of-file inside end block.";
922 :     } else {
923 :     # Denote the next read will return end-of-file.
924 :     $packObject->{blockEnd} = 1;
925 :     # Record the bytes read.
926 :     $packObject->{inCounter} += $lengthCode + 37;
927 :     }
928 :     }
929 :     }
930 :     } else {
931 :     # Here we have an invalid block mark.
932 :     die "Invalid block mark read in package file.";
933 :     }
934 :     }
935 :     return $retVal;
936 :     }
937 :    
938 :     =head3 CreateFile
939 :    
940 :     C<< my $fileObject = CreateFile($fileName, $mode, $directory, $compObject); >>
941 :    
942 :     Create a file into which packaged data can be unpacked.
943 :    
944 :     =cut
945 :    
946 :     sub CreateFile {
947 :     my ($fileName, $mode, $directory, $compObject) = @_;
948 :     # Clear the file name.
949 :     $fileName =~ tr/: /__/;
950 :     # Add the file name to the directory to get the fully-qualified file name.
951 :     my $fullName = "$directory/$fileName";
952 :     # Extract the path.
953 :     my ($path, $suffix) = NameParse($fullName);
954 :     # Insure the path exists.
955 :     if (! -d $path) {
956 :     mkpath($path);
957 :     }
958 :     # Check for the directory problem.
959 :     if (-d $fullName) {
960 :     # Here a directory has the same name as the file. We append a suffix
961 :     # to work around the problem.
962 :     $fullName .= ".$mode";
963 :     }
964 :     # Open the file in the appropriate mode.
965 :     if ($mode eq 't') {
966 :     (open CLEARFILE, ">$fullName") ||
967 :     die "Could not open text file $fullName: $!";
968 :     } else {
969 :     (sysopen CLEARFILE, $fullName, O_WRONLY | O_CREAT | O_BINARY) ||
970 :     die "Could not open binary file $fullName: $!";
971 :     }
972 :     # Create a file manipulation object to return.
973 :     return {
974 :     mode => $mode,
975 :     compObject => $compObject,
976 :     digest => Digest::MD5->new()
977 :     };
978 :     }
979 :    
980 :     =head3 WriteData
981 :    
982 :     C<< WriteData($fileObject, $data, $status); >>
983 :    
984 :     Write a block of data to the output file.
985 :    
986 :     =cut
987 :    
988 :     sub WriteData {
989 :     my ($fileObject, $data, $status) = @_;
990 :     # Check the status code from the compressor.
991 :     if ($status != Z_OK && $status != Z_STREAM_END) {
992 :     die "Error in decompression: " . $fileObject->{compObject}->msg();
993 :     } else {
994 :     # Here we're okay, so we can write the data.
995 :     print CLEARFILE $data;
996 :     # Add it to the checksum.
997 :     $fileObject->{digest}->add($data);
998 :     }
999 :     }
1000 :    
1001 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3