[Bio] / FigKernelScripts / Packager.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/Packager.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 Packager
4 :    
5 :     Package all the files in a directory tree for transport or recreate the directory
6 :     tree from the packaged files.
7 :    
8 :     This method takes as input three positional parameters: a command, the name of the
9 :     root for the directory tree and the name of the package files. If the command is
10 :     C<pack>, the files are packaged into one or more text files. If the command is
11 :     C<unpack>, the files are unpackaged from the text files. If the command is
12 :     C<scan>, the files are examined to determine the file types.
13 :    
14 :     File and directory names should be specified using Unix conventions, with a
15 :     forward slash (C</>) instead of a backslash (C<\>).
16 :    
17 :     File names ending in a tilde (C<~>) or C<.bak> will be ignored when scanning or
18 :     packaging. These files are presumed to be temporary backups.
19 :    
20 :     The process of packaging is straightforward. We write out the name of the input file,
21 :     its type (text or binary), the file data itself, and an MD5 digest. Each file is
22 :     compressed using the PERL C<Compress::Zlib> libary.
23 :    
24 :     This is by no means the optimal way to package files, but it is simple to do and
25 :     can be customized for this project's special problems (such as incompatible file
26 :     names).
27 :    
28 :     It is important that the packager understand the difference between text files
29 :     and binary files. The line end characters in text files need to be translated
30 :     to the target platform's characters, while binary files must be moved without
31 :     translation. The file's extension is used to determine whether it is binary or
32 :     text. When packaging, we will look for a file called C<extensions.txt> in the
33 :     root directory of the tree. This file will have a list of binary file extensions,
34 :     one per line. The <scan> command can be used to create this file. If the file
35 :     is not found when packaging, it is presumed all files are text files.
36 :    
37 :     When packaging, line-end characters inside text files are converted to a single
38 :     line feed (C<\n>). When unpackaging, the line feeds are converted to the line-ending
39 :     character of the current platform.
40 :    
41 :     =head2 Temporary Files
42 :    
43 :     Temporary files in the directory tree are skipped when scanning or packaging. A file
44 :     is considered temporary if its name ends in a tilde (C<~>) or pound sign C<#>,
45 : parrello 1.2 or it has an extension of C<.bak> or C<.tgz>. In addition, directories with names
46 :     ending in C<.old> will be skipped.
47 : parrello 1.1
48 :     =cut
49 :    
50 :     use strict;
51 :     use Cwd 'abs_path';
52 :     use File::Copy;
53 :     use File::Path;
54 :     use Fcntl qw(:DEFAULT :seek);
55 :     use POSIX;
56 :     use Compress::Zlib;
57 :     use Digest::MD5;
58 :    
59 :     =head2 Constants
60 :    
61 :     =head3 CHUNK_SIZE
62 :    
63 :     Number of bytes to use when reading a survey chunk during file scans.
64 :     We take three chunks from the file of this size. Any binary character
65 :     (ASCII code > 127) indicates a binary file. A binary file is also
66 :     presumed if the number of non-whitespace control characters is above
67 :     a certain percentage.
68 :    
69 :     =cut
70 :     my $CHUNK_SIZE = 1024;
71 :    
72 :     =head3 SURVEY_SIZE
73 :    
74 :     Number of bytes in a file survey. A file survey consists of three chunks.
75 :    
76 :     =cut
77 :    
78 :     my $SURVEY_SIZE = $CHUNK_SIZE * 3;
79 :    
80 :     =head3 NON_WHITE
81 :    
82 :     Fraction of the non-white control characters in the survey data that
83 :     would classify the file as binary.
84 :    
85 :     =cut
86 :    
87 :     my $NON_WHITE = 0.25;
88 :    
89 :     =head3 MAX_PACKAGE_SIZE
90 :    
91 :     Maximum legal size for a package file.
92 :    
93 :     =cut
94 :    
95 :     my $MAX_PACKAGE_SIZE = 0x7FFFFFFF;
96 :    
97 :     =head3 NL_LEN
98 :    
99 :     Number of characters in a line-end sequence.
100 :    
101 :     =cut
102 :    
103 :     my $NL_LEN = length($/);
104 :    
105 :     =head3 PACK_BLOCK
106 :    
107 :     Number of characters in a package block.
108 :    
109 :     =cut
110 :    
111 :     my $PACK_BLOCK = 4096;
112 :    
113 :     # OPEN CODE
114 :    
115 :     # Get the command, the file directory name, and the package file name.
116 :     my ($command, $treeDirectory, $packageFile) = @ARGV;
117 :     # Get the absolute path for the directory tree.
118 :     my $absDirectory = abs_path($treeDirectory);
119 :     # Trim off any trailing slash.
120 :     if ($absDirectory =~ m!/$!) {
121 :     chop $absDirectory;
122 :     }
123 :     # Process the command.
124 :     $command = lc $command;
125 :     if ($command eq 'scan') {
126 :     ScanTree($absDirectory);
127 :     } elsif ($command eq 'pack') {
128 :     PackTree($absDirectory, $packageFile);
129 :     } elsif ($command eq 'unpack') {
130 :     UnPackTree($absDirectory, $packageFile);
131 :     } else {
132 :     print "Invalid command \"$command\".\n";
133 :     }
134 :     print "Processing complete.\n";
135 :    
136 :     =head2 Methods
137 :    
138 :     =head3 ScanTree
139 :    
140 :     C<< ScanTree($directory); >>
141 :    
142 :     Scan all files in the specified directory and create an C<extensions.txt> file
143 :     listing the extensions of files that are binary.
144 :    
145 :     =cut
146 :    
147 :     sub ScanTree {
148 :     my ($directory) = @_;
149 :     # Create a hash of file extensions. The first time a file with a particular
150 :     # extension is found, it is scanned for binary characters. If such characters
151 :     # are found, the file is added to the hash with a value of "b"; otherwise it is
152 :     # added with a value of "t".
153 :     my %extensions = ();
154 :     # Create a directory object.
155 :     my $dirObject = OpenDirectory($directory);
156 :     # Loop through the files in the tree, scanning each one.
157 :     while (my $fileName = GetFileName($dirObject)) {
158 :     # Get the file name relative to the root and the file extension.
159 :     my $relName = $dirObject->{relName};
160 :     my $suffix = $dirObject->{suffix};
161 :     # Only proceed if this is a new file type.
162 :     if (exists $extensions{$suffix}) {
163 :     print "Skipping $relName.\n";
164 :     } else {
165 :     # Get the file size.
166 :     my $size = -s $fileName;
167 :     # If the file is empty, skip it.
168 :     if ($size == 0) {
169 :     print "File $relName is empty: ignored.\n";
170 :     } else {
171 :     # Here the file is worth scanning.
172 :     print "Scanning $relName($size) for type of \"$suffix\"... ";
173 :     # Determine the type of the file.
174 :     my $type = ScanFile($fileName, $size);
175 :     print "$type\n";
176 :     # Add the extension to the hash.
177 :     $extensions{$suffix} = substr($type, 0, 1);
178 :     }
179 :     }
180 :     }
181 :     # Now we write the extension file.
182 :     print "Creating extensions file.\n";
183 :     (open EXTFILE, ">$directory/extensions.txt") ||
184 :     die "Could not open extensions file: $!";
185 :     my $count = 0;
186 :     for my $ext (keys %extensions) {
187 :     if ($extensions{$ext} eq "b") {
188 :     (print EXTFILE "$ext\n") ||
189 :     die "Error writing extensions file: $!";
190 :     $count++;
191 :     }
192 :     }
193 :     close EXTFILE;
194 :     print "$count binary extensions found.\n";
195 :     print InSummary($dirObject);
196 :     }
197 :    
198 :     =head3 ScanFile
199 :    
200 :     C<< my $type = ScanFile($fileName, $size); >>
201 :    
202 :     Survey the specified file of the specified size to determine whether it is
203 :     text or binary. A file is binary if it has a lot of control characters or
204 :     any character whose ASCII value is greater than 127.
205 :    
206 :     =cut
207 :    
208 :     sub ScanFile {
209 :     my ($fileName, $size) = @_;
210 :     # Open the file for binary input.
211 :     (sysopen INFILE, $fileName, O_RDONLY + O_BINARY) ||
212 :     die "Could not scan $fileName: $!";
213 :     # Survey the file. We take chunks from the beginning, end, and middle.
214 :     # if the file has fewer than three chunks of characters, we read the
215 :     # whole thing.
216 :     my $survey = "";
217 :     if ($size <= $SURVEY_SIZE) {
218 :     (sysread INFILE, $survey, $SURVEY_SIZE) ||
219 :     die "Error reading $fileName: $!";
220 :     } else {
221 :     (sysread INFILE, $survey, $CHUNK_SIZE) ||
222 :     die "Error reading start of $fileName: $!";
223 :     my $position = int($size - $CHUNK_SIZE / 2);
224 :     (sysseek INFILE, $position, SEEK_SET) ||
225 :     die "Error moving to middle of $fileName: $!";
226 :     (sysread INFILE, $survey, $CHUNK_SIZE, $CHUNK_SIZE) ||
227 :     die "Error reading middle of $fileName: $!";
228 :     (sysseek INFILE, -$CHUNK_SIZE, SEEK_END) ||
229 :     die "Error moving to end of $fileName: $!";
230 :     (sysread INFILE, $survey, 2*$CHUNK_SIZE, $CHUNK_SIZE) ||
231 :     die "Error reading end of $fileName: $!";
232 :     }
233 :     close INFILE;
234 :     my $surveyLen = length $survey;
235 :     # Now $survey contains a bunch of bytes from the file. We run through them
236 :     # counting non-white control characters and looking for binary characters.
237 :     # First, we assume the file is text until we learn otherwise.
238 :     my $retVal = "text";
239 :     # This variable will contain the number of non-white control characters
240 :     # required to prove the file is binary.
241 :     my $controls = ceil($NON_WHITE * $surveyLen);
242 :     # Here we loop through the survey characters.
243 :     while ($retVal eq "text" && (my $chr = chop $survey)) {
244 :     # Process according to the character type.
245 :     if ($chr gt chr(127)) {
246 :     # Here we have a true binary character.
247 :     $retVal = "binary";
248 :     } elsif ($chr lt " " && $chr !~ /\s/) {
249 :     # Here we have a non-white control character.
250 :     $controls--;
251 :     if (! $controls) { $retVal = "binary"; }
252 :     }
253 :     }
254 :     # Return the file type.
255 :     return $retVal;
256 :     }
257 :    
258 :     =head3 OpenDirectory
259 :    
260 :     C<< my $dirObject = OpenDirectory($directoryName); >>
261 :    
262 :     This method returns an object that can be used to find all the files in a directory
263 :     tree. The incoming parameter is the directory tree name, properly cleaned and with
264 :     the trailing slash removed.
265 :    
266 :     =cut
267 :    
268 :     sub OpenDirectory {
269 :     my ($directoryName) = @_;
270 :     return {
271 :     # The root name length enables us to separate the relative file name from
272 :     # the absolute file name.
273 :     rootLen => length($directoryName) + 1,
274 :     # This is a stack of the files and directories still to be processed.
275 :     stack => [$directoryName],
276 :     # These are counters for various file types.
277 :     tempCount => 0,
278 :     dirCount => 0,
279 : parrello 1.2 dirSkip => 0,
280 : parrello 1.1 foundCount => 0
281 :     };
282 :     }
283 :    
284 :     =head3 GetFileName
285 :    
286 :     C<< my $fileName = GetFileName($dirObject); >>
287 :    
288 :     This method returns the next file name for a directory tree. The incoming parameter is
289 :     a directory object returned by OpenDirectory. If all the files have been processed, the
290 :     method returns an undefined value.
291 :    
292 :     =cut
293 :    
294 :     sub GetFileName {
295 :     my ($dirObject) = @_;
296 :     # Loop until we find a file name or empty the stack.
297 :     my $retVal;
298 :     my $done = 0;
299 :     while (! $done) {
300 :     # Get the next file name.
301 :     my $nextName = pop @{$dirObject->{stack}};
302 :     if (! $nextName) {
303 :     # The stack is empty, so we're done.
304 :     $done = 1;
305 :     } elsif (! -d $nextName) {
306 :     # Here we've found a real file. Now we need to check for a temporary file name.
307 :     if ($nextName =~ /(~|#|\.bak|\.tgz)$/) {
308 :     # We have a temporary file name, so we count it and keep going.
309 : parrello 1.2 $dirObject->{dirSkip}++;
310 : parrello 1.1 } else {
311 :     # Here we have a non-temporary name, so we're done.
312 :     $done = 1;
313 :     $retVal = $nextName;
314 :     $dirObject->{foundCount}++;
315 :     # Compute the file's relative name and extension, then stuff them in
316 :     # the object.
317 :     $dirObject->{relName} = substr $nextName, $dirObject->{rootLen};
318 :     my ($path, $suffix) = NameParse($nextName);
319 :     $dirObject->{suffix} = lc $suffix;
320 :     }
321 : parrello 1.2 } elsif ($nextName =~ /\.old$/) {
322 :     # Here we have a backup directory.
323 :     $dirObject->{dirSkip}++;
324 : parrello 1.1 } else {
325 :     # Here we have a directory. We need to push all its members on the stack.
326 :     # Note that file names beginning with a period are ignored.
327 :     opendir NEXTDIR, $nextName;
328 :     my @files = grep { $_ =~ /^[^.]/ } readdir NEXTDIR;
329 :     closedir NEXTDIR;
330 :     push @{$dirObject->{stack}}, map { "$nextName/$_" } @files;
331 :     $dirObject->{dirCount}++;
332 :     }
333 :     }
334 :     return $retVal;
335 :     }
336 :    
337 :     =head3 InSummary
338 :    
339 :     C<< print InSummary($dirObject); >>
340 :    
341 :     Return a summary of the files processed by a directory object.
342 :    
343 :     =cut
344 :    
345 :     sub InSummary {
346 :     my ($dirObject) = @_;
347 :     my $dirCount = $dirObject->{dirCount};
348 :     my $tempCount = $dirObject->{tempCount};
349 :     my $foundCount = $dirObject->{foundCount};
350 : parrello 1.2 my $dirSkip = $dirObject->{dirSkip};
351 :     return "$foundCount files found, $tempCount temporary files ignored, $dirCount directories processed, $dirSkip directories skipped.\n";
352 : parrello 1.1 }
353 :    
354 :     =head3 NameParse
355 :    
356 :     C<< my ($path, $suffix) = NameParse($fileName); >>
357 :    
358 :     Separate the directory path and suffix out of a file name. Note that if the suffix
359 :     is entirely numeric, we return a suffix of "#".
360 :    
361 :     =cut
362 :    
363 :     sub NameParse {
364 :     my ($fileName) = @_;
365 :     # Split the file into path pieces.
366 :     my @pieces = split /\//, $fileName;
367 :     # Peel off the last piece.
368 :     my $baseName = pop @pieces;
369 :     # Form the path from everything remaining.
370 :     my $path = join "/", @pieces;
371 :     # Split the extension off the base name.
372 :     my $suffix = '';
373 :     if ($baseName =~ /\.\d+$/) {
374 :     $suffix = '#';
375 :     } elsif ($baseName =~ /\.([^.]+)$/) {
376 :     $suffix = $1;
377 :     }
378 :     # Return the results.
379 :     return ($path, $suffix);
380 :     }
381 :    
382 :     =head3 StartFile
383 :    
384 :     C<< StartFile($mode, $fileName); >>
385 :    
386 :     Open the specified file for compression input. The file handle used is CLEARFILE.
387 :    
388 :     =cut
389 :    
390 :     sub StartFile {
391 :     my ($mode, $fileName) = @_;
392 :     # Determine the type of file.
393 :     if ($mode eq 't') {
394 :     # Here we have a text file.
395 :     (open CLEARFILE, "<$fileName") ||
396 :     die "Could not open text input file $fileName: $!";
397 :     } else {
398 :     # Here we have a binary file.
399 :     (sysopen CLEARFILE, $fileName, O_RDONLY | O_BINARY) ||
400 :     die "Could not open binary input file $fileName: $!";
401 :     }
402 :     }
403 :    
404 :     =head3 GetCharacters
405 :    
406 :     C<< my $characters = GetCharacters($mode); >>
407 :    
408 :     Get some characters to compress from the current input file. If we have reached
409 :     end-of-file, returns an empty list.
410 :    
411 :     =cut
412 :    
413 :     sub GetCharacters {
414 :     my ($mode) = @_;
415 :     # Declare the return variable.
416 :     my $retVal;
417 :     # Determine the type of file.
418 :     if ($mode eq 't') {
419 :     # Here we have a text file. We read a line of text. Since it's open as
420 :     # a text file, the line-end will be translated for us.
421 :     $retVal = <CLEARFILE>;
422 :     if (! defined $retVal) {
423 :     # Here we either have end-of-file or a file error.
424 :     if ($?) {
425 :     die "Text input error: $!";
426 :     } else {
427 :     # If it's end-of-file, we close the file.
428 :     close CLEARFILE;
429 :     }
430 :     }
431 :     } else {
432 :     # Here we have a binary file. We read a fixed-length text chunk.
433 :     my $textLine;
434 :     my $result = sysread CLEARFILE, $textLine, $CHUNK_SIZE;
435 :     if (! defined $result) {
436 :     # Here we have a file error.
437 :     die "Binary input error: $!";
438 :     } elsif ($result == 0) {
439 :     # Here we have end-of-file, so we close the file.
440 :     close CLEARFILE;
441 :     } else {
442 :     # Here we have data to return.
443 :     $retVal = $textLine;
444 :     }
445 :     }
446 :     return $retVal;
447 :     }
448 :    
449 :     =head3 GetExtensions
450 :    
451 :     C<< my %extHash = GetExtensions($directory); >>
452 :    
453 :     Create a hash containing the extensions of the binary files in the specified
454 :     directory tree.
455 :    
456 :     =cut
457 :    
458 :     sub GetExtensions {
459 :     my ($directory) = @_;
460 :     # Declare the return variable.
461 :     my %retVal = ();
462 :     # Check for the extensions file.
463 :     my $fileName = "$directory/extensions.txt";
464 :     if (! -e $fileName) {
465 :     print "No extensions file found. All files are treated as text.\n";
466 :     } else {
467 :     # The file exists, so we read it into the hash.
468 :     (open EXTFILE, "<$fileName") ||
469 :     die "Could not open extensions file: $!";
470 :     while (my $line = <EXTFILE>) {
471 :     chomp $line;
472 :     $retVal{$line} = 'b';
473 :     }
474 :     }
475 :     # Return the hash.
476 :     return %retVal;
477 :     }
478 :    
479 :     =head3 StartPackaging
480 :    
481 :     C<< my $packObject = StartPackaging($packageFileName); >>
482 :    
483 :     Prepare to create the package file.
484 :    
485 :     =cut
486 :    
487 :     sub StartPackaging {
488 :     my ($packageFileName) = @_;
489 :     # Open the package file.
490 :     (sysopen PACKFILE, "$packageFileName", O_WRONLY | O_CREAT | O_TRUNC | O_BINARY) ||
491 :     die "Could not open package file $packageFileName: $!";
492 :     # Return an object telling us how we're doing.
493 :     return {
494 :     packName => $packageFileName,
495 :     fileSize => 0,
496 :     buffer => '',
497 :     bufferSize => 0,
498 :     outCount => 0
499 :     };
500 :     }
501 :    
502 :     =head3 WritePackageLine
503 :    
504 :     C<< WritePackageLine($packObject, $line); >>
505 :    
506 :     Write a line of text data to the package file. A text line is preceded by a space character
507 :     and is terminated by a line feed character.
508 :    
509 :     =cut
510 :    
511 :     sub WritePackageLine {
512 :     my ($packObject, $line) = @_;
513 :     # Write the line to the file.
514 :     (print PACKFILE " $line\n") ||
515 :     die "Error writing to package file: $!";
516 :     # Update the file size.
517 :     $packObject->{fileSize} += length($line) + 2;
518 :     }
519 :    
520 :     =head3 FinishPackaging
521 :    
522 :     C<< FinishPackaging($packObject); >>
523 :    
524 :     Finish the packaging process. This basically closes the package file.
525 :    
526 :     =cut
527 :    
528 :     sub FinishPackaging {
529 :     close PACKFILE;
530 :     }
531 :    
532 :     =head3 StartCompressedData
533 :    
534 :     C<< StartCompressedData($packObject, $compTable); >>
535 :    
536 :     Initialize for writing compressed file data. Compressed file data is output in fixed-
537 :     length blocks preceded by a C<b> character. (This distinguishes them from text lines,
538 :     which begin with a space.) The last block may be short. It is preceded by an C<e>
539 :     character and the next four bytes contain the length in decimal. (So, for example,
540 :     C<e0012> would indicate a 12-byte final block.)
541 :    
542 :     =cut
543 :    
544 :     sub StartCompressedData {
545 :     my ($packObject, $compTable) = @_;
546 :     # Initialize the buffer data and the output count.
547 :     $packObject->{buffer} = '';
548 :     $packObject->{bufferLen} = 0;
549 :     $packObject->{outCount} = 0;
550 :     # Remember the compression table.
551 :     $packObject->{compTable} = $compTable;
552 :     # Create the checksum.
553 :     $packObject->{digest} = Digest::MD5->new();
554 :     }
555 :    
556 :     =head3 WriteCompressedData
557 :    
558 :     C<< WriteCompressedData($packObject, $data, $status); >>
559 :    
560 :     Add a block of data to the compressed output. The data is accumulated until we fill
561 :     an entire block, at which time it is written to the file with the appropriate marker.
562 :    
563 :     =cut
564 :    
565 :     sub WriteCompressedData {
566 :     my ($packObject, $data, $status) = @_;
567 :     # Check the status code to see if there's an error.
568 :     if ($status != Z_OK) {
569 :     die "Compression error: " . $packObject->{comptable}->msg();
570 :     } else {
571 :     # Get the amount of data we have.
572 :     my $len = length($data);
573 :     # Find out how much room is in the buffer.
574 :     my $room = $PACK_BLOCK - $packObject->{bufferLen};
575 :     # Now we put the data into blocks. At all times, $residual
576 :     # will be the data not transmitted, $room the room left in
577 :     # the block, and $len the length of the residual.
578 :     my $residual = $data;
579 :     while ($room < $len) {
580 :     # Put as much data into the buffer as will fit.
581 :     $packObject->{buffer} .= substr($residual, 0, $room);
582 :     # Remove the data from the residual.
583 :     $residual = substr($residual, $room);
584 :     $len -= $room;
585 :     # Write the buffer.
586 :     print PACKFILE "b" . $packObject->{buffer};
587 :     $packObject->{outCount} += $PACK_BLOCK + 1;
588 :     # Denote that the buffer is now empty.
589 :     $packObject->{bufferLen} = 0;
590 :     $packObject->{buffer} = '';
591 :     $room = $PACK_BLOCK;
592 :     }
593 :     # Put the remaining text into the buffer.
594 :     $packObject->{buffer} .= $residual;
595 :     $packObject->{bufferLen} += $len;
596 :     }
597 :     }
598 :    
599 :     =head3 WriteEndMark
600 :    
601 :     C<< my $outCount = WriteEndMark($packObject); >>
602 :    
603 :     Write out the last block of a compressed data stream and return the total number of bytes
604 :     in the stream.
605 :    
606 :     =cut
607 :    
608 :     sub WriteEndMark {
609 :     my ($packObject) = @_;
610 :     # Get the number of bytes in the buffer.
611 :     my $len = $packObject->{bufferLen};
612 :     # Format the prefix.
613 :     my $prefix = "$len";
614 :     while (length($prefix) < 4) {
615 :     $prefix = "0$prefix";
616 :     }
617 :     # Insert the checksum.
618 :     $prefix .= $packObject->{digest}->hexdigest;
619 :     # Write the prefix and the buffer content.
620 :     print PACKFILE "e$prefix" . $packObject->{buffer};
621 :     # Compute the total length output.
622 :     my $retVal = $len + 37 + $packObject->{outCount};
623 :     # Add it to the package output counter.
624 :     $packObject->{fileSize} += $retVal;
625 :     # Return the total length output.
626 :     return $retVal;
627 :     }
628 :    
629 :     =head3 PackTree
630 :    
631 :     C<< PackTree($directory, $packageFileName); >>
632 :    
633 :     Package all files in a directory tree into a package file.
634 :    
635 :     The C<extensions.txt> file in the root of the directory tree is used to determine
636 :     which file extensions indicate binary files.
637 :    
638 :     =cut
639 :    
640 :     sub PackTree {
641 :     my ($directory, $packageFileName) = @_;
642 :     # Get the extensions hash. This is used to separate text files
643 :     # from binary files.
644 :     my %extHash = GetExtensions($directory);
645 :     # Set up to output to the package files.
646 :     my $packObject = StartPackaging($packageFileName);
647 :     # Create a directory object.
648 :     my $dirObject = OpenDirectory($directory);
649 :     # Create file type counters.
650 :     my %fileCounters = ( t => 0, b => 0 );
651 :     # Loop through the files in the tree, packaging each one.
652 :     while (my $fileName = GetFileName($dirObject)) {
653 :     # Decide whether this file is text or binary. We begin by separating
654 :     # the file name into its path and extension.
655 :     my ($path, $suffix) = NameParse($fileName);
656 :     # Check the extension hash for the suffix.
657 :     my $mode = (exists $extHash{$suffix} ? "b" : "t");
658 :     # Output the relative file name.
659 :     my $relName = $dirObject->{relName};
660 :     WritePackageLine($packObject, "$mode $relName");
661 :     # Update the appropriate file counter.
662 :     $fileCounters{$mode}++;
663 :     # Now we compress the file.
664 :     PackFile($fileName, $packObject, $mode);
665 :     }
666 :     # Close the package output stream.
667 :     FinishPackaging($packObject);
668 :     # Tell the user how much package file we have.
669 :     print $packObject->{fileSize} . " bytes written to package file.\n";
670 :     # Display the text/binary split.
671 :     print "$fileCounters{t} text files, $fileCounters{b} binary files compressed.\n";
672 :     # Write the summary of files processed.
673 :     print InSummary($dirObject);
674 :     }
675 :    
676 :     =head3 PackFile
677 :    
678 :     C<< PackFile($fileName, $packObject, $mode); >>
679 :    
680 :     Compress the specified file to the specified package file in the specified mode.
681 :    
682 :     =cut
683 :    
684 :     sub PackFile {
685 :     my ($fileName, $packObject, $mode) = @_;
686 :     print "Packaging $fileName ($mode).\n";
687 :     # Create a compression object.
688 :     my $compTable = deflateInit();
689 :     # Initialize for reading the input file.
690 :     StartFile($mode, $fileName);
691 :     # Initialize for writing the output.
692 :     StartCompressedData($packObject, $compTable);
693 :     # Loop through the input.
694 :     while (my $chars = GetCharacters($mode)) {
695 :     # Add the data to the checksum.
696 :     $packObject->{digest}->add($chars);
697 :     # Compress this data.
698 :     my ($output, $status) = $compTable->deflate($chars);
699 :     # Write it to the output.
700 :     WriteCompressedData($packObject, $output, $status);
701 :     }
702 :     # Flush out any remaining data.
703 :     my ($output, $status) = $compTable->flush();
704 :     WriteCompressedData($packObject, $output, $status);
705 :     # Terminate the file.
706 :     my $outCount = WriteEndMark($packObject);
707 :     # Display the compression ratio.
708 :     my $fileSize = $compTable->total_in();
709 :     my $ratio = ($fileSize ? int($outCount * 100 / $fileSize + 0.5) : 0);
710 :     print "$fileSize characters in, $outCount characters out ($ratio%)\n";
711 :     }
712 :    
713 :     =head3 UnPackTree
714 :    
715 :     C<< UnPackTree($directory, $packageFile); >>
716 :    
717 :     Read file data from a package file and put it into a directory tree.
718 :    
719 :     The package file consists of a series of compressed files. The package file contains
720 :     three types of records. A text record begins with a space and is terminated by a
721 :     new-line character (C<\n>). A block record has a fixed length and begins with a C<b>.
722 :     An end record begins with an C<e> and a four-digit length, followed by data of that
723 :     length. A file contains a text record with the file type (C<t> for text and C<b> for
724 :     binary) and its name relative to the root of the tree, zero or more block records, and
725 :     one end record.
726 :    
727 :     To prevent file system compatibility problems, the name is automatically cleaned
728 :     before the file is created: all spaces and colons are converted to underscores.
729 :    
730 :     Text files are open in text mode and binary files in binary mode. As a result, the
731 :     line-end characters in a text file are automatically translated to the line-end
732 :     character of the target operating system.
733 :    
734 :     =cut
735 :    
736 :     sub UnPackTree {
737 :     my ($directory, $packageFile) = @_;
738 :     # Open the package file for input.
739 :     my $packObject = StartUnPacking($packageFile);
740 :     # Create some file counters.
741 :     my %fileCounters = ( b => 0, t => 0 );
742 :     # Loop through the package one file at a time.
743 :     while (my $fileData = GetTextRecord($packObject)) {
744 :     # Extract the file name and mode.
745 :     my $mode = substr($fileData,0,1);
746 :     my $fileName = substr($fileData,2);
747 :     print "Unpacking $fileName ($mode).\n";
748 :     # Create the decompression object.
749 :     my $compObject = inflateInit();
750 :     # Create the output file.
751 :     my $fileObject = CreateFile($fileName, $mode, $directory, $compObject);
752 :     # Loop through the package file, de-compressing the data.
753 :     while (my $block = GetBlock($packObject)) {
754 :     # Decompress the current block.
755 :     my ($data, $status) = $compObject->inflate($block);
756 :     # Write the decompressed data.
757 :     WriteData($fileObject, $data, $status);
758 :     }
759 :     # Verify the checksums.
760 :     my ($expected, $found) = ($packObject->{digest}, $fileObject->{digest}->hexdigest);
761 :     if ($expected ne $found) {
762 :     # Here we have a mismatch.
763 :     die "Invalid checksum found unpacking file $fileName: expected $expected, found $found.";
764 :     }
765 :     # Count this file.
766 :     $fileCounters{$mode}++;
767 :     # Close it.
768 :     close CLEARFILE;
769 :     }
770 :     # Close the package file.
771 :     close PACKFILE;
772 :     # Tell the user what we did.
773 :     print "$fileCounters{t} text files unpacked, $fileCounters{b} binary files unpacked.\n";
774 :     print $packObject->{inCounter} . " package file bytes read.\n";
775 :     }
776 :    
777 :     =head3 StartUnPacking
778 :    
779 :     C<< my $packObject = StartUnPacking($packageFile); >>
780 :    
781 :     Prepare to read from a package file.
782 :    
783 :     =cut
784 :    
785 :     sub StartUnPacking {
786 :     my ($packageFile) = @_;
787 :     # Open the package file for binary input.
788 :     (sysopen PACKFILE, $packageFile, O_RDONLY | O_BINARY) ||
789 :     die "Could not open $packageFile: $!";
790 :     # Return the package file input object.
791 :     return {
792 :     blockEnd => 0,
793 :     inCounter => 0
794 :     }
795 :     }
796 :    
797 :     =head3 GetTextRecord
798 :    
799 :     C<< my $data = GetTextRecord($packObject); >>
800 :    
801 :     Read a text record from the package file. The text record begins with a space and
802 :     ends with a new-line character.
803 :    
804 :     =cut
805 :    
806 :     sub GetTextRecord {
807 :     my ($packObject) = @_;
808 :     # Declare the return variable.
809 :     my $retVal;
810 :     # Read the next character.
811 :     my $buffer;
812 :     my $rv = read PACKFILE, $buffer, 1;
813 :     # Check to insure we read a space.
814 :     if (! defined $rv) {
815 :     # Here the read failed.
816 :     die "Error reading package file: $!";
817 :     } elsif ($rv == 0) {
818 :     # Here we've reached end-of-file. We'll return an undefined value
819 :     # to the caller.
820 :     } elsif ($buffer ne " ") {
821 :     # Here we didn't find a space: the file is corrupt.
822 :     die "Text record not found when expected in package file.";
823 :     } else {
824 :     # Here we have a real, live text record. We loop until we find a new-line.
825 :     $retVal = "";
826 :     $rv = read PACKFILE, $buffer, 1;
827 :     while (defined $rv && $rv == 1 && $buffer ne "\n") {
828 :     $retVal .= $buffer;
829 :     $rv = read PACKFILE, $buffer, 1;
830 :     }
831 :     # Check for error conditions.
832 :     if (! defined $rv) {
833 :     die "Error reading package file: $!";
834 :     } elsif ($rv == 0) {
835 :     die "End-of-file inside text record in package file.";
836 :     }
837 :     # Update the input counter.
838 :     $packObject->{inCounter} += length($retVal) + 2;
839 :     # Denote we haven't read an end block.
840 :     $packObject->{blockEnd} = 0;
841 :     }
842 :     return $retVal;
843 :     }
844 :    
845 :     =head3 GetBlock
846 :    
847 :     C<< my $block = GetBlock($packObject); >>
848 :    
849 :     Read a block of compressed data from the package file. If the end of the compressed data
850 :     has been reached, will return an undefined value.
851 :    
852 :     =cut
853 :    
854 :     sub GetBlock {
855 :     my ($packObject) = @_;
856 :     # Declare the return value.
857 :     my $retVal;
858 :     # Only proceed if we have NOT reached the end block.
859 :     if (! $packObject->{blockEnd}) {
860 :     # Read the indicator character.
861 :     my $buffer;
862 :     my $rv = read PACKFILE, $buffer, 1;
863 :     # Check for errors.
864 :     if (! defined $rv) {
865 :     die "Error reading package block mark: $!";
866 :     } elsif ($rv == 0) {
867 :     die "End-of-file read where block mark expected in package file.";
868 :     } elsif ($buffer eq 'b') {
869 :     # Here we have a normal block. We read it directly into the
870 :     # return variable.
871 :     $rv = read PACKFILE, $retVal, $PACK_BLOCK;
872 :     # Check for errors.
873 :     if (! defined $rv) {
874 :     die "Error reading package block data: $!";
875 :     } elsif ($rv < $PACK_BLOCK) {
876 :     die "Unexpected end-of-file inside package file block.";
877 :     } else {
878 :     # Record the bytes read.
879 :     $packObject->{inCounter} += $PACK_BLOCK + 1;
880 :     }
881 :     } elsif ($buffer eq 'e') {
882 :     # Here we have an end block. We need to read the block length
883 :     # and the checksum.
884 :     $rv = read PACKFILE, $buffer, 36;
885 :     # Check for errors.
886 :     if (! defined $rv) {
887 :     die "Error reading end block length: $!";
888 :     } elsif ($rv < 36) {
889 :     die "Unexpected end-of-file reading end block length and checksum.";
890 :     } else {
891 :     # Get the length and the checksum.
892 :     my $lengthCode = substr $buffer, 0, 4;
893 :     $packObject->{digest} = substr $buffer, 4, 32;
894 :     # Check for an empty block.
895 :     if ($lengthCode == 0) {
896 :     # Here the end block is empty. We will return an undefined value,
897 :     # indicating end-of-file. Record the 36 bytes read.
898 :     $packObject->{inCounter} += 36;
899 :     } else {
900 :     # Here we have a valid block length, so we read the block.
901 :     $rv = read PACKFILE, $retVal, $lengthCode;
902 :     # Check for errors.
903 :     if (! defined $rv) {
904 :     die "Error reading end block: $!";
905 :     } elsif ($rv < $lengthCode) {
906 :     die "Unexpected end-of-file inside end block.";
907 :     } else {
908 :     # Denote the next read will return end-of-file.
909 :     $packObject->{blockEnd} = 1;
910 :     # Record the bytes read.
911 :     $packObject->{inCounter} += $lengthCode + 37;
912 :     }
913 :     }
914 :     }
915 :     } else {
916 :     # Here we have an invalid block mark.
917 :     die "Invalid block mark read in package file.";
918 :     }
919 :     }
920 :     return $retVal;
921 :     }
922 :    
923 :     =head3 CreateFile
924 :    
925 :     C<< my $fileObject = CreateFile($fileName, $mode, $directory, $compObject); >>
926 :    
927 :     Create a file into which packaged data can be unpacked.
928 :    
929 :     =cut
930 :    
931 :     sub CreateFile {
932 :     my ($fileName, $mode, $directory, $compObject) = @_;
933 :     # Clear the file name.
934 :     $fileName =~ tr/: /__/;
935 :     # Add the file name to the directory to get the fully-qualified file name.
936 :     my $fullName = "$directory/$fileName";
937 :     # Extract the path.
938 :     my ($path, $suffix) = NameParse($fullName);
939 :     # Insure the path exists.
940 :     if (! -d $path) {
941 :     mkpath($path);
942 :     }
943 :     # Check for the directory problem.
944 :     if (-d $fullName) {
945 :     # Here a directory has the same name as the file. We append a suffix
946 :     # to work around the problem.
947 :     $fullName .= ".$mode";
948 :     }
949 :     # Open the file in the appropriate mode.
950 :     if ($mode eq 't') {
951 :     (open CLEARFILE, ">$fullName") ||
952 :     die "Could not open text file $fullName: $!";
953 :     } else {
954 :     (sysopen CLEARFILE, $fullName, O_WRONLY | O_CREAT | O_BINARY) ||
955 :     die "Could not open binary file $fullName: $!";
956 :     }
957 :     # Create a file manipulation object to return.
958 :     return {
959 :     mode => $mode,
960 :     compObject => $compObject,
961 :     digest => Digest::MD5->new()
962 :     };
963 :     }
964 :    
965 :     =head3 WriteData
966 :    
967 :     C<< WriteData($fileObject, $data, $status); >>
968 :    
969 :     Write a block of data to the output file.
970 :    
971 :     =cut
972 :    
973 :     sub WriteData {
974 :     my ($fileObject, $data, $status) = @_;
975 :     # Check the status code from the compressor.
976 :     if ($status != Z_OK && $status != Z_STREAM_END) {
977 :     die "Error in decompression: " . $fileObject->{compObject}->msg();
978 :     } else {
979 :     # Here we're okay, so we can write the data.
980 :     print CLEARFILE $data;
981 :     # Add it to the checksum.
982 :     $fileObject->{digest}->add($data);
983 :     }
984 :     }
985 :    
986 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3