[Bio] / FigRelEngTools / extract_transferred_data Repository:
ViewVC logotype

View of /FigRelEngTools/extract_transferred_data

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (download) (annotate) (vendor branch)
Mon Jan 12 21:30:42 2004 UTC (15 years, 3 months ago) by olson
Branch: bob, MAIN
CVS Tags: bob1, HEAD
Changes since 1.1: +0 -0 lines
Initial import of some release-building tools.

#!/usr/bin/perl

use File::Basename;
use FileHandle;
use IPC::Open3;
use strict;

my $usage = "usage: $0 ChunksDir";

my $chunksD;

($chunksD = shift @ARGV)
    || die $usage;

my $err = 0;
my $files = "";

if (! -d $chunksD)
{
    die "Cannot find chunks directory $chunksD\n";
}

#
# Figure out a good blocksize to use for reading.
#

my $blocksize = 4096;

my @s = stat($chunksD);
if (@s)
{
    $blocksize = $s[11];
    # print "Computed optimal blocksize as $blocksize\n";
}


my $fileListFH = new FileHandle;
open($fileListFH, "<$chunksD/checksums") or die "Cannot open checksums file $chunksD/checksums: $!";

#
# First scan the checksums file to ensure we have all the datafiles we need and that
# they are of the correct size.
#
# We also fill in the @files list, which contains triples
# [full-path, filename, checksum, size].
#

my @files = ();
my $total_size = 0;

foreach my $flLine (<$fileListFH>)
{
    my($check_sum, $sz, $file);

    
    if ($flLine =~ /^(\d+)\s+(\d+)\s+(\S+)$/)
    {
        $check_sum = $1;
        $sz = $2;
        my $local_path = $3;
	$file = basename($local_path);
    }
    else
    {
	warn "Unrecognized line in checksums file: $flLine";
	next;
    }

    my $path = "$chunksD/$file";
    my $file_size = -s $path;

    if ($file_size)
    {
	if ($file_size != $sz)
	{
	    $err++;
	    print STDERR "File $path is of the wrong size ($file_size should be $sz)\n";
	    next;
	}
	else
	{
	    #
	    # It's good, add to the list.
	    #

	    push(@files, [$path, $file, $check_sum, $sz]);
	    $total_size += $sz;
	}
    }
    else
    {
	print STDERR "File $path does not exist\n";
	$err++;
    }
}

#
# See if we were okay; bail if not.
#

if ($err > 0)
{
    die "Errors found in data files, aborting\n";
}

print "Initial check successful; starting extraction of ", format_size($total_size), " in ", int(@files), " files\n";

#
# Okay, looks good.
#
# The plan here is to open up the tar to extract, and simultaneously do the checksum
# on the input files and send them to the cksum command. The rationale here is that
# the checksum probably won't fail, and we can avoid reading the file twice this way.
#

#
# Catch SIGPIPE in case a child dies.
#

$SIG{PIPE} = \&sigpipe;

my $tarFH = new FileHandle;

open($tarFH, "| tar xzf -") or die "Cannot start tar: $!";

for my $item (@files)
{
    my($path, $file, $check_sum, $sz) = @$item;
    
    #
    # Open a pipe to compute the checksum.
    #
    my $cksumReadFH = new FileHandle;
    my $cksumWriteFH = new FileHandle;
    my $cksumErrFH = new FileHandle;
    
    my $pid = open3($cksumWriteFH, $cksumReadFH, $cksumErrFH, "cksum");
    $pid or die "Cannot open cksum pipe: $!";
    
    my $buf;
    my $fileFH;

    my $msz = format_size($sz);
    print "Extracting $path, size is $msz bytes\n";
    open($fileFH, "<$path") or die "Cannot read $path: $!";
    
    my $n;
    while ($n = sysread($fileFH, $buf, $blocksize))
    {
	$cksumWriteFH->syswrite($buf, $n);
	$tarFH->syswrite($buf, $n);
    }
    
    $cksumWriteFH->close();
    while (<$cksumReadFH>)
    {
	if (/^(\d+)\s+(\d+)$/)
	{
	    my $thisSum = $1;
	    my $thisSize = $2;
	    if ($thisSum != $check_sum)
	    {
		print STDERR "Invalid checksum on $path (was $thisSum, should be $check_sum)\n";
		$err++;
	    }
	}
    }
    
    $cksumReadFH->close();
    $cksumErrFH->close();
    
    my $stat = waitpid($pid, 0);
    # print "Wait on cksum returns $stat\n";
    if ($err)
    {
	die "errors detected; expansion terminated\n";
    }
}

$tarFH->close();


sub format_size
{
    my($sz) = @_;
    my $msz;
    
    if ($sz > 1000000000)
    {
	$msz = sprintf("%.1f G", $sz / 1000000000);
    }
    elsif ($sz > 1000000)
    {
	$msz = sprintf("%.1f M", $sz / 1000000);
    }
    elsif ($sz > 1000)
    {
	$msz = sprintf("%.1f K", $sz / 1000);
    }
    else
    {
	$msz = $sz;
    }

    return $msz;
}

sub sigpipe
{
    my($sig) = @_;
    die "Child process died with signal $sig\n";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3