[Bio] / FigRelEngTools / make-tarfiles Repository:
ViewVC logotype

View of /FigRelEngTools/make-tarfiles

Parent Directory | Revision Log


Revision 1.2 - (download) (annotate)
Wed Nov 28 22:51:09 2007 UTC (11 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +18 -3 lines
more support for exclusion

# -*- perl -*-


use File::Find;

# INT, TERM and HUP all end the script by calling exit.
for my $signame (qw(INT TERM HUP))
{
    $SIG{$signame} = sub { exit; };
}

# Message used when either positional argument is missing.
$usage = "usage: make_transferable_tarfiles FromDir ChunksDir";

# FromDir is taken first and ChunksDir only once FromDir was supplied;
# a missing (or false) value for either one aborts with the usage text.
$fromD = shift @ARGV;
$chunksD = shift @ARGV if $fromD;
die $usage unless $fromD && $chunksD;

#
# Creates individual tarfiles of the source directory files.
#


#if ($fromD =~ m/^\//)  { die "FromDir $fromD should be a relative, not an absolute path"; }
#if (-d $chunksD)       { die "ChunksDir $chunksD already exists"; }

# Create the output directory when it is not there yet.  Stop immediately
# if that fails: otherwise the whole source tree would be scanned first and
# the problem would only surface when tar cannot write its first chunk.
-d $chunksD or mkdir($chunksD) or die "Cannot create ChunksDir $chunksD: $!\n";

# Patterns for paths that must not end up in any tarfile.  each_dir() drops
# a file as soon as its path matches one of them.
my @exclude = (
    # Selected second-level directories below the top of the tree.
    qr!^[^/]+/((Sims[^/]+)|(CouplingData[^/]+)|(Figfams[^/]+)|PrecomputedPins|JobQueue)/!,

    # nr.<digits>.phr / .pin / .psq files.
    qr!nr\.\d+\.(phr|pin|psq)!,

    qr!bad\.figfams!,
    qr!Global/BBHs!,
    qr!Global/bbhs!,
    qr!Global/queued_similarities!,
    qr!Global/peg.synonyms.(009|015|2007-0208)!,
    qr!/background_jobs/.*!,
    qr!Global/bad!,
    qr!Logs/GenomeLog/.*!,
    qr!/Indexes/.*!,
    qr!/HOPSS!,
    qr!Global/Users/.*!,

    # Paths ending in a tilde.
    qr!~$!,
);


# Upper bound, in bytes, on the summed file sizes packed into one tarfile.
# A single file bigger than this is still written, in a chunk by itself.
my $file_max = 300_000_000;

# State of the chunk being assembled.  dump_files() reads these, and resets
# @files and $file_cur once the chunk has been written.
my $file_cur = 0;       # summed size of the files in @files
my @files;              # paths queued for the next tarfile
my $file_idx = 0;       # sequence number of the next tarfile

# Filled in by each_dir(): one [path, size] pair per file that is kept,
# plus the running total of their sizes.
my @all_files = ();
my $all_size = 0;

# no_chdir keeps the working directory fixed, so the paths collected here
# stay valid when tar reads them later.
find({
    wanted => \&each_dir,
    no_chdir => 1}, $fromD);

my $found = @all_files;
printf "Found %d files total size=%5.2f\n", $found, $all_size / 1_000_000;

# Largest files first.
@all_files = sort { $b->[1] <=> $a->[1] } @all_files;

# Keep the complete sorted listing ("path size" per line) for inspection.
my $listing = "/tmp/all";
open(my $all_fh, ">", $listing) or die "Cannot write $listing: $!\n";
print $all_fh join("\n", map { join(" ", @$_) } @all_files), "\n";
close($all_fh) or warn "Error closing $listing: $!\n";

# Fill chunks greedily in sorted order: flush the queued files whenever the
# next file would push the chunk over $file_max.
for my $ent (@all_files)
{
    my($file, $size) = @$ent;

    if (@files > 0 and $file_cur + $size > $file_max)
    {
        dump_files();
    }

    push(@files, $file);
    $file_cur += $size;
}

# Whatever is still queued forms the final, possibly short, chunk.
if (@files > 0)
{
    dump_files();
}

#
# File::Find "wanted" callback.  The traversal runs with no_chdir, so $_
# holds the path of the entry being visited.  Every plain file whose path
# matches none of the @exclude patterns is recorded as a [path, size] pair
# in @all_files and its size is added to $all_size.
#
sub each_dir
{
    my $path = $_;

    # Leave with return, not next: next inside a subroutine only works when
    # the caller happens to wrap the call in a loop block, and it triggers
    # the "Exiting subroutine via next" warning.
    return unless -f $path;

    for my $pattern (@exclude)
    {
        return if $path =~ $pattern;
    }

    my $size = -s $path;
    $all_size += $size;
    push(@all_files, [$path, $size]);
}


#
# Write the files queued in @files as the next chunk,
# $chunksD/data.NNNN.tgz, and record the same list in /tmp/manifest.NNNN.
# Afterwards the queue (@files, $file_cur) is emptied; $file_idx is advanced
# so the next call uses the next number.
#
# Dies when a list file cannot be written or tar cannot be started, exits
# when tar was killed by a signal, and warns when tar reports a non-zero
# exit status.
#
sub dump_files
{
    my $n = @files;
    my $manifest = sprintf("/tmp/manifest.%04d", $file_idx);
    my $out = sprintf("data.%04d", $file_idx++);
    my $mb = sprintf("%5.2f", $file_cur / 1000000);
    print "Write $mb $n to $out\n";

    # The same newline-separated list goes to the per-process file handed to
    # tar and to the numbered manifest that is kept.
    my $list = "/tmp/files.$$";
    for my $target ($list, $manifest)
    {
        open(my $fh, ">", $target) or die "Cannot write $target: $!\n";
        print $fh join("\n", @files), "\n";
        # Buffered write errors only show up at close; a truncated list
        # would silently produce an incomplete chunk.
        close($fh) or die "Error closing $target: $!\n";
    }

    # List form of system: no shell is involved, so a ChunksDir containing
    # spaces or shell metacharacters is passed to tar unchanged.
    system("tar", "-v", "-c", "-T", $list, "-z", "-f", "$chunksD/$out.tgz");
    my ($status, $err) = ($?, $!);

    # The list file is only needed while tar runs; the manifest holds the
    # same content.
    unlink($list);

    # -1 means tar could not be started at all; check it first because
    # -1 & 127 would otherwise be reported as "signal 127".
    if ($status == -1)
    {
        die "Cannot run tar: $err\n";
    }
    if ($status & 127)
    {
        print "Tar killed with signal ", $status & 127, "\n";
        exit;
    }
    if ($status >> 8)
    {
        warn "tar exited with status ", $status >> 8, " while writing $chunksD/$out.tgz\n";
    }

    @files = ();
    $file_cur = 0;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3