########################################################################
#
# FFB2_build_updated_FF
#
# Driver script that creates NewReleaseDir from OldReleaseDir by running
# a sequence of external FFB2_* tools.  In outline:
#
#   1. Parse options/arguments and create NewReleaseDir (it must not exist).
#   2. Flush the given memcached instance and load it via FFB2_load_memcache.
#   3. Run figfam_processing() -- in a forked child unless --skipotu is
#      given -- while the parent produces the PhyloSigs kmer data
#      (or empty placeholder files when --skipotu is given).
#   4. Merge, index, and test steps (currently unreachable; see the
#      NOTE(review) next to the "We're at the merge" die below).
#
########################################################################

use strict;
use warnings;

use FIG;
use IPC::Run qw(start finish);
use Cache::Memcached::Fast;
use Getopt::Long;

my $usage = "usage: FFB2_build_updated_FF [--override fn-override-file] [--skipotu] OldReleaseDir NewReleaseDir memcache-host memcache-port [ssfam]";

my $skip_otu;
my $override_file;

my $rc = GetOptions("skipotu"    => \$skip_otu,
                    "override=s" => \$override_file);
$rc or die $usage;

# All four positional arguments are required, and OldReleaseDir must
# already exist as a directory.
my($oldD, $newD, $mchost, $mcport);
(
 ($oldD   = shift @ARGV) && (-d $oldD) &&
 ($newD   = shift @ARGV) &&
 ($mchost = shift @ARGV) &&
 ($mcport = shift @ARGV)
)
    || die $usage;

# Optional fifth argument: a precomputed subsystem-based families file.
my $in_ssfam = shift @ARGV;

$ENV{TMPDIR} = $FIG_Config::temp;

# Only referenced by the commented-out legacy pipeline further down.
my $sort_args = "-T $FIG_Config::temp -S 4G";

mkdir($newD, 0777) || die "You have an existing $newD; remove it and try again";

my $mc = Cache::Memcached::Fast->new({ servers => ["$mchost:$mcport"] });
$mc or die "Could not connect to memcached at $mchost:$mcport\n";
$mc->flush_all();

#
# Do some work up front to ensure we can load the memcache with all the data we need.
#

my $ssfam;
if (defined($in_ssfam) && -f $in_ssfam)
{
    $ssfam = $in_ssfam;
}
else
{
    # No usable file was supplied, so generate one in the temp directory.
    $ssfam = "$FIG_Config::temp/subsys.based.families.$$";
    FIG::run("FFB2_make_subsys_based_families > $ssfam");
}

print "Loading memcache\n";
FIG::run("FFB2_load_memcache $ssfam $oldD/families.2c $mchost $mcport");
print "Done\n";

# FIGfam processing runs concurrently with the phylo-oligo work only
# when that phylo-oligo work is actually being done.
my $parallel = !$skip_otu;

my $child_pid;
if ($parallel)
{
    #
    # Run the FF update in a child process while we manage getting the phylo oligos here.
    #

    $child_pid = fork;

    # fork returns undef on failure; undef == 0 is true in Perl, so
    # without this check the parent would take the child branch below
    # and exit after figfam_processing().
    defined($child_pid) or die "Could not fork for FIGfam processing: $!\n";

    if ($child_pid == 0)
    {
        figfam_processing();
        exit;
    }
}
else
{
    figfam_processing();
}

if ($skip_otu)
{
    #
    # Create empty kmer files in PhyloSigs.
    #
    my $dir = "$newD/PhyloSigs";
    if (! -d $dir)
    {
        mkdir $dir or die "Could not mkdir $dir: $!";
    }
    for my $k (7..12)
    {
        my $kdir = "$newD/PhyloSigs/$k";
        if (! -d $kdir)
        {
            mkdir $kdir or die "Could not mkdir $kdir: $!";
        }

        # Opening for write and closing immediately leaves an empty file.
        my $empty = "$kdir/good.oligos";
        open(my $fh, ">", $empty) or die "Could not create $empty: $!";
        close($fh) or die "Could not close $empty: $!";
    }
}
else
{
    #
    # Run the new parallel get-prots
    #
    my $n_get_prots = 4;
    my @handles;
    for my $i (0 .. $n_get_prots - 1)
    {
        my $outfile = "$FIG_Config::temp/ffb2_xx.$$.$i";
        my $h = start(['FFB2_xx', 12, "$newD/setI", $n_get_prots, $i, $mchost, $mcport],
                      "<", "/dev/null",
                      ">", $outfile);
        push(@handles, [$i, $h, $outfile]);
    }

    for my $e (@handles)
    {
        my($i, $h, $file) = @$e;
        print "Wait for finish $i $file\n";

        # IPC::Run's finish returns false when a child exits non-zero.
        # Report it rather than merging possibly-truncated output silently.
        $h->finish()
            or warn "FFB2_xx worker $i reported a failure; $file may be incomplete\n";
    }

    my @files = map { $_->[2] } @handles;
    print STDERR "Merging phylo @files\n";

    #
    # The Debian sort on the mac pro's does bad things with
    # large merges. Use the latest one there, built in a local dir.
    #
    my @sort;
    if (-x "/scratch/olson/coreutils/bin/sort")
    {
        # Batch size is one more than the number of files being merged.
        my $n = @files;
        $n++;
        @sort = ("/scratch/olson/coreutils/bin/sort", "--batch-size=$n", "-m", @files);
    }
    else
    {
        @sort = ("sort", "-S", "2G", "-m", @files);
    }

    FIG::run("@sort | FFB2_usable_motifs $newD/PhyloSigs 7-12");

    #&FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");
    #&FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort $sort_args | FFB2_usable_motifs $newD/PhyloSigs");
}

if ($parallel)
{
    # Reap the figfam_processing() child; $? then holds its exit status.
    waitpid($child_pid, 0);
    if ($? != 0)
    {
        die "Error running parallel child: $?\n";
    }
}

# NOTE(review): this unconditional die stops the script here, so none of
# the main-flow statements below it ever run.  It looks like a debugging
# stop left in place -- confirm with the owner before removing it.
die "We're at the merge\n";

mkdir("$newD/Merged", 0777) || die "could not make $newD/Merged: $!";

# One merge pipeline per kmer size: FFB2_merge_oligos piped into
# FFB2_create_binary_kmers, all started before any is waited on.
my @merge_handles;
foreach my $i (7..12)
{
    mkdir("$newD/Merged/$i", 0777) || die "could not make $newD/Merged/$i: $!";
    my $h = start(["FFB2_merge_oligos", "$newD/Kmers/$i/good.oligos", "$newD/PhyloSigs/$i/good.oligos"],
                  "|",
                  ["FFB2_create_binary_kmers", "-l", $i, "-s", "4,2", "-", "$newD/Merged/$i/table.binary"]);
    push(@merge_handles, [$h, $i]);

    # &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");
}

for my $ent (@merge_handles)
{
    my($h, $k) = @$ent;
    print "Wait for finish for k=$k\n";
    my $r = $h->finish();
    print "done r=$r\n";
}

# Start the two index loads and the extra-sequence build in the
# background; they are waited on further down.
my $load1 = start(["FFB2_load_oligo_index", "$newD/function.index", "$newD/FRI.db"]);
my $load2 = start(["FFB2_load_oligo_index", "$newD/setI", "$newD/setI.db"]);

my $build = start(["build_prok_nonff_fasta", "$newD"], ">", "$newD/extra_prok_seqs.fasta");

#foreach my $i (7..12)
#{
#    &FIG::run("FFB2_create_binary_kmers -s 4,2 $newD/Merged/$i/merged.oligos $newD/Merged/$i/table.binary");
#}

# The exit status of these two commands is not checked.
system("FFB2_run_tests $newD");
system("FFB2_compare_tests $oldD $newD");

$build->finish();
FIG::run("formatdb -p T -i $newD/extra_prok_seqs.fasta");

# NOTE(review): every other FIG::run call in this script passes a single
# command string; this one passes a list.  Confirm FIG::run accepts
# multiple arguments, otherwise the two file names may be ignored.
FIG::run("compute_fasta_lengths", "$newD/extra_prok_seqs.fasta", "$newD/extra_prok_seqs.fasta.lengths");

$load1->finish();
$load2->finish();

FIG::run("FFB2_make_FF_index $newD $newD/fam.func.index $newD/FamFuncBlastD");
FIG::run("FFB2_build_ff_indexes $newD");
FIG::run("get_coupling_values $newD/families.2c > $newD/coupling.values");

#
# figfam_processing()
#
# Runs FFB2_update_FIGfams followed by FFB2_get_oligos2 via FIG::run.
# Takes no arguments; reads the file-level variables $override_file,
# $ssfam, $newD, $oldD, $mchost and $mcport.  When --override was given,
# the override file is passed through to FFB2_update_FIGfams.
#
sub figfam_processing
{
    my $override_arg = "";
    if (defined($override_file))
    {
        $override_arg = "--override $override_file";
    }

    FIG::run("FFB2_update_FIGfams -ssfams $ssfam $override_arg $newD/families.2c $newD/family.functions $oldD");
    FIG::run("FFB2_get_oligos2 $newD/families.2c $newD/family.functions $newD/function.index $newD/KmerBuild $mchost $mcport");

    # &FIG::run("FFB2_update_FIGfams -f $ssfam $newD/families.2c $newD/family.functions $oldD");
    # &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions $newD/function.index $mchost $mcport |" .
    #           "FFB2_usable_motifs $newD/Kmers");
}