[Bio] / FigKernelScripts / FFB2_build_updated_FF.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB2_build_updated_FF.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (download) (as text) (annotate)
Tue Nov 30 00:16:46 2010 UTC (9 years ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2010_1206, rast_rel_2011_0119
Changes since 1.21: +1 -1 lines
fix memcache stuff

########################################################################

use strict;
use FIG;
use IPC::Run qw(start finish);
use Cache::Memcached::Fast;
use Getopt::Long;

#
# Command line: three optional switches, four required positional arguments
# (the old release directory must already exist), and an optional ssfam file.
#
my $usage = "usage: FFB2_build_updated_FF [--no-reload-memcache] [--override fn-override-file] [--skipotu] OldReleaseDir NewReleaseDir memcache-host memcache-port [ssfam]";

# Passed to FFB2_load_memcache as its --seed argument.
my $pseed = "/vol/pseed/FIGdisk/FIG/Data";

my($skip_otu, $override_file);
my $no_reload_memcache = 0;

my $rc = GetOptions(
    "skipotu"            => \$skip_otu,
    "override=s"         => \$override_file,
    "no-reload-memcache" => \$no_reload_memcache,
);
die $usage unless $rc;

my($oldD, $newD, $mchost, $mcport);

# The old release directory has to be named and has to be a directory.
$oldD = shift @ARGV;
die $usage unless $oldD && -d $oldD;

# The remaining required arguments only need to be present (and true).
for my $required (\$newD, \$mchost, \$mcport)
{
    $$required = shift @ARGV;
    die $usage unless $$required;
}

# Optional: a subsystem-based families file to use instead of generating one.
my $in_ssfam = shift @ARGV;

# Export the FIG temp area as TMPDIR for the commands run from here.
$ENV{TMPDIR} = $FIG_Config::temp;

# In the visible script this is only mentioned by a commented-out pipeline.
my $sort_args = "-T $FIG_Config::temp -S 4G";

# The new release directory may already exist, but an existing families.2c
# inside it is never overwritten. A missing directory is created.
if (! -d $newD)
{
    mkdir($newD, 0777) or die "Error creating $newD: $!";
}
elsif (-f "$newD/families.2c")
{
    die "Not overwriting existing families in $newD\n";
}

my $mc = Cache::Memcached::Fast->new({ servers => ["$mchost:$mcport"] });
die "Could not connect to memcached at $mchost:$mcport\n" unless $mc;

#
# Do some work up front to ensure we can load the memcache with all the data we need.
#

# Start from the file named on the command line; if that is not an existing
# plain file, generate the subsystem-based families into the temp area.
my $ssfam = $in_ssfam;
unless (-f $in_ssfam)
{
    $ssfam = "$FIG_Config::temp/subsys.based.families.$$";
    &FIG::run("FFB2_make_subsys_based_families > $ssfam");
}

# Unless --no-reload-memcache was given, empty the cache and reload it from
# the ssfam file and the old release's families.2c.
unless ($no_reload_memcache)
{
    $mc->flush_all();

    print "Loading memcache\n";
    my $load_cmd = join(" ",
                        "FFB2_load_memcache", "--seed", $pseed,
                        $ssfam, "$oldD/families.2c", $mchost, $mcport);
    &FIG::run($load_cmd);
    print "Done\n";
}

#
# Unless --skipotu was given, run the FF update in a forked child while this
# process computes the phylo oligos; the child is reaped with waitpid later
# in the script. With --skipotu the update simply runs inline.
#
my $parallel = !$skip_otu;
my $child_pid;
if ($parallel)
{
    $child_pid = fork;

    # fork returns undef on failure, and undef == 0 is true. Without this
    # check a failed fork would send the parent down the child branch below,
    # where it would run the update and then exit before the phylo oligo work.
    defined($child_pid) or die "Could not fork FIGfam update child: $!\n";

    if ($child_pid == 0)
    {
        # Child: do the FIGfam update and leave; a die inside it gives the
        # non-zero exit status that the parent checks for after waitpid.
        figfam_processing();
        exit;
    }
}
else
{
    figfam_processing();
}

if ($skip_otu)
{
    #
    # --skipotu: no phylo oligos are computed. Create an empty good.oligos
    # file under PhyloSigs/<k> for each k in 7..12 instead.
    #

    my $phylo_dir = "$newD/PhyloSigs";
    if (! -d $phylo_dir)
    {
        mkdir $phylo_dir or die "Could not mkdir $phylo_dir: $!";
    }
    for my $k (7..12)
    {
        my $dir = "$phylo_dir/$k";
        if (! -d $dir)
        {
            mkdir $dir or die "Could not mkdir $dir: $!";
        }

        # Opening for write and closing again leaves a zero-length file.
        # Both steps are checked so a placeholder that could not be created
        # is reported here rather than surfacing later as a missing file.
        my $empty = "$dir/good.oligos";
        open(my $fh, ">", $empty) or die "Could not create $empty: $!";
        close($fh) or die "Could not close $empty: $!";
    }
}
else
{
    #
    # Run the new parallel get-prots: start $n_get_prots FFB2_xx processes,
    # each told the worker count and its own index, each with stdin from
    # /dev/null and stdout redirected to its own file in the temp area.
    #
    my $n_get_prots = 4;

    my @handles;
    for my $i (0..$n_get_prots - 1)
    {
        my $outfile = "$FIG_Config::temp/ffb2_xx.$$.$i";

        my $h = start(['FFB2_xx', 12,  "$newD/setI", $n_get_prots, $i, $mchost, $mcport],
                      "<", "/dev/null", ">", $outfile);
        push(@handles, [$i, $h, $outfile]);
    }

    # Wait for every worker before judging the run, so none is left behind
    # unreaped. finish() is false when a worker did not exit cleanly; merging
    # its partial output would produce incomplete signatures, so stop instead.
    my @failed;
    for my $e (@handles)
    {
        my($i, $h, $file) = @$e;
        print "Wait for finish $i $file\n";
        $h->finish() or push(@failed, $i);
    }
    die "FFB2_xx failed for worker(s) @failed; not merging phylo oligos\n" if @failed;

    my @files = map { $_->[2] } @handles;
    print STDERR "Merging phylo @files\n";

    #
    # The Debian sort on the mac pro's does bad things with
    # large merges. Use the latest one there, built in a local dir.
    # The batch size is set to one more than the number of input files.
    #
    my @sort;
    if (-x "/scratch/olson/coreutils/bin/sort")
    {
        my $n = @files;
        $n++;
        @sort = ("/scratch/olson/coreutils/bin/sort", "--batch-size=$n", "-m", @files);
    }
    else
    {
        @sort = ("sort", "-S", "2G", "-m", @files);
    }

    # The sort words are joined with spaces into one shell pipeline that
    # merges the worker outputs into FFB2_usable_motifs.
    &FIG::run("@sort | FFB2_usable_motifs $newD/PhyloSigs 7-12");

    #&FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");
    #&FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort $sort_args | FFB2_usable_motifs $newD/PhyloSigs");
}

if ($parallel)
{
    # Wait for the forked FIGfam update and refuse to continue if it failed.
    my $reaped = waitpid($child_pid, 0);

    # If waitpid did not return our child's pid, $? is not a status for that
    # child and a stale zero could otherwise be taken for success.
    if ($reaped != $child_pid)
    {
        die "Could not wait for parallel child $child_pid: $!\n";
    }
    if ($? != 0)
    {
        die "Error running parallel child: $?\n";
    }
}

# NOTE(review): this unconditional die ends the script here, so none of the
# statements that follow it in the file are ever executed. It looks like a
# development checkpoint -- confirm whether it is still wanted before relying
# on any of the later stages.
die "We're at the merge\n";

# Output area for the merged tables; one subdirectory per k is added below.
mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";

# [handle, k] pairs for the pipelines started in the loop below.
my @merge_handles;

foreach my $i (7..12)
{
    mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";
    # Start FFB2_merge_oligos on the Kmers and PhyloSigs good.oligos files for
    # this k, piped into FFB2_create_binary_kmers, whose last argument is
    # $newD/Merged/$i/table.binary. The handle is only waited on in the
    # second loop, so the pipelines for all k are started before any is finished.
    my $h = start(["FFB2_merge_oligos", "$newD/Kmers/$i/good.oligos", "$newD/PhyloSigs/$i/good.oligos"], "|",
		  ["FFB2_create_binary_kmers", "-l", $i, "-s", "4,2", "-", "$newD/Merged/$i/table.binary"]);

    push(@merge_handles, [$h, $i]);

#    &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");
}

# Wait for each pipeline in turn. The finish() result is printed but not
# acted on, so a failed pipeline does not stop the script here.
for my $ent (@merge_handles)
{
    my($h, $k) = @$ent;
    print "Wait for finish for k=$k\n";
    my $r = $h->finish();
    print "done r=$r\n";
}

# NOTE(review): an unconditional die earlier in the script ("We're at the
# merge") comes before this point, so this section does not currently run.

# Start two FFB2_load_oligo_index runs and build_prok_nonff_fasta (stdout
# redirected to extra_prok_seqs.fasta). The three handles are finished
# later in the script.
my $load1 = start(["FFB2_load_oligo_index", "$newD/function.index",  "$newD/FRI.db"]);
my $load2 = start(["FFB2_load_oligo_index", "$newD/setI", "$newD/setI.db"]);
my $build = start(["build_prok_nonff_fasta", "$newD"],  ">", "$newD/extra_prok_seqs.fasta");

#
# create merge files and use the -merge to create_binary_kmers
#
# Each merge file holds one line per input: a path, a tab, and a number
# (1 for files matching kmers.2.*, 3 for files matching kmers.3.*, 2 for the
# PhyloSigs file). NOTE(review): what the number means is decided by
# FFB2_create_binary_kmers -merge, which is not visible here.
#

# k => path of the merge file written for that k.
my %mergefile;
foreach my $i (7..12)
{
    my $mf = "$newD/KmerBuild/mergefile.$i";
    $mergefile{$i} = $mf;
    open(MF, ">", $mf) or die "Cannot write $mf: $!";

    # <...> is a glob here; the matching paths are written in sorted order.
    for my $f (sort <$newD/Kmers/kmers.2.*/$i/good.oligos.gz>)
    {
	print MF "$f\t1\n";
    }
    for my $f (sort <$newD/Kmers/kmers.3.*/$i/good.oligos.gz>)
    {
	print MF "$f\t3\n";
    }
    # The PhyloSigs entry is written whether or not that .gz file exists.
    print MF "$newD/PhyloSigs/$i/good.oligos.gz\t2\n";
    close(MF);
}

# Build the binary table for each k from its merge file.
# NOTE(review): the output path $newD/Merged/$i/table.binary is the same one
# given to FFB2_create_binary_kmers by the per-k pipeline earlier in the
# script -- confirm which of the two is meant to produce it.
foreach my $i (7..12)
{
    my $mf = $mergefile{$i};
    &FIG::run("FFB2_create_binary_kmers -l $i -s 4,2,4 -merge $mf - $newD/Merged/$i/table.binary");
}

# NOTE(review): an unconditional die earlier in the script ("We're at the
# merge") comes before this point, so this section does not currently run.

# The test and comparison commands are run with plain system() and their
# results are not checked, so a failure here does not stop the script.
system("FFB2_run_tests $newD");
system("FFB2_compare_tests $oldD $newD");
# Wait for build_prok_nonff_fasta (started earlier, writing
# extra_prok_seqs.fasta) before formatdb reads that file.
$build->finish();
&FIG::run("formatdb -p T -i $newD/extra_prok_seqs.fasta");
# NOTE(review): this is the only FIG::run call in the file that passes a list
# instead of a single command string -- verify that FIG::run uses the extra
# arguments, otherwise compute_fasta_lengths is run without its file names.
&FIG::run("compute_fasta_lengths", "$newD/extra_prok_seqs.fasta", "$newD/extra_prok_seqs.fasta.lengths");

# Wait for the two FFB2_load_oligo_index runs started earlier; their
# finish() results are not checked.
$load1->finish();
$load2->finish();

&FIG::run("FFB2_make_FF_index $newD $newD/fam.func.index $newD/FamFuncBlastD");
&FIG::run("FFB2_build_ff_indexes $newD");

# Coupling values are computed from the new families.2c via shell redirection.
&FIG::run("get_coupling_values $newD/families.2c > $newD/coupling.values");

#
# figfam_processing()
#
# Runs the three FIGfam build steps in order through FIG::run:
# FFB2_update_FIGfams, FFB2_get_oligos2, then FFB2_xy. Takes no arguments; it
# reads the file-scoped $override_file, $mchost, $mcport, $ssfam, $newD and
# $oldD. stdout and stderr of the update step are redirected to
# "$newD.update.<pid>.out" and "$newD.update.<pid>.err".
#
sub figfam_processing
{
    # The --override switch is only passed on when an override file was given.
    my $override_arg = defined($override_file) ? "--override $override_file" : "";

    my $update_cmd = "FFB2_update_FIGfams -memcache $mchost:$mcport -ssfams $ssfam $override_arg $newD/families.2c $newD/family.functions $oldD > $newD.update.$$.out 2> $newD.update.$$.err";
    my $oligos_cmd = "FFB2_get_oligos2 $newD/families.2c $newD/family.functions $newD/function.index $newD/KmerBuild $mchost $mcport";
    my $kmers_cmd  = "FFB2_xy $newD/KmerBuild $newD/Kmers 7-12";

    &FIG::run($update_cmd);
    &FIG::run($oligos_cmd);

    # Earlier form of this pipeline, kept for reference:
    #    &FIG::run("FFB2_update_FIGfams -f $ssfam $newD/families.2c $newD/family.functions $oldD");
    #    &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions $newD/function.index $mchost $mcport |" .
    #	      "FFB2_usable_motifs $newD/Kmers");

    &FIG::run($kmers_cmd);
}



MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3