[Bio] / FigKernelScripts / FFB2_build_updated_FF.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/FFB2_build_updated_FF.pl

Parent Directory | Revision Log | View Patch

revision 1.2, Mon Nov 16 21:56:22 2009 UTC | revision 1.21, Mon Nov 29 22:25:34 2010 UTC
# Line 1  Line 1 
1  ########################################################################  ########################################################################
2    
3    use strict;
4  use FIG;  use FIG;
5    use IPC::Run qw(start finish);
6    use Cache::Memcached::Fast;
7    use Getopt::Long;
8    
9  my $usage = "usage: FFB2_build_updated_FF OldReleaseDir NewReleaseDir";  my $usage = "usage: FFB2_build_updated_FF [--no-reload-memcache] [--override fn-override-file] [--skipotu] OldReleaseDir NewReleaseDir memcache-host memcache-port [ssfam]";
10    
11  my($oldD,$newD);  my $pseed = "/vol/pseed/FIGdisk/FIG/Data";
12    
13    my $skip_otu;
14    my $override_file;
15    my $no_reload_memcache = 0;
16    
17    my $rc = GetOptions("skipotu" => \$skip_otu,
18                        "override=s" => \$override_file,
19                        "no-reload-memcache" => \$no_reload_memcache);
20    
21    $rc or die $usage;
22    
23    my($oldD,$newD,$mchost, $mcport);
24    
25  (  (
26   ($oldD = shift @ARGV) && (-d $oldD) &&   ($oldD = shift @ARGV) && (-d $oldD) &&
27   ($newD = shift @ARGV)   ($newD = shift @ARGV) &&
28     ($mchost = shift @ARGV) &&
29     ($mcport = shift @ARGV)
30  )  )
31      || die $usage;      || die $usage;
32    
33  mkdir($newD,0777) || die "You have an existing $newD; remove it and try again";  my $in_ssfam = shift;
34  &FIG::run("FFB2_update_FIGfams $newD/families.2c $newD/family.functions $oldD");  
35  &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions 2> $newD/function.index |" .  $ENV{TMPDIR} = $FIG_Config::temp;
36            "sort -T . | FFB2_usable_motifs $newD/Kmers");  
37    my $sort_args = "-T $FIG_Config::temp -S 4G";
38    
39    if (-d $newD)
40    {
41        if (-f "$newD/families.2c")
42        {
43            die "Not overwriting existing families in $newD\n";
44        }
45    }
46    else
47    {
48        mkdir($newD,0777) || die "Error creating $newD: $!";
49    }
50    
51    my $mc = new Cache::Memcached::Fast({ servers => ["$mchost:$mcport"] } );
52    $mc or die "Could not connect to memcached at $mchost:$mcport\n";
53    
54    #
55    # Do some work up front to ensure we can load the memcache with all the data we need.
56    #
57    
58    my $ssfam;
59    if (-f $in_ssfam)
60    {
61        $ssfam = $in_ssfam;
62    }
63    else
64    {
65        $ssfam = "$FIG_Config::temp/subsys.based.families.$$";
66        &FIG::run("FFB2_make_subsys_based_families > $ssfam");
67    }
68    
69    if (!$no_reload_memcache)
70    {
71        $mc->flush_all();
72    
73        print "Loading memcache\n";
74        &FIG::run("FFB2_load_memcache --seed $pseed $ssfam $oldD/families.2c $mchost $mcport");
75        print "Done\n";
76    }
77    
78    my $parallel = !$skip_otu;
79    my $child_pid;
80    if ($parallel)
81    {
82        #
83        # Run the FF update in a child process while we manage getting the phylo oligos here.
84        #
85    
86        $child_pid = fork;
87    
88        if ($child_pid == 0)
89        {
90            figfam_processing();
91            exit;
92        }
93    }
94    else
95    {
96        figfam_processing();
97    }
98    
99    if ($skip_otu)
100    {
101        #
102        # Create empty kmer files in PhyloSigs.
103        #
104    
105        my $dir = "$newD/PhyloSigs";
106        if (! -d $dir)
107        {
108            mkdir $dir or die "Could not mkdir $dir: $!";
109        }
110        for my $k (7..12)
111        {
112            my $dir = "$newD/PhyloSigs/$k";
113            if (! -d $dir)
114            {
115                mkdir $dir or die "Could not mkdir $dir: $!";
116            }
117            open(TT, ">", "$dir/good.oligos");
118            close(TT);
119        }
120    }
121    else
122    {
123        #
124        # Run the new parallel get-prots
125        #
126        my $n_get_prots = 4;
127    
128        my @handles;
129        for my $i (0..$n_get_prots - 1)
130        {
131            my $outfile = "$FIG_Config::temp/ffb2_xx.$$.$i";
132    
133            my $h = start(['FFB2_xx', 12,  "$newD/setI", $n_get_prots, $i, $mchost, $mcport],
134                          "<", "/dev/null", ">", $outfile);
135            push(@handles, [$i, $h, $outfile]);
136        }
137        for my $e (@handles)
138        {
139            my($i, $h, $file) = @$e;
140            print "Wait for finish $i $file\n";
141            $h->finish();
142        }
143    
144        my @files = map { $_->[2] } @handles;
145        print STDERR "Merging phylo @files\n";
146    
147        #
148        # The Debian sort on the mac pro's does bad things with
149        # large merges. Use the latest one there, built in a local dir.
150        my @sort;
151        if (-x "/scratch/olson/coreutils/bin/sort")
152        {
153            my $n = @files;
154            $n++;
155            @sort = ("/scratch/olson/coreutils/bin/sort", "--batch-size=$n", "-m", @files);
156        }
157        else
158        {
159            @sort = ("sort", "-S", "2G", "-m", @files);
160        }
161    
162    
163        &FIG::run("@sort | FFB2_usable_motifs $newD/PhyloSigs 7-12");
164    
165        #&FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");
166        #&FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort $sort_args | FFB2_usable_motifs $newD/PhyloSigs");
167    }
168    
169    if ($parallel)
170    {
171        my $rc = waitpid($child_pid, 0);
172        if ($? != 0)
173        {
174            die "Error running parallel child: $?\n";
175        }
176    }
177    
178    die "We're at the merge\n";
179    
 &FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");  
 &FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort -T . | FFB2_usable_motifs $newD/PhyloSigs");  
180  mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";  mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";
181    
182    my @merge_handles;
183    
184  foreach my $i (7..12)  foreach my $i (7..12)
185  {  {
186      mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";      mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";
187      &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");      my $h = start(["FFB2_merge_oligos", "$newD/Kmers/$i/good.oligos", "$newD/PhyloSigs/$i/good.oligos"], "|",
188                      ["FFB2_create_binary_kmers", "-l", $i, "-s", "4,2", "-", "$newD/Merged/$i/table.binary"]);
189    
190        push(@merge_handles, [$h, $i]);
191    
192    #    &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");
193    }
194    
195    for my $ent (@merge_handles)
196    {
197        my($h, $k) = @$ent;
198        print "Wait for finish for k=$k\n";
199        my $r = $h->finish();
200        print "done r=$r\n";
201    }
202    
203    my $load1 = start(["FFB2_load_oligo_index", "$newD/function.index",  "$newD/FRI.db"]);
204    my $load2 = start(["FFB2_load_oligo_index", "$newD/setI", "$newD/setI.db"]);
205    my $build = start(["build_prok_nonff_fasta", "$newD"],  ">", "$newD/extra_prok_seqs.fasta");
206    
207    #
208    # create merge files and use the -merge to create_binary_kmers
209    #
210    
211    my %mergefile;
212    foreach my $i (7..12)
213    {
214        my $mf = "$newD/KmerBuild/mergefile.$i";
215        $mergefile{$i} = $mf;
216        open(MF, ">", $mf) or die "Cannot write $mf: $!";
217    
218        for my $f (sort <$newD/Kmers/kmers.2.*/$i/good.oligos.gz>)
219        {
220            print MF "$f\t1\n";
221        }
222        for my $f (sort <$newD/Kmers/kmers.3.*/$i/good.oligos.gz>)
223        {
224            print MF "$f\t3\n";
225        }
226        print MF "$newD/PhyloSigs/$i/good.oligos.gz\t2\n";
227        close(MF);
228  }  }
 &FIG::run("FFB2_load_oligo_index $newD/function.index $newD/FRI.db");  
 &FIG::run("FFB2_load_oligo_index $newD/setI $newD/setI.db");  
229    
230  foreach my $i (7..12)  foreach my $i (7..12)
231  {  {
232      &FIG::run("FFB2_create_binary_kmers -s 4,2 $newD/Merged/$i/merged.oligos $newD/Merged/$i/table.binary");      my $mf = $mergefile{$i};
233        &FIG::run("FFB2_create_binary_kmers -l $i -s 4,2,4 -merge $mf - $newD/Merged/$i/table.binary");
234    }
235    
236    system("FFB2_run_tests $newD");
237    system("FFB2_compare_tests $oldD $newD");
238    $build->finish();
239    &FIG::run("formatdb -p T -i $newD/extra_prok_seqs.fasta");
240    &FIG::run("compute_fasta_lengths", "$newD/extra_prok_seqs.fasta", "$newD/extra_prok_seqs.fasta.lengths");
241    
242    $load1->finish();
243    $load2->finish();
244    
245    &FIG::run("FFB2_make_FF_index $newD $newD/fam.func.index $newD/FamFuncBlastD");
246    &FIG::run("FFB2_build_ff_indexes $newD");
247    
248    &FIG::run("get_coupling_values $newD/families.2c > $newD/coupling.values");
249    
250    sub figfam_processing
251    {
252        my $override_arg = "";
253        if (defined($override_file))
254        {
255            $override_arg = "--override $override_file";
256  }  }
257        &FIG::run("FFB2_update_FIGfams -ssfams $ssfam $override_arg $newD/families.2c $newD/family.functions $oldD");
258        &FIG::run("FFB2_get_oligos2 $newD/families.2c $newD/family.functions $newD/function.index $newD/KmerBuild $mchost $mcport");
259    
260        &FIG::run("FFB2_xy $newD/KmerBuild $newD/Kmers 7-12");
261    
262    #    &FIG::run("FFB2_update_FIGfams -f $ssfam $newD/families.2c $newD/family.functions $oldD");
263    #    &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions $newD/function.index $mchost $mcport |" .
264    #             "FFB2_usable_motifs $newD/Kmers");
265    
266    }
267    
268    

Legend:
Removed from v.1.2
Changed lines
Added in v.1.21

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3