[Bio] / FigKernelScripts / FFB2_build_updated_FF.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/FFB2_build_updated_FF.pl

Parent Directory | Revision Log | View Patch

revision 1.2, Mon Nov 16 21:56:22 2009 UTC | revision 1.24, Tue Apr 23 20:31:41 2013 UTC
# Line 1  Line 1 
1  ########################################################################  ########################################################################
2    
3    use strict;
4  use FIG;  use FIG;
5    use IPC::Run qw(start finish);
6    use Cache::Memcached::Fast;
7    use Getopt::Long;
8    
9  my $usage = "usage: FFB2_build_updated_FF OldReleaseDir NewReleaseDir";  my $usage = "usage: FFB2_build_updated_FF [--no-reload-memcache] [--override fn-override-file] [--skipotu] OldReleaseDir NewReleaseDir memcache-host memcache-port [ssfam]";
10    
11  my($oldD,$newD);  my $pseed = "/vol/pseed/FIGdisk/FIG/Data";
12    
13    my $skip_otu;
14    my $override_file;
15    my $no_reload_memcache = 0;
16    
17    my $rc = GetOptions("skipotu" => \$skip_otu,
18                        "override=s" => \$override_file,
19                        "no-reload-memcache" => \$no_reload_memcache);
20    
21    $rc or die $usage;
22    
23    my($oldD,$newD,$mchost, $mcport);
24    
25  (  (
26   ($oldD = shift @ARGV) && (-d $oldD) &&   ($oldD = shift @ARGV) && (-d $oldD) &&
27   ($newD = shift @ARGV)   ($newD = shift @ARGV) &&
28     ($mchost = shift @ARGV) &&
29     ($mcport = shift @ARGV)
30  )  )
31      || die $usage;      || die $usage;
32    
33  mkdir($newD,0777) || die "You have an existing $newD; remove it and try again";  my $in_ssfam = shift;
34  &FIG::run("FFB2_update_FIGfams $newD/families.2c $newD/family.functions $oldD");  
35  &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions 2> $newD/function.index |" .  $ENV{TMPDIR} = $FIG_Config::temp;
36            "sort -T . | FFB2_usable_motifs $newD/Kmers");  
37    my $sort_args = "-T $FIG_Config::temp -S 4G";
38    
39    if (-d $newD)
40    {
41        if (-f "$newD/families.2c")
42        {
43            die "Not overwriting existing families in $newD\n";
44        }
45    }
46    else
47    {
48        mkdir($newD,0777) || die "Error creating $newD: $!";
49    }
50    
51    -d "$newD/Kmers" || mkdir("$newD/Kmers",0777) || die "could not make $newD/Kmers: $!";
52    
53    my $mc = new Cache::Memcached::Fast({ servers => ["$mchost:$mcport"] } );
54    $mc or die "Could not connect to memcached at $mchost:$mcport\n";
55    
56    #
57    # Do some work up front to ensure we can load the memcache with all the data we need.
58    #
59    
60    my $ssfam;
61    if (-f $in_ssfam)
62    {
63        $ssfam = $in_ssfam;
64    }
65    else
66    {
67        $ssfam = "$FIG_Config::temp/subsys.based.families.$$";
68        &FIG::run("FFB2_make_subsys_based_families > $ssfam");
69    }
70    
71    if (!$no_reload_memcache)
72    {
73        $mc->flush_all();
74    
75        print "Loading memcache\n";
76        &FIG::run("FFB2_load_memcache --seed $pseed $ssfam $oldD/families.2c $mchost $mcport");
77        print "Done\n";
78    }
79    
80    my $parallel = !$skip_otu;
81    my $child_pid;
82    if ($parallel)
83    {
84        #
85        # Run the FF update in a child process while we manage getting the phylo oligos here.
86        #
87    
88        $child_pid = fork;
89    
90        if ($child_pid == 0)
91        {
92            figfam_processing();
93            exit;
94        }
95    }
96    else
97    {
98        figfam_processing();
99    }
100    
101    if ($skip_otu)
102    {
103        #
104        # Create empty kmer files in PhyloSigs.
105        #
106    
107        my $dir = "$newD/PhyloSigs";
108        if (! -d $dir)
109        {
110            mkdir $dir or die "Could not mkdir $dir: $!";
111        }
112        for my $k (7..12)
113        {
114            my $dir = "$newD/PhyloSigs/$k";
115            if (! -d $dir)
116            {
117                mkdir $dir or die "Could not mkdir $dir: $!";
118            }
119            open(TT, ">", "$dir/good.oligos");
120            close(TT);
121            system("gzip", "$dir/good.oligos");
122        }
123        open(X, ">", "$newD/setI");
124        close(X);
125    }
126    else
127    {
128        #
129        # Run the new parallel get-prots
130        #
131        my $n_get_prots = 4;
132    
133        my @handles;
134        for my $i (0..$n_get_prots - 1)
135        {
136            my $outfile = "$FIG_Config::temp/ffb2_xx.$$.$i";
137    
138            my $h = start(['FFB2_xx', 12,  "$newD/setI", $n_get_prots, $i, $mchost, $mcport],
139                          "<", "/dev/null", ">", $outfile);
140            push(@handles, [$i, $h, $outfile]);
141        }
142        for my $e (@handles)
143        {
144            my($i, $h, $file) = @$e;
145            print "Wait for finish $i $file\n";
146            $h->finish();
147        }
148    
149        my @files = map { $_->[2] } @handles;
150        print STDERR "Merging phylo @files\n";
151    
152        #
153        # The Debian sort on the mac pro's does bad things with
154        # large merges. Use the latest one there, built in a local dir.
155        my @sort;
156        if (-x "/scratch/olson/coreutils/bin/sort")
157        {
158            my $n = @files;
159            $n++;
160            @sort = ("/scratch/olson/coreutils/bin/sort", "--batch-size=$n", "-m", @files);
161        }
162        else
163        {
164            @sort = ("sort", "-S", "2G", "-m", @files);
165        }
166    
167    
168        &FIG::run("@sort | FFB2_usable_motifs $newD/PhyloSigs 7-12");
169    
170        #&FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");
171        #&FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort $sort_args | FFB2_usable_motifs $newD/PhyloSigs");
172    }
173    
174    if ($parallel)
175    {
176        my $rc = waitpid($child_pid, 0);
177        if ($? != 0)
178        {
179            die "Error running parallel child: $?\n";
180        }
181    }
182    
183    die "We're at the merge\n";
184    
 &FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");  
 &FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort -T . | FFB2_usable_motifs $newD/PhyloSigs");  
185  mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";  mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";
186    
187    my @merge_handles;
188    
189  foreach my $i (7..12)  foreach my $i (7..12)
190  {  {
191      mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";      mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";
192      &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");      my $h = start(["FFB2_merge_oligos", "$newD/Kmers/$i/good.oligos", "$newD/PhyloSigs/$i/good.oligos"], "|",
193                      ["FFB2_create_binary_kmers", "-l", $i, "-s", "4,2", "-", "$newD/Merged/$i/table.binary"]);
194    
195        push(@merge_handles, [$h, $i]);
196    
197    #    &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");
198    }
199    
200    for my $ent (@merge_handles)
201    {
202        my($h, $k) = @$ent;
203        print "Wait for finish for k=$k\n";
204        my $r = $h->finish();
205        print "done r=$r\n";
206    }
207    
208    my $load1 = start(["FFB2_load_oligo_index", "$newD/function.index",  "$newD/FRI.db"]);
209    my $load2 = start(["FFB2_load_oligo_index", "$newD/setI", "$newD/setI.db"]);
210    my $build = start(["build_prok_nonff_fasta", "$newD"],  ">", "$newD/extra_prok_seqs.fasta");
211    
212    #
213    # create merge files and use the -merge to create_binary_kmers
214    #
215    
216    my %mergefile;
217    foreach my $i (7..12)
218    {
219        my $mf = "$newD/KmerBuild/mergefile.$i";
220        $mergefile{$i} = $mf;
221        open(MF, ">", $mf) or die "Cannot write $mf: $!";
222    
223        for my $f (sort <$newD/Kmers/kmers.2.*/$i/good.oligos.gz>)
224        {
225            print MF "$f\t1\n";
226        }
227        for my $f (sort <$newD/Kmers/kmers.3.*/$i/good.oligos.gz>)
228        {
229            print MF "$f\t3\n";
230        }
231        print MF "$newD/PhyloSigs/$i/good.oligos.gz\t2\n";
232        close(MF);
233  }  }
 &FIG::run("FFB2_load_oligo_index $newD/function.index $newD/FRI.db");  
 &FIG::run("FFB2_load_oligo_index $newD/setI $newD/setI.db");  
234    
235  foreach my $i (7..12)  foreach my $i (7..12)
236  {  {
237      &FIG::run("FFB2_create_binary_kmers -s 4,2 $newD/Merged/$i/merged.oligos $newD/Merged/$i/table.binary");      my $mf = $mergefile{$i};
238        &FIG::run("FFB2_create_binary_kmers -l $i -s 4,2,4 -merge $mf - $newD/Merged/$i/table.binary");
239    }
240    
241    system("FFB2_run_tests $newD");
242    system("FFB2_compare_tests $oldD $newD");
243    $build->finish();
244    &FIG::run("formatdb -p T -i $newD/extra_prok_seqs.fasta");
245    &FIG::run("compute_fasta_lengths", "$newD/extra_prok_seqs.fasta", "$newD/extra_prok_seqs.fasta.lengths");
246    
247    $load1->finish();
248    $load2->finish();
249    
250    &FIG::run("FFB2_make_FF_index $newD $newD/fam.func.index $newD/FamFuncBlastD");
251    &FIG::run("FFB2_build_ff_indexes $newD");
252    
253    &FIG::run("get_coupling_values $newD/families.2c > $newD/coupling.values");
254    
255    sub figfam_processing
256    {
257        my $override_arg = "";
258        if (defined($override_file))
259        {
260            $override_arg = "--override $override_file";
261  }  }
262        &FIG::run("FFB2_update_FIGfams -memcache $mchost:$mcport -ssfams $ssfam $override_arg $newD/families.2c $newD/family.functions $oldD > $newD.update.$$.out 2> $newD.update.$$.err");
263        &FIG::run("FFB2_get_oligos2 $newD/families.2c $newD/family.functions $newD/function.index $newD/KmerBuild $mchost $mcport");
264    
265        &FIG::run("FFB2_xy $newD/KmerBuild $newD/Kmers 7-12");
266    
267    #    &FIG::run("FFB2_update_FIGfams -f $ssfam $newD/families.2c $newD/family.functions $oldD");
268    #    &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions $newD/function.index $mchost $mcport |" .
269    #             "FFB2_usable_motifs $newD/Kmers");
270    
271    }
272    
273    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.24

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3