[Bio] / FigKernelScripts / FFB2_build_updated_FF.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/FFB2_build_updated_FF.pl

Parent Directory | Revision Log | View Patch

revision 1.2, Mon Nov 16 21:56:22 2009 UTC  |  revision 1.19, Mon Nov 22 17:58:30 2010 UTC
# Line 1  Line 1 
1  ########################################################################  ########################################################################
2    
3    use strict;
4  use FIG;  use FIG;
5    use IPC::Run qw(start finish);
6    use Cache::Memcached::Fast;
7    use Getopt::Long;
8    
9  my $usage = "usage: FFB2_build_updated_FF OldReleaseDir NewReleaseDir";  my $usage = "usage: FFB2_build_updated_FF [--override fn-override-file] [--skipotu] OldReleaseDir NewReleaseDir memcache-host memcache-port [ssfam]";
10    
11  my($oldD,$newD);  my $skip_otu;
12    my $override_file;
13    
14    my $rc = GetOptions("skipotu" => \$skip_otu,
15                        "override=s" => \$override_file);
16    
17    $rc or die $usage;
18    
19    my($oldD,$newD,$mchost, $mcport);
20    
21  (  (
22   ($oldD = shift @ARGV) && (-d $oldD) &&   ($oldD = shift @ARGV) && (-d $oldD) &&
23   ($newD = shift @ARGV)   ($newD = shift @ARGV) &&
24     ($mchost = shift @ARGV) &&
25     ($mcport = shift @ARGV)
26  )  )
27      || die $usage;      || die $usage;
28    
29  mkdir($newD,0777) || die "You have an existing $newD; remove it and try again";  my $in_ssfam = shift;
30  &FIG::run("FFB2_update_FIGfams $newD/families.2c $newD/family.functions $oldD");  
31  &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions 2> $newD/function.index |" .  $ENV{TMPDIR} = $FIG_Config::temp;
32            "sort -T . | FFB2_usable_motifs $newD/Kmers");  
33    my $sort_args = "-T $FIG_Config::temp -S 4G";
34    
35    if (-d $newD)
36    {
37        if (-f "$newD/families.2c")
38        {
39            die "Not overwriting existing families in $newD\n";
40        }
41    }
42    else
43    {
44        mkdir($newD,0777) || die "Error creating $newD: $!";
45    }
46    
47    my $mc = new Cache::Memcached::Fast({ servers => ["$mchost:$mcport"] } );
48    $mc or die "Could not connect to memcached at $mchost:$mcport\n";
49    
50    $mc->flush_all();
51    
52    #
53    # Do some work up front to ensure we can load the memcache with all the data we need.
54    #
55    
56    my $ssfam;
57    if (-f $in_ssfam)
58    {
59        $ssfam = $in_ssfam;
60    }
61    else
62    {
63        $ssfam = "$FIG_Config::temp/subsys.based.families.$$";
64        &FIG::run("FFB2_make_subsys_based_families > $ssfam");
65    }
66    
67    print "Loading memcache\n";
68    &FIG::run("FFB2_load_memcache $ssfam $oldD/families.2c $mchost $mcport");
69    print "Done\n";
70    
71    my $parallel = !$skip_otu;
72    my $child_pid;
73    if ($parallel)
74    {
75        #
76        # Run the FF update in a child process while we manage getting the phylo oligos here.
77        #
78    
79        $child_pid = fork;
80    
81        if ($child_pid == 0)
82        {
83            figfam_processing();
84            exit;
85        }
86    }
87    else
88    {
89        figfam_processing();
90    }
91    
92    if ($skip_otu)
93    {
94        #
95        # Create empty kmer files in PhyloSigs.
96        #
97    
98        my $dir = "$newD/PhyloSigs";
99        if (! -d $dir)
100        {
101            mkdir $dir or die "Could not mkdir $dir: $!";
102        }
103        for my $k (7..12)
104        {
105            my $dir = "$newD/PhyloSigs/$k";
106            if (! -d $dir)
107            {
108                mkdir $dir or die "Could not mkdir $dir: $!";
109            }
110            open(TT, ">", "$dir/good.oligos");
111            close(TT);
112        }
113    }
114    else
115    {
116        #
117        # Run the new parallel get-prots
118        #
119        my $n_get_prots = 4;
120    
121        my @handles;
122        for my $i (0..$n_get_prots - 1)
123        {
124            my $outfile = "$FIG_Config::temp/ffb2_xx.$$.$i";
125    
126            my $h = start(['FFB2_xx', 12,  "$newD/setI", $n_get_prots, $i, $mchost, $mcport],
127                          "<", "/dev/null", ">", $outfile);
128            push(@handles, [$i, $h, $outfile]);
129        }
130        for my $e (@handles)
131        {
132            my($i, $h, $file) = @$e;
133            print "Wait for finish $i $file\n";
134            $h->finish();
135        }
136    
137        my @files = map { $_->[2] } @handles;
138        print STDERR "Merging phylo @files\n";
139    
140        #
141        # The Debian sort on the mac pro's does bad things with
142        # large merges. Use the latest one there, built in a local dir.
143        my @sort;
144        if (-x "/scratch/olson/coreutils/bin/sort")
145        {
146            my $n = @files;
147            $n++;
148            @sort = ("/scratch/olson/coreutils/bin/sort", "--batch-size=$n", "-m", @files);
149        }
150        else
151        {
152            @sort = ("sort", "-S", "2G", "-m", @files);
153        }
154    
155    
156        &FIG::run("@sort | FFB2_usable_motifs $newD/PhyloSigs 7-12");
157    
158        #&FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");
159        #&FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort $sort_args | FFB2_usable_motifs $newD/PhyloSigs");
160    }
161    
162    if ($parallel)
163    {
164        my $rc = waitpid($child_pid, 0);
165        if ($? != 0)
166        {
167            die "Error running parallel child: $?\n";
168        }
169    }
170    
171    die "We're at the merge\n";
172    
 &FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");  
 &FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort -T . | FFB2_usable_motifs $newD/PhyloSigs");  
173  mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";  mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";
174    
175    my @merge_handles;
176    
177  foreach my $i (7..12)  foreach my $i (7..12)
178  {  {
179      mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";      mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";
180      &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");      my $h = start(["FFB2_merge_oligos", "$newD/Kmers/$i/good.oligos", "$newD/PhyloSigs/$i/good.oligos"], "|",
181                      ["FFB2_create_binary_kmers", "-l", $i, "-s", "4,2", "-", "$newD/Merged/$i/table.binary"]);
182    
183        push(@merge_handles, [$h, $i]);
184    
185    #    &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");
186  }  }
 &FIG::run("FFB2_load_oligo_index $newD/function.index $newD/FRI.db");  
 &FIG::run("FFB2_load_oligo_index $newD/setI $newD/setI.db");  
187    
188  foreach my $i (7..12)  for my $ent (@merge_handles)
189    {
190        my($h, $k) = @$ent;
191        print "Wait for finish for k=$k\n";
192        my $r = $h->finish();
193        print "done r=$r\n";
194    }
195    
196    my $load1 = start(["FFB2_load_oligo_index", "$newD/function.index",  "$newD/FRI.db"]);
197    my $load2 = start(["FFB2_load_oligo_index", "$newD/setI", "$newD/setI.db"]);
198    my $build = start(["build_prok_nonff_fasta", "$newD"],  ">", "$newD/extra_prok_seqs.fasta");
199    
200    
201    #foreach my $i (7..12)
202    #{
203    #    &FIG::run("FFB2_create_binary_kmers -s 4,2 $newD/Merged/$i/merged.oligos $newD/Merged/$i/table.binary");
204    #}
205    
206    system("FFB2_run_tests $newD");
207    system("FFB2_compare_tests $oldD $newD");
208    $build->finish();
209    &FIG::run("formatdb -p T -i $newD/extra_prok_seqs.fasta");
210    &FIG::run("compute_fasta_lengths", "$newD/extra_prok_seqs.fasta", "$newD/extra_prok_seqs.fasta.lengths");
211    
212    $load1->finish();
213    $load2->finish();
214    
215    &FIG::run("FFB2_make_FF_index $newD $newD/fam.func.index $newD/FamFuncBlastD");
216    &FIG::run("FFB2_build_ff_indexes $newD");
217    
218    &FIG::run("get_coupling_values $newD/families.2c > $newD/coupling.values");
219    
220    sub figfam_processing
221    {
222        my $override_arg = "";
223        if (defined($override_file))
224  {  {
225      &FIG::run("FFB2_create_binary_kmers -s 4,2 $newD/Merged/$i/merged.oligos $newD/Merged/$i/table.binary");          $override_arg = "--override $override_file";
226  }  }
227        &FIG::run("FFB2_update_FIGfams -ssfams $ssfam $override_arg $newD/families.2c $newD/family.functions $oldD");
228        &FIG::run("FFB2_get_oligos2 $newD/families.2c $newD/family.functions $newD/function.index $newD/KmerBuild $mchost $mcport");
229    
230    #    &FIG::run("FFB2_update_FIGfams -f $ssfam $newD/families.2c $newD/family.functions $oldD");
231    #    &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions $newD/function.index $mchost $mcport |" .
232    #             "FFB2_usable_motifs $newD/Kmers");
233    
234    }
235    
236    

Legend:
Removed from v.1.2  
Changed lines
  Added in v.1.19

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3