[Bio] / FigKernelScripts / FFB2_build_updated_FF.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/FFB2_build_updated_FF.pl

Parent Directory | Revision Log | View Patch

revision 1.2, Mon Nov 16 21:56:22 2009 UTC | revision 1.17, Fri Oct 29 17:10:34 2010 UTC
# Line 1  Line 1 
1  ########################################################################  ########################################################################
2    
3    use strict;
4  use FIG;  use FIG;
5    use IPC::Run qw(start finish);
6    use Cache::Memcached::Fast;
7    use Getopt::Long;
8    
9  my $usage = "usage: FFB2_build_updated_FF OldReleaseDir NewReleaseDir";  my $usage = "usage: FFB2_build_updated_FF [--skipotu] OldReleaseDir NewReleaseDir memcache-host memcache-port [ssfam]";
10    
11  my($oldD,$newD);  my $skip_otu;
12    
13    my $rc = GetOptions("skipotu" => \$skip_otu);
14    
15    $rc or die $usage;
16    
17    my($oldD,$newD,$mchost, $mcport);
18    
19  (  (
20   ($oldD = shift @ARGV) && (-d $oldD) &&   ($oldD = shift @ARGV) && (-d $oldD) &&
21   ($newD = shift @ARGV)   ($newD = shift @ARGV) &&
22     ($mchost = shift @ARGV) &&
23     ($mcport = shift @ARGV)
24  )  )
25      || die $usage;      || die $usage;
26    
27    my $in_ssfam = shift;
28    
29    $ENV{TMPDIR} = $FIG_Config::temp;
30    
31    my $sort_args = "-T $FIG_Config::temp -S 4G";
32    
33  mkdir($newD,0777) || die "You have an existing $newD; remove it and try again";  mkdir($newD,0777) || die "You have an existing $newD; remove it and try again";
 &FIG::run("FFB2_update_FIGfams $newD/families.2c $newD/family.functions $oldD");  
 &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions 2> $newD/function.index |" .  
           "sort -T . | FFB2_usable_motifs $newD/Kmers");  
34    
35  &FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");  my $mc = new Cache::Memcached::Fast({ servers => ["$mchost:$mcport"] } );
36  &FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort -T . | FFB2_usable_motifs $newD/PhyloSigs");  $mc or die "Could not connect to memcached at $mchost:$mcport\n";
37    
38    $mc->flush_all();
39    
40    #
41    # Do some work up front to ensure we can load the memcache with all the data we need.
42    #
43    
44    my $ssfam;
45    if (-f $in_ssfam)
46    {
47        $ssfam = $in_ssfam;
48    }
49    else
50    {
51        $ssfam = "$FIG_Config::temp/subsys.based.families.$$";
52        &FIG::run("FFB2_make_subsys_based_families > $ssfam");
53    }
54    
55    print "Loading memcache\n";
56    &FIG::run("FFB2_load_memcache $ssfam $oldD/families.2c $mchost $mcport");
57    print "Done\n";
58    
59    my $parallel = !$skip_otu;
60    my $child_pid;
61    if ($parallel)
62    {
63        #
64        # Run the FF update in a child process while we manage getting the phylo oligos here.
65        #
66    
67        $child_pid = fork;
68    
69        if ($child_pid == 0)
70        {
71            figfam_processing();
72            exit;
73        }
74    }
75    else
76    {
77        figfam_processing();
78    }
79    
80    if ($skip_otu)
81    {
82        #
83        # Create empty kmer files in PhyloSigs.
84        #
85    
86        my $dir = "$newD/PhyloSigs";
87        if (! -d $dir)
88        {
89            mkdir $dir or die "Could not mkdir $dir: $!";
90        }
91        for my $k (7..12)
92        {
93            my $dir = "$newD/PhyloSigs/$k";
94            if (! -d $dir)
95            {
96                mkdir $dir or die "Could not mkdir $dir: $!";
97            }
98            open(TT, ">", "$dir/good.oligos");
99            close(TT);
100        }
101    }
102    else
103    {
104        #
105        # Run the new parallel get-prots
106        #
107        my $n_get_prots = 4;
108    
109        my @handles;
110        for my $i (0..$n_get_prots - 1)
111        {
112            my $outfile = "$FIG_Config::temp/ffb2_xx.$$.$i";
113    
114            my $h = start(['FFB2_xx', 12,  "$newD/setI", $n_get_prots, $i, $mchost, $mcport],
115                          "<", "/dev/null", ">", $outfile);
116            push(@handles, [$i, $h, $outfile]);
117        }
118        for my $e (@handles)
119        {
120            my($i, $h, $file) = @$e;
121            print "Wait for finish $i $file\n";
122            $h->finish();
123        }
124    
125        my @files = map { $_->[2] } @handles;
126        print STDERR "Merging phylo @files\n";
127        &FIG::run("sort -S 2G -m @files | FFB2_usable_motifs $newD/PhyloSigs 7-12");
128    
129        #&FIG::run("FFB2_get_prots $newD/ProtsByGenomeSet $newD/setI");
130        #&FIG::run("FFB2_get_prot_gs_oligos $newD/ProtsByGenomeSet 12 | sort $sort_args | FFB2_usable_motifs $newD/PhyloSigs");
131    }
132    
133    if ($parallel)
134    {
135        my $rc = waitpid($child_pid, 0);
136        if ($? != 0)
137        {
138            die "Error running parallel child: $?\n";
139        }
140    }
141    
142    
143  mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";  mkdir("$newD/Merged",0777) || die "could not make $newD/Merged: $!";
144    
145    my @merge_handles;
146    
147  foreach my $i (7..12)  foreach my $i (7..12)
148  {  {
149      mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";      mkdir("$newD/Merged/$i",0777) || die "could not make $newD/Merged/$i: $!";
150      &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");      my $h = start(["FFB2_merge_oligos", "$newD/Kmers/$i/good.oligos", "$newD/PhyloSigs/$i/good.oligos"], "|",
151                      ["FFB2_create_binary_kmers", "-l", $i, "-s", "4,2", "-", "$newD/Merged/$i/table.binary"]);
152    
153        push(@merge_handles, [$h, $i]);
154    
155    #    &FIG::run("FFB2_merge_oligos $newD/Kmers/$i/good.oligos $newD/PhyloSigs/$i/good.oligos > $newD/Merged/$i/merged.oligos");
156  }  }
 &FIG::run("FFB2_load_oligo_index $newD/function.index $newD/FRI.db");  
 &FIG::run("FFB2_load_oligo_index $newD/setI $newD/setI.db");  
157    
158  foreach my $i (7..12)  for my $ent (@merge_handles)
159    {
160        my($h, $k) = @$ent;
161        print "Wait for finish for k=$k\n";
162        my $r = $h->finish();
163        print "done r=$r\n";
164    }
165    
166    my $load1 = start(["FFB2_load_oligo_index", "$newD/function.index",  "$newD/FRI.db"]);
167    my $load2 = start(["FFB2_load_oligo_index", "$newD/setI", "$newD/setI.db"]);
168    my $build = start(["build_prok_nonff_fasta", "$newD"],  ">", "$newD/extra_prok_seqs.fasta");
169    
170    
171    #foreach my $i (7..12)
172    #{
173    #    &FIG::run("FFB2_create_binary_kmers -s 4,2 $newD/Merged/$i/merged.oligos $newD/Merged/$i/table.binary");
174    #}
175    
176    system("FFB2_run_tests $newD");
177    system("FFB2_compare_tests $oldD $newD");
178    $build->finish();
179    &FIG::run("formatdb -p T -i $newD/extra_prok_seqs.fasta");
180    &FIG::run("compute_fasta_lengths", "$newD/extra_prok_seqs.fasta", "$newD/extra_prok_seqs.fasta.lengths");
181    
182    $load1->finish();
183    $load2->finish();
184    
185    &FIG::run("FFB2_make_FF_index $newD $newD/fam.func.index $newD/FamFuncBlastD");
186    &FIG::run("FFB2_build_ff_indexes $newD");
187    
188    &FIG::run("get_coupling_values $newD/families.2c > $newD/coupling.values");
189    
190    sub figfam_processing
191  {  {
192      &FIG::run("FFB2_create_binary_kmers -s 4,2 $newD/Merged/$i/merged.oligos $newD/Merged/$i/table.binary");      &FIG::run("FFB2_update_FIGfams -f $ssfam $newD/families.2c $newD/family.functions $oldD");
193        &FIG::run("FFB2_get_oligos $newD/families.2c $newD/family.functions $newD/function.index $mchost $mcport |" .
194                  "FFB2_usable_motifs $newD/Kmers");
195  }  }
196    
197    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3