[Bio] / FigKernelScripts / FFB3_build_ff_oligos.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/FFB3_build_ff_oligos.pl

Parent Directory | Revision Log


Revision 1.1 - (view) (download) (as text)

1 : olson 1.1
2 :    
#
# Coordination script that invokes FFB3_get_oligos to create oligos
# and usable_motifs to pull the ones that are unique.
#
# Creates the figfam index file.
# Creates large intermediate data in work_dir.
# Creates usable motifs for each kmer size kmin-kmax in outputdir.
#
# Arguments (all seven are required and positional):
#   parallel        worker count handed to pareach
#   ffdir           existing directory that must contain translation.btree
#   function-index  passed through unchanged to FFB3_get_oligos
#   kmin, kmax      passed to usable_motifs as the single argument "kmin-kmax"
#   work_dir        existing directory; scanned for kmers.* subdirectories
#   outputdir       existing directory; receives one kmers.<col>.<set> per work unit
#

use strict;
use warnings;
use FIG_Config;
use FIG;
use Data::Dumper;
use IPC::Run 'run';
use Proc::ParallelLoop;

my $usage = "FFB3_build_ff_oligos.pl parallel ffdir function-index kmin kmax work_dir outputdir";

@ARGV == 7 or die "Usage: $usage\n";

my $parallel = shift;
my $ff_dir = shift;
my $func_idx = shift;
my $kmin = shift;
my $kmax = shift;
my $work_dir = shift;
my $output_dir = shift;

-d $ff_dir or die "ffdir $ff_dir does not exist\n";

my $trans_btree = "$ff_dir/translation.btree";

-f $trans_btree or die "Required file $trans_btree does not exist\n";

-d $work_dir or die "Work directory $work_dir does not exist\n";
-d $output_dir or die "Output directory $output_dir does not exist\n";

#
# Offset is one since we use ordering constraints for family kmers.
#
# NOTE(review): no offset argument is passed to FFB3_get_oligos below; this
# remark may be left over from an earlier version of the command line.
#

my @cmd = ("$FIG_Config::bin/FFB3_get_oligos", $ff_dir, $func_idx, $work_dir);
print "Start @cmd\n";
my $rc = system(@cmd);

if ($rc == -1)
{
    # system() returns -1 when the program could not be started at all;
    # the reason is in $!.
    die "Error $rc running @cmd: $!";
}
elsif ($rc != 0)
{
    # Otherwise $rc is a wait status: exit code in the high byte, signal
    # number in the low seven bits.
    die "Error $rc running @cmd (exit code " . ($rc >> 8) . ", signal " . ($rc & 127) . ")";
}

#
# We feed the oligos with the output of a merge of the data found in the
# kmers.* subdirectories of work_dir (presumably written by the
# FFB3_get_oligos run above).
#
# Each set induces a work unit to be run in parallel.
#

my @work;

opendir(my $work_dh, $work_dir) or die "Cannot open $work_dir: $!";
# A set name is whatever follows the "kmers." prefix of a subdirectory name.
my @sets = sort
           map  { substr($_, length('kmers.')) }
           grep { /^kmers\./ && -d "$work_dir/$_" } readdir($work_dh);
closedir($work_dh);

for my $set (@sets)
{
    my $set_dir = "$work_dir/kmers.$set";

    # Inputs are the plain files in the set directory whose names are all digits.
    opendir(my $set_dh, $set_dir) or die "Cannot open $set_dir: $!";
    my @files = map { "$set_dir/$_" } sort grep { /^\d+$/ && -f "$set_dir/$_" } readdir($set_dh);
    closedir($set_dh);

    # One work unit per column (2 and 3); both units share the same file list.
    for my $col (2, 3)
    {
        push(@work, [$set, $set_dir, $col, \@files]);
    }
}

print Dumper(\@work);

# NOTE(review): confirm that N_Workers is the option name honoured by the
# installed Proc::ParallelLoop; the CPAN documentation appears to use
# Max_Workers -- TODO confirm. If the key is not recognised, the module's
# default worker count would presumably apply instead of $parallel.
pareach \@work, sub {
    my($work) = shift;
    my($set, $set_dir, $col, $input_files) = @$work;

    # Batch size is one more than the number of inputs, presumably so that
    # sort -m merges every file in a single pass (assumes GNU sort semantics).
    my $n = @$input_files + 1;
    my $out = "$output_dir/kmers.$col.$set";
    FIG::verify_dir($out);

    # Merge the already-sorted inputs and pipe the result into usable_motifs,
    # which reads it from stdin ('-').
    my $rc = run(["$FIG_Config::kmer_tools/sort", "-m", "--parallel", 1, "--batch-size", $n, @$input_files], '|',
                 ["$FIG_Config::kmer_tools/usable_motifs", $out, "$kmin-$kmax", $col, '-']);
    print "Return is $rc\n";

    # IPC::Run::run returns false when any command in the pipeline fails;
    # report it on STDERR so the failure is not lost among the other workers' output.
    warn "sort | usable_motifs pipeline failed for $out\n" unless $rc;
}, { N_Workers => $parallel };
89 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3