[Bio] / Kmers2 / mk-dna-kmers.pl Repository:
ViewVC logotype

Annotation of /Kmers2/mk-dna-kmers.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

use File::Basename;
use lib '../FigKernelPackages';
use strict;
use warnings;
use Proc::ParallelLoop;
use gjoseqlib;

#
# Make DNA kmers.
#
# Usage: mk-dna-kmers.pl set-file [set-file ...]
#
# Each set-file named on the command line holds a header line with the tag
# for the set; every following line is the path of a fasta file to be
# processed for that set.
#
# For every fasta file listed, writes out/<basename of the fasta file>
# containing one "KMER<TAB>TAG" line per accepted kmer. The out/ directory
# must already exist. Fasta files that share a basename write to the same
# output file.
#

my $k      = 25;    # kmer length
my $nprocs = 6;     # Max_Workers value handed to pareach
my @files  = @ARGV;

# Characters accepted in a kmer; a kmer containing anything else is dropped.
# NOTE(review): this is the 20-letter amino-acid alphabet, although the
# script is described as producing DNA kmers. As written, nucleotide
# ambiguity codes such as N, R and Y are accepted. Confirm whether
# /[^ACGT]/ was intended before changing it.
my $rejected_char = qr/[^ACDEFGHIKLMNPQRSTVWY]/;

for my $inp (@files)
{
    open(my $list, "<", $inp) or die "Cannot open $inp: $!";

    my $tag = <$list>;
    if (!defined $tag)
    {
        # Empty set-file: no tag and no work, so there is nothing to do.
        warn "Skipping empty input file $inp\n";
        close($list);
        next;
    }
    chomp $tag;

    my @work = <$list>;
    close($list);
    chomp @work;

    # Ignore blank lines (e.g. a trailing newline at the end of the list)
    # instead of trying to open an empty filename.
    @work = map { [$tag, $_] } grep { /\S/ } @work;

    pareach \@work, sub {
        my $ent = shift;
        my($tag, $file) = @$ent;

        write_kmers_for_file($tag, $file);
    }, { Max_Workers => $nprocs };
}

#
# Write every accepted kmer of every sequence in fasta file $file to
# out/<basename of $file>, one "KMER<TAB>$tag" line per kmer.
#
# Dies if the fasta file cannot be opened, or if the output file cannot be
# opened or closed.
#
sub write_kmers_for_file
{
    my($tag, $file) = @_;

    open(my $fh, "<", $file) or die "Cannot open $file; $!";
    my $base = basename($file);
    open(my $out, ">", "out/$base") or die "Cannot open out/$base: $!";

    while (my($id, $def, $seq) = read_next_fasta_seq($fh))
    {
        # Upper-case once per sequence rather than once per kmer.
        my $useq = uc $seq;

        # A sequence of length L has L-k+1 kmers, starting at 0 .. L-k.
        # The range is empty when the sequence is shorter than k.
        my $last_start = length($useq) - $k;
        for my $i (0 .. $last_start)
        {
            my $kmer = substr($useq, $i, $k);
            if ($kmer !~ $rejected_char)
            {
                print {$out} $kmer, "\t", $tag, "\n";
            }
        }
    }

    close($fh);

    # Buffered write errors only surface at close, so check it.
    close($out) or die "Cannot close out/$base: $!";
    return;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3