Parent Directory
|
Revision Log
Kmer / partition creation code.
use strict;
use warnings;

use File::Basename;
use lib '../FigKernelPackages';
use Proc::ParallelLoop;
use gjoseqlib;

#
# Make DNA kmers.
#
# Usage: pass one or more "list" files on the command line. The first line
# of each list file is the tag for the set; every following line is the
# path of a fasta file belonging to that set.
#
# For every fasta file listed, a file out/<basename of the fasta file> is
# written containing one line per kmer: "<kmer>\t<tag>\n". The out/
# directory must already exist. Fasta files are processed in parallel by
# up to $nprocs worker processes (via pareach).
#
# NOTE(review): two fasta files that share a basename (even across different
# list files) write to the same out/ path, so the later one overwrites the
# earlier one -- confirm that input basenames are unique.
#

my $k      = 25;    # kmer length
my $nprocs = 6;     # maximum number of parallel workers

my @files = @ARGV;

for my $inp (@files) {
    open(my $list_fh, "<", $inp) or die "Cannot open $inp: $!";

    # First line is the set tag; everything after it is a fasta file path.
    my $tag = <$list_fh>;
    if (!defined $tag) {
        close($list_fh);
        warn "Skipping $inp: file is empty (no tag line)\n";
        next;
    }
    chomp $tag;

    my @paths = <$list_fh>;
    close($list_fh);
    chomp @paths;

    # Ignore blank lines (e.g. a trailing empty line) rather than letting a
    # worker die trying to open an empty filename.
    my @work = map { [$tag, $_] } grep { length } @paths;

    pareach \@work, sub {
        my $ent = shift;
        my ($tag, $file) = @$ent;

        open(my $fh, "<", $file) or die "Cannot open $file; $!";

        my $base = basename($file);
        open(my $out, ">", "out/$base") or die "Cannot open out/$base: $!";

        while (my ($id, $def, $seq) = read_next_fasta_seq($fh)) {
            # Upper-case once per sequence instead of once per kmer.
            $seq = uc $seq;

            # Last valid start offset is length - k (inclusive); the range
            # is empty when the sequence is shorter than k.
            my $last_start = length($seq) - $k;

            for my $i (0 .. $last_start) {
                my $kmer = substr($seq, $i, $k);

                # Keep only kmers made up entirely of the allowed letters.
                # NOTE(review): this is the 20-letter amino-acid alphabet,
                # although the header says DNA kmers; as written, DNA
                # ambiguity codes such as N, R, Y pass the filter -- confirm
                # whether /[^ACGT]/ was intended.
                if ($kmer !~ /[^ACDEFGHIKLMNPQRSTVWY]/) {
                    print {$out} $kmer, "\t", $tag, "\n";
                }
            }
        }

        close($fh);

        # Buffered write errors (e.g. disk full) only surface at close.
        close($out) or die "Error closing out/$base: $!";
    }, { Max_Workers => $nprocs };
}
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |