[Bio] / Kmers2 / mk-dna-kmers.pl Repository:
ViewVC logotype

View of /Kmers2/mk-dna-kmers.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Wed Feb 13 20:42:16 2013 UTC (6 years, 9 months ago) by olson
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +16 -3 lines
Kmer / partition creation code.

use File::Basename;
use lib '../FigKernelPackages';
use strict;
use Proc::ParallelLoop;
use gjoseqlib;
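#
# Make DNA kmers.
#
# Each input file named on the command line describes one set: its first
# line is the tag for the set, and every remaining line is the path of a
# fasta file to be processed for that set.
#
# For each fasta file, writes out/<basename of the fasta file> containing
# one "kmer<TAB>tag" line for every kmer that passes the alphabet filter.
#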

#
# Main driver.
#
# For every list file on the command line: read the tag (first line) and the
# fasta file paths (remaining lines), then hand each [tag, path] pair to
# pareach. Each worker writes the kmers of one fasta file to
# out/<basename>, one "kmer<TAB>tag" line per accepted kmer.
#
# The out/ directory is not created here; the open below dies if it is
# missing. Two fasta files with the same basename write to the same output
# path.
#

my $k = 25;         # Kmer length.
my $nprocs = 6;     # Passed to pareach as Max_Workers.
my @files = @ARGV;  # One list file per set.

for my $inp (@files)
{
    # Lexical handle, closed before pareach is called, so the list file is
    # not left open for the rest of the run.
    open(my $list_fh, "<", $inp) or die "Cannot open $inp: $!";

    # First line is the tag for the set. An empty list file has no tag and
    # no work, and simply falls through with an empty work list.
    my $tag = <$list_fh>;
    chomp $tag if defined $tag;

    my @work = <$list_fh>;
    close($list_fh);
    chomp @work;

    # Ignore blank lines (e.g. a trailing empty line) instead of trying to
    # open a file with an empty name.
    @work = map { [$tag, $_] } grep { /\S/ } @work;

    pareach \@work, sub {
        my $ent = shift;
        my($tag, $file) = @$ent;

        open(my $fh, "<", $file) or die "Cannot open $file; $!";
        my $base = basename($file);
        open(my $out, ">", "out/$base") or die "Cannot open out/$base: $!";

        while (my($id, $def, $seq) = read_next_fasta_seq($fh))
        {
            # Upper-case once per sequence instead of once per kmer.
            $seq = uc $seq;

            # Last valid start offset; negative when the sequence is shorter
            # than $k, in which case the loop body never runs. Using <= here
            # includes the final kmer of the sequence.
            my $last_start = length($seq) - $k;

            for (my $i = 0; $i <= $last_start; $i++)
            {
                my $kmer = substr($seq, $i, $k);

                # NOTE(review): this character class is the 20 amino-acid
                # letter codes, although the script is described as making
                # DNA kmers. For DNA input it lets ambiguity codes such as
                # N, R or Y through -- confirm this is intended.
                if ($kmer !~ /[^ACDEFGHIKLMNPQRSTVWY]/)
                {
                    print $out $kmer, "\t", $tag, "\n";
                }
            }
        }

        close($fh);
        # Buffered write errors only surface at close, so check it.
        close($out) or die "Error closing out/$base: $!";
    }, { Max_Workers => $nprocs };

}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3