[Bio] / FigKernelScripts / assign_to_dna_using_kmers.pl Repository:
ViewVC logotype

View of /FigKernelScripts/assign_to_dna_using_kmers.pl

Parent Directory | Revision Log


Revision 1.5 - (download) (as text) (annotate)
Fri Dec 11 20:48:41 2009 UTC (10 years, 2 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011
Changes since 1.4: +20 -14 lines
more anno server fixes, add some missing files

########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
use strict;
use Carp;
use Data::Dumper;
use DB_File;
use Getopt::Long;

use Kmers;

# ----------------------------------------------------------------------
# Main script.
#
# Reads FASTA-formatted sequences from STDIN, hands each sequence to
# Kmers::assign_functions_to_PEGs_in_DNA, and prints one tab-separated
# line per accepted hit to STDOUT:
#
#     <sequence id> \t <sequence id>_<begin>_<end> \t <function>
#
# If a hit carries a non-empty list of extra rows (the sixth field of the
# hit), each row is printed tab-separated after the hit line, followed by
# a "=======" separator line.
#
# The single positional argument is passed unchanged to Kmers->new.
# ----------------------------------------------------------------------
my $usage = "usage: assign_to_dna_using_kmers [--kmer K] [--minHits N] [--minSize N] [--maxGap N] KmerData< fasta > assignments 2> non-matched\n";

# Defaults for the command-line options.  Apart from $min_sz, their exact
# meaning is defined by Kmers::assign_functions_to_PEGs_in_DNA, which
# receives them unchanged.
my $kmer_size = 8;
my $min_hits  = 2;
my $min_sz    = 60;      # hits spanning fewer positions than this are dropped
my $max_gap   = 600;

# NOTE(review): no command-line option sets $blast, so it is always passed
# to the Kmers call as undef.  Presumably it was meant to request the extra
# rows handled below -- confirm against Kmers.pm before wiring up an option.
my $blast;

GetOptions("kmer=i"    => \$kmer_size,
           "minHits=i" => \$min_hits,
           "minSize=i" => \$min_sz,
           "maxGap=i"  => \$max_gap,
          )
    or die $usage;

@ARGV == 1 or die $usage;

my $kmerD = shift;
my $kmers = Kmers->new($kmerD);

my $line = <STDIN>;

# Advance to the first FASTA header.  Previously any leading line that was
# not a header (even a blank one) caused the entire input to be ignored
# without a message.
my $ignored = 0;
while (defined($line) && ($line !~ /^>/))
{
    $ignored++ if ($line =~ /\S/);
    $line = <STDIN>;
}
warn "ignored $ignored non-FASTA line(s) before the first header\n" if $ignored;

# Invariant: at the top of this loop $line holds a header line (starts
# with '>').
while (defined($line))
{
    # The identifier is the first run of non-whitespace right after '>'.
    # A header without one used to terminate the loop and silently drop
    # every remaining record; now only that record is skipped.
    my $id;
    if ($line =~ /^>(\S+)/)
    {
        $id = $1;
    }
    else
    {
        warn "skipping FASTA record with no identifier at input line $.\n";
    }

    # Collect the sequence lines up to the next header (or end of input),
    # stripping all whitespace including the line terminator.
    my @seq = ();
    while (defined($line = <STDIN>) && ($line !~ /^>/))
    {
        $line =~ s/\s//g;
        push(@seq, $line);
    }
    next unless defined($id);

    my $seq  = join("", @seq);
    my $hits = $kmers->assign_functions_to_PEGs_in_DNA($kmer_size, $seq, $min_hits, $max_gap, $blast);

    foreach my $hit (@$hits)
    {
        # Each hit is a six-element array; the first and fifth elements
        # are not used here.  Lexicals are deliberately not named $b, to
        # avoid shadowing the special variable used by sort.
        my(undef, $beg, $end, $func, undef, $blastout) = @$hit;

        # abs() because begin may be greater than end (presumably hits on
        # the reverse strand -- confirm against Kmers.pm).
        next if ((abs($beg - $end) + 1) < $min_sz);

        print join("\t", ($id, join("_", ($id, $beg, $end)), $func)), "\n";

        if ($blastout && (@$blastout > 0))
        {
            foreach my $row (@$blastout)
            {
                print join("\t", @$row), "\n";
            }
            print "=======\n";
        }
    }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3