[Bio] / FigKernelScripts / assign_using_kmers.pl Repository:
ViewVC logotype

View of /FigKernelScripts/assign_using_kmers.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (as text) (annotate)
Tue Jan 19 00:05:25 2010 UTC (10 years, 1 month ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.7: +25 -3 lines
Add support for explicitly specifying FRI.db, setI.db, table.binary

########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
use strict;
use Carp;
use Data::Dumper;
use DB_File;
use Kmers;
use Getopt::Long;

=head1 assign_using_kmers Script

=head2 Introduction

    assign_using_kmers [options] KmerData
    assign_using_kmers [options] setI.db FRI.db table.binary

Assign Using Kmers

This script takes a FASTA file of proteins from the standard input and writes
the function of each to the standard output. A local Kmers data set is used to determine the
function when possible. When not possible, a message will be written to the
standard error output.

=head2 Command-Line Options

=over 4

=item --kmer N

=item --all

=item --scoreThreshold N

=item --hitThreshold N

=item --seqHitThreshold N

=item --normalizeScores

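=item --detailed

Not currently accepted by the option parser in this script; passing it is
rejected as an unknown option and the usage message is printed.

=back

=cut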

# Usage message printed (via die) when option parsing fails or the number of
# positional arguments is neither one nor three.  Both accepted invocation
# forms are listed: a single KmerData argument, or the three files
# setI.db, FRI.db and table.binary given in that order.
my $usage = "usage: assign_using_kmers [opts] KmerData < fasta > assignments 2> non-matched\n"
          . "       assign_using_kmers [opts] setI.db FRI.db table.binary < fasta > assignments 2> non-matched\n";


# Options collected from the command line, keyed by the option name with a
# leading dash.  The k-mer size defaults to 8 unless --kmer overrides it.
my %kmer_opts = ('-kmer' => 8);

# Getopt::Long callback: record one parsed option in %kmer_opts.
#
#   $opt_name   option name as reported by Getopt::Long (no leading dash)
#   $opt_value  value parsed for that option
#
# Returns the value that was stored.
sub setopt
{
    my $opt_name  = shift;
    my $opt_value = shift;

    my $key = '-' . $opt_name;
    return $kmer_opts{$key} = $opt_value;
}

# Command-line options understood by this script, written as Getopt::Long
# specifications ("=i" takes an integer, "=f" a real number, no suffix is a
# plain flag).  This single list drives the parser directly, replacing a
# separate list of bare option names that was declared but never read.
my @opt_specs = qw(kmer=i
                   all
                   scoreThreshold=f
                   hitThreshold=f
                   seqHitThreshold=f
                   normalizeScores);

# Every option is routed through setopt().  GetOptions returns false when it
# meets an unknown or malformed option; in that case print the usage message
# and stop.
if (!GetOptions(map { ($_, \&setopt) } @opt_specs))
{
    die $usage;
}

# Build the Kmers object from the positional arguments left after option
# parsing.  Exactly one or exactly three arguments are accepted; any other
# count prints the usage message and stops.
my $arg_count = scalar @ARGV;
unless ($arg_count == 1 || $arg_count == 3)
{
    die $usage;
}

my $kmers;
if ($arg_count == 1)
{
    # Traditional usage: the single argument (KmerData in the usage text)
    # is handed straight to Kmers->new.
    my $kmer_data = shift @ARGV;
    $kmers = Kmers->new($kmer_data);
}
else
{
    # Experiment support: the caller names setI.db, FRI.db and table.binary
    # explicitly, in that order.
    #
    # The table has to match the -kmer size in effect.  Nothing here checks
    # that; a mismatch is the user's responsibility and will make the later
    # lookup fail.
    my ($set_index_db, $fr_index_db, $table_file) = @ARGV;
    $kmers = Kmers->new(-table  => $table_file,
                        -setIdb => $set_index_db,
                        -frIdb  => $fr_index_db);
}

die "Could not create kmers object" unless $kmers;

# Stream FASTA records from standard input one at a time.  Each record is
# handed to the Kmers object on its own, and every returned row is written to
# standard output as tab-separated fields:
#
#     id, function, score, non-overlapping hits, overlapping hits
#
# A record for which no rows come back is reported by writing its id to
# standard error, as promised by the script documentation and usage message.
my $line = <STDIN>;

# Tolerate blank lines ahead of the first header instead of treating them as
# the end of the input.
$line = <STDIN> while defined($line) && $line !~ /\S/;

while (defined $line)
{
    # At this point $line must be a header.  Previously a line that did not
    # look like ">id" ended the loop silently and the rest of the input was
    # dropped; fail loudly so truncated output cannot go unnoticed.
    unless ($line =~ /^>(\S+)/)
    {
        chomp(my $bad_line = $line);
        die "Malformed FASTA input at line $.: expected a '>id' header but found '$bad_line'\n";
    }
    my $id = $1;

    # Gather sequence lines up to the next header or end of input, removing
    # all whitespace (including line endings) from each one.
    my @chunks;
    while (defined($line = <STDIN>) && ($line !~ /^>/))
    {
        $line =~ s/\s//g;
        push(@chunks, $line);
    }
    my $seq = join("", @chunks);

    my @ans = $kmers->assign_functions_to_prot_set({ %kmer_opts, -seqs => [[$id, undef, $seq]] });

    # NOTE(review): assumes an empty result list means no function could be
    # assigned to this sequence -- confirm against Kmers.
    if (!@ans)
    {
        print STDERR "$id\n";
        next;
    }

    for my $row (@ans)
    {
        # Row layout: id, function, set, score, non-overlapping hits,
        # overlapping hits, details.  The set and details fields are not
        # part of the output.
        my ($hit_id, $func, undef, $score, $non_overlap_hits, $overlap_hits) = @$row;
        print join("\t", $hit_id, $func, $score, $non_overlap_hits, $overlap_hits), "\n";
    }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3