[Bio] / FigKernelPackages / Kmers.pm Repository:
ViewVC logotype

View of /FigKernelPackages/Kmers.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.27 - (download) (as text) (annotate)
Wed Dec 1 19:27:39 2010 UTC (8 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2010_1206
Changes since 1.26: +1 -1 lines
add id to diagnostic

# -*- perl -*-
########################################################################
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
########################################################################

package Kmers;
no warnings 'redefine';

use strict;
use DB_File;
use File::Basename;
use FIG;

use Tracer;

use ProtSims;

use Data::Dumper;
use Carp;
use FFs;

our $KmersC_available;
eval {
    require KmersC;
    $KmersC_available++;
};


sub new {
    my $class = shift;

    my $figfams = {};

    $KmersC_available or die "KmersC module not available in this perl build";
    
    #
    # Support experiments with kmer builds by allowing
    # arguments to specify fri.db and setI.db as well as
    # a single kmer data file.
    #

    my $dir;
    my $FRIDB;
    my $setIDB;

    if ($_[0] =~ /^-/)
    {
	my %args = @_;

	$FRIDB = $args{-frIdb};
	$setIDB = $args{-setIdb};
	my $table = $args{-table};

	if (!defined($FRIDB) || !defined($setIDB) || !defined($table))
	{
	    warn "Kmer experimental interface must define -frIDb, -setIdb, and -table\n";
	    return undef;
	}

	#
	# Using this interface, we expect that the
	# actual kmer sdirectory is the one containing FRIDB.
	#

	my $ffdir = dirname($FRIDB);
	$figfams->{ffs} = FFs->new($ffdir);

	$figfams->{dir} = $ffdir;

	my $kmerc = new KmersC;
	$kmerc->open_data($table);
	my $sz = $kmerc->get_motif_len();
	warn "Experimental Kmers $table of size $sz\n";
	$figfams->{KmerC}->{$sz} = $kmerc;
	$figfams->{default_kmer_size} = $sz;
    }
    else
    {
	$dir = shift;
	-d $dir or return undef;

	$FRIDB = "$dir/FRI.db";
	$setIDB = "$dir/setI.db";
	
	#
	# Open a KmersC for each Kmer size.
	#
	
	for my $binary (<$dir/binary/table.binary.*>)
	{
	    if ($binary =~ /(\d+)$/)
	    {
		my $k = $1;
		my $kc = new KmersC;
		$kc->open_data($binary) or die "cannot load Kmer binary database $binary";
		
		$figfams->{KmerC}->{$k} = $kc;
	    }
	}
	$figfams->{default_kmer_size} = 8;

	$figfams->{ffs} = FFs->new($dir);
	$figfams->{dir} = $dir;
    }
    
    my %fr_hash;
    my $fr_hash_tie   = tie %fr_hash,   'DB_File', $FRIDB,  O_RDONLY, 0666, $DB_HASH;
    $fr_hash_tie    || die "tie failed for function index $FRIDB";

    my %set_hash;
    my $set_hash_tie  = tie %set_hash,   'DB_File', $setIDB,  O_RDONLY, 0666, $DB_HASH;
    $set_hash_tie   || warn "tie failed for function index $setIDB";

    $figfams->{friH}  = \%fr_hash;
    $figfams->{setiH} = \%set_hash;
    $figfams->{fig} = new FIG;


    bless $figfams,$class;
    return $figfams;
}

sub get_available_kmer_sizes
{
    my($self) = @_;
    return sort { $a <=> $b }  keys %{$self->{KmerC}};
}

sub dir
{
    my($self) = @_;
    return $self->{dir};
}

sub DESTROY {
    my ($self) = @_;
    delete $self->{fig};
}

sub match_seq {
    my($self, $motif_sz, $seq) = @_;

    if ($self->{KmerC}->{$motif_sz})
    {
	my $matches = [];
	Confess("No sequence specified.") if ! $seq;
	$self->{KmerC}->{$motif_sz}->find_all_hits(uc $seq, $matches);
	return $matches;
    }
    else
    {
	die "No KmerC found for size $motif_sz";
    }
}

=head3 assign_functions_to_prot_set

    my $result = $kmers->assign_functions_to_prot_set($args)

=over 4

=item args

Reference to a hash of parameters.

=item RETURN

Returns a list of tuples. If -all => 1 was specified, there will be a tuple for
each input sequence, otherwise there will be a tuple only for each input sequence
that had a successful match.

Each tuple is of the form

    [ sequence-id, assigned-function, OTU, score, non-overlapping hit-count, overlapping hit-count, detailed-hits]

where detailed-hits is optional, depending on the value of the -detailed parameter.

=back

=head4 Parameter Hash Fields

=over 4

=item -seqs list

Reference to a list of triples [sequence-id, comment, sequence-data].

=item -kmer N

Specify the kmer size to use for analysis.

=item -all 0|1

If set, return an entry for each input sequence. If the scoring threshold was not
met, the assigned function will be undefined, but the scores and hit counts 
will still be returned..

=item -scoreThreshold N

Require a Kmer score of at least N for a Kmer match to succeed.

=item -hitThreshold N

Require at least N (possibly overlapping) Kmer hits for a Kmer match to succeed.

=item -seqHitThreshold N

Require at least N sequential (non-overlapping) Kmer hits for a Kmer match to succeed.

=item -normalizeScores 0|1

Normalize the scores to the size of the protein.

=item -detailed 0|1

If true, return a detailed accounting of the kmers hit for each protein.

=back


=cut

sub assign_functions_to_prot_set {

    #
    # This is ugly but needed for the transition to the new form of the code.
    # If we were not invoked with the new args hash, reinvoke the previous
    # version of the code which is named assign_functions_to_prot_set_compat.
    # Otherwise fall through and use the new form.
    #
    my($self, @args) = @_;

    my $args;
    if (@args == 1 && ref($args[0]) eq 'HASH')
    {
	$args = $args[0];
    }
    elsif (@args > 0 && $args[0] !~ /^-/)
    {
	return &assign_functions_to_prot_set_compat;
    }
    else
    {
	$args = { @args };
    }

    my $seq_set = $args->{-seqs};
    if (!defined($seq_set))
    {
	die "assign_functions_to_prot_set: No sequences provided via the -seqs argument";
    }

    my $motif_sz = $args->{-kmer};
    if (!ref($self->{KmerC}->{$motif_sz}))
    {
	die "No KmerC defined for kmer size $motif_sz";
    }

    #
    # Define the good-hit function.
    #

    my $min_hits = $args->{-hitThreshold};
    my $min_seqhits = $args->{-seqHitThreshold};
    my $min_score = $args->{-scoreThreshold};

    # print "min_seqhits=$min_seqhits\n";
    
    my $good_hit = sub {
	my($score, $seqhits, $hits) = @_;

	# print "goodhit $score $hits $seqhits == $min_hits $min_seqhits $min_score\n";

	return 0 if defined($min_hits) && $hits < $min_hits;
	return 0 if defined($min_seqhits) && $seqhits < $min_seqhits;
	return 0 if defined($min_score) && $score < $min_score;
	return 1;
    };

    my $fr_hash   = $self->{friH};
    my $set_hash  = $self->{setiH};
    my $fig = $self->{fig};

    my @out;

    #
    # Run the query.
    #

    for my $seqent (@$seq_set)
    {
	my($id, $com, $seq) = @$seqent;
	my $seq_len = length($seq);
	my $matches = $self->match_seq($motif_sz, $seq);
#	print Dumper($id, $matches);
	
	my(%hitsF,%hitsS, %hitsFam);

	#
	# Compute the non-overlapping hits.
	# Do this by walking the list of hits in order of offset, and for each,
	# compute the distance to the start of the last hit. If that distance
	# is K or greater, increment the non-overlapping hit count.
	#
	my $last_hit_start;
	my $last_frI;
	my %non_overlapping_F;

	my @sorted_matches = sort { $a->[2] <=> $b->[2] or $a->[0] <=> $b->[0] } @$matches;
	my @details;

	foreach my $match (@sorted_matches) 
	{
	    my($offset, $oligo, $frI, $setI, $fam) = @$match;

	    if ($args->{-detailed})
	    {
		push(@details, [$offset, $oligo, $fr_hash->{$frI}, $set_hash->{$setI}, $fam]);
	    }

	    #
	    # $offset - offset in target of this hit
	    # $oligo - actual oligo that hit
	    # $frI - functional role index for this kmer hit
	    # $setI - OTU  set index for this kmer hit
	    # $fam - figfam id (if present in the kmer data set)
	    #

	    #
	    # The sort above groups the hits by functional role and orders by
	    # offset within the target.
	    #
	  
	    if ($frI != $last_frI)
	    {
		#
		# We're searching in a new role now.
		#
		
		undef $last_hit_start;
		$last_frI = $frI;
	    }
	    # print "@$match lhs=$last_hit_start\n";

	    #
	    # If this is the first hit, or if we are a kmer width away at least from the
	    # last non-overlapping hit, count another non-overlapping hit.
	    #
	    
	    if (!defined($last_hit_start) || ($offset - $last_hit_start) >= $motif_sz)
	    {
		$non_overlapping_F{$frI}++;
		$last_hit_start = $offset;
	    }

	    #
	    # We count all functional role and OTU hits.
	    #
	    $hitsF{$frI}++; 
	    if ($setI)
	    { 
		$hitsS{$setI}++ ;
	    }
	    if (defined($fam))
	    {
		$hitsFam{$fam}++;
	    }
	}

#	print Dumper(\%non_overlapping_F, \%hitsF, \%hitsFam);

	#
	# Find the functional roles that had the best overlapping and non-overlapping hit counts.
	#
	my $nonoverlapFRI = &best_hit(\%non_overlapping_F, $min_hits);
	my ($FRI, $bh2) = &best_2hits(\%hitsF, $min_seqhits);

	#
	# Compute score and normalize if required. Score is based on the number of overlapping hits.
	#

	my $nonoverlap_hits = $non_overlapping_F{$nonoverlapFRI};
	my $overlap_hits  = $hitsF{$FRI};
	my $overlap_hits2 = $hitsF{$bh2};

	my $score = 0 + ($overlap_hits - $overlap_hits2);
	if ($args->{-normalizeScores})
	{
	    $score = $score / ($seq_len - $motif_sz + 1);
	}

	my $setI  = &best_hit(\%hitsS,$min_hits);
	my $bestFam = &best_hit(\%hitsFam, $min_hits);

	my $fun;
	my $set;
	my $family;
	my $family_score = 0;
	my $family_sims;
	# print "$score $nonoverlap_hits $overlap_hits\n";
	if ($FRI >= 0 && ($nonoverlapFRI == $FRI) && &$good_hit($score, $nonoverlap_hits, $overlap_hits))
	{
	    $fun = $fr_hash->{$nonoverlapFRI};
	    $set = $set_hash->{$setI};

	    if (!defined($fun))
	    {
		warn "No function found for $nonoverlapFRI\n";
	    }

	    if (defined($bestFam) && $bestFam >= 0)
	    {
		my $f = sprintf("FIG%08d", $bestFam);

		#
		# Ensure the function of that family matches the function
		# we're calling. If it does not, do not call the figfam.
		#
		if ($self->{ffs}->family_function($f) eq $fun)
		{
		    $family = $f;
		    $family_score = $hitsFam{$bestFam};
		}
		else
		{
		    warn "$id: called family $bestFam $f but function " . $self->{ffs}->family_function($f) . " ne $fun\n";
		}
	    }
	    elsif (defined($args->{-determineFamily}) && $args->{-determineFamily} && ref($self->{ffs}))
	    {
		my($fam, $sims) = $self->{ffs}->place_seq_and_function_in_family($seq, $fun);
		$family = $fam;
		$family_sims = $sims if $args->{-returnFamilySims};
	    }
	    push(@out, [$id, $fun, $set, $score, $nonoverlap_hits, $overlap_hits,
			($args->{-detailed} ? \@details : undef),
			$family, $family_sims, $family_score]);
	}
	elsif ($args->{-all})
	{
	    push(@out, [$id, $fun, $set, $score, $nonoverlap_hits, $overlap_hits,
			($args->{-detailed} ? \@details : undef),
			$family, $family_sims, $family_score]);
	}
    }
    return @out;
}

sub assign_functions_using_similarity
{
    my($self, $args) = _handle_args(@_);

    my $seqs = $args->{-seqs};
    if (!defined($seqs))
    {
	die "assign_functions_using_similarity: No sequences provided via the -seqs argument";
    }

    my $db = $args->{-simDb};
    if (!defined($db))
    {
	die "assign_functions_using_similarity: No similarity database provided by the -simDb argument";
    }

    if (@$seqs ==  0)
    {
	return ();
    }

    my @blastout = ProtSims::blastP($seqs, $db, 5);

    my $cur;
    my @set;
    my @out;
    for my $ent (@blastout)
    {
	if ($cur && $ent->id1 ne $cur)
	{
	    my $val = $self->process_blast_set(@set);
	    push(@out, $val);
	    @set = ();
	}
	$cur = $ent->id1;
	push(@set, $ent);
    }
    my $val = $self->process_blast_set(@set);
    push(@out, $val);
    return @out;
}

sub process_blast_set
{
    my($self, @blastout) = @_;

    my $id = $blastout[0]->id1;
    my $fig = $self->{fig};

    if (@blastout > 5) { $#blastout = 4 }
	    
    my %hit_pegs = map { $_->id2 => 1 } @blastout;
    my @pegs = keys(%hit_pegs);
    if (@pegs == 0)
    {
	return [$id,'hypothetical protein', undef, 0, 0, 0, undef];
    }
    else
    {
	my %funcs;
	foreach my $peg (@pegs)
	{
	    my $func = $fig->function_of($peg,1);
	    if (! &FIG::hypo($func))
	    {
		$funcs{$func}++;
	    }
	}
	my @pos = sort { $funcs{$b} <=> $funcs{$a} } keys(%funcs);
	my $proposed = (@pos > 0) ? $pos[0] : "hypothetical protein";
	return [$id, $proposed, undef, 0, 0, 0, undef];
    }
}

sub assign_functions_to_prot_set_compat {
    my($self,$seq_set,$blast,$min_hits,$extra_blastdb) = @_;
    $min_hits = 3 unless defined($min_hits);
    
    my %match_set = map { my($id, $com, $seq) = @$_;  $id => [$self->match_seq($self->{default_kmer_size},$seq), $seq] } @$seq_set;
    
    my $fr_hash   = $self->{friH};
    my $set_hash  = $self->{setiH};
    
    my $fig = $self->{fig};
    
    my @missing;
    while (my($id, $ent) = each %match_set)
    {
	my($matches, $seq) = @$ent;
	
	my(%hitsF,%hitsS);
	foreach my $match (@$matches)
	{
	    my($offset, $oligo, $frI, $setI) = @$match;
	    $hitsF{$frI}++;
	    if ($setI)
	    {
		$hitsS{$setI}++ ;
	    }
	}
	
	my $FRI = &best_hit(\%hitsF,$min_hits);
	my $setI  = &best_hit(\%hitsS,$min_hits);
	push(@$ent, $FRI, $setI, \%hitsF);
	
	if (!$fr_hash->{$FRI})
	{
	    push(@missing, [$id, undef, $seq]);
	}
    }
    
    #
    # @missing now has the list of sequences that had no Kmer hits. If we have a
    # blast db, blast 'em.
    
    my @all_blastout;
    if (@missing && -s $extra_blastdb)
    {
	#print Dumper(\@missing);
	@all_blastout = ProtSims::blastP(\@missing, $extra_blastdb, 5);
	#print Dumper(\@all_blastout);
    }
    
    #
    # We now have Kmers output and blast output. Go through the original data and
    # create the output.
    #
    
    my @out;
    
    for my $ent (@$seq_set)
    {
	my $id = $ent->[0];
	my ($matches, $seq, $FRI, $setI, $hitsF)  = @{$match_set{$id}};
	
	my $blast_results = [];
	if ($fr_hash->{$FRI})
	{
	    if ($blast && ($fr_hash->{$FRI} || $set_hash->{$setI}))
	    {
		$blast_results = &blast_data($self,$id,$seq,$fr_hash->{$FRI},$blast,'blastp');
	    }
	    
	    push(@out, [$id, $fr_hash->{$FRI},$set_hash->{$setI}, $blast_results,$hitsF->{$FRI}]);
	}
	else
	{
	    my @blastout = grep { $_->id1 eq $id } @all_blastout;
	    
	    if (@blastout > 5) { $#blastout = 4 }
	    
	    my %hit_pegs = map { $_->id2 => 1 } @blastout;
	    my @pegs = keys(%hit_pegs);
	    if (@pegs == 0)
	    {
		push(@out, [$id,'hypothetical protein','',[],0]);
	    }
	    else
	    {
		my %funcs;
		foreach my $peg (@pegs)
		{
		    my $func = $fig->function_of($peg,1);
		    if (! &FIG::hypo($func))
		    {
			$funcs{$func}++;
		    }
		}
		my @pos = sort { $funcs{$b} <=> $funcs{$a} } keys(%funcs);
		my $proposed = (@pos > 0) ? $pos[0] : "hypothetical protein";
		push(@out, [$id, $proposed,'',[],0]);
	    }
	}
    }
    return @out;
}





sub best_hit {
    my($hits,$min_hits) = @_;
    my @poss = sort { $hits->{$b} <=> $hits->{$a} } keys(%$hits);

    my $val;
    if ((@poss > 0) && ($hits->{$poss[0]} >= $min_hits))
    {
	$val = $poss[0];
    }
    return $val;
}

sub best_2hits {
    my($hits,$min_hits) = @_;
    my @poss = sort { $hits->{$b} <=> $hits->{$a} } keys(%$hits);

    my $val;
    if ((@poss > 0) && ($hits->{$poss[0]} >= $min_hits))
    {
	return @poss[0,1];
    }
    else
    {
	return undef;
    }
}

sub best_hit_in_group
{
    my($group) = @_;

    my %hash;
    for my $tuple (@$group)
    {
    	my($off,$oligo,$frI,$setI) = @$tuple;
	if ($setI > 0)
	{
	    $hash{$setI}++;
	}
    }
    my @sorted = sort { $hash{$b} <=> $hash{$a} } keys %hash;
    my $max = $sorted[0];
    return $max;
}

sub assign_functions_to_DNA_features {
    my($self,$motif_sz,$seq,$min_hits,$max_gap,$blast) = @_;

    $min_hits = 3 unless defined($min_hits);
    $max_gap  = 200 unless defined($max_gap);

    my $fr_hash   = $self->{friH};
    my $set_hash  = $self->{setiH};

    my %hits;
    my @ans;
    my $matches = $self->process_dna_seq($seq);

    push(@ans,&process_hits($self,$motif_sz, $matches,1,length($seq),$motif_sz, $min_hits, $max_gap,$blast,$seq));
    undef %hits;

    $matches = $self->process_dna_seq(&FIG::reverse_comp($seq));
    push(@ans,&process_hits($self,$motif_sz, $matches,length($seq),1,$motif_sz, $min_hits, $max_gap,$blast,$seq));
    return \@ans;
}

sub process_dna_seq {
    my($self, $seq,$hits) = @_;

    my $matches = $self->match_seq($seq);
    return $matches;
}


sub process_hits {
    my($self,$motif_sz, $matches,$beg,$end,$sz_of_match, $min_hits, $max_gap,$blast,$seq) = @_;

    my $fr_hash   = $self->{friH};
    my $set_hash  = $self->{setiH};

    my $hits;
    my %sets;
    foreach my $tuple (@$matches)
    {
	my($off,$oligo,$frI,$setI) = @$tuple;
	push(@{$hits->{$frI}},$tuple);
    }

    my @got = ();
    my @poss = sort { (@{$hits->{$b}} <=> @{$hits->{$a}}) } keys(%$hits);
    if (@poss != 0)
    {
	foreach my $frI (@poss)
	{
	    my $hit_list = $hits->{$frI};
	    my @grouped = &group_hits($hit_list, $max_gap);
	    foreach my $group_ent (@grouped)
	    {
		my($group, $group_hits) = @$group_ent;
		my $N = @$group;
		if ($N >= $min_hits)   # consider only runs containing 3 or more hits
		{
		    my $b1 = $group->[0];
		    my $e1 = $group->[-1] + ($sz_of_match-1);

		    my $loc;
		    if ($beg < $end)
		    {
			$loc = [$beg+$b1,$beg+$e1];
		    }
		    else
		    {
			$loc = [$beg-$b1,$beg-$e1];
		    }
		    my $func = $fr_hash->{$frI};

		    my $set = &best_hit_in_group($group_hits);
		    $set = $set_hash->{$set};

		    my $blast_output = [];
		    if ($blast)
		    {
			$blast_output = &blast_data($self,join("_",@$loc),$seq,$func,$blast,
						    ($motif_sz == $sz_of_match) ? 'blastn' : 'blastx');
		    }
		    
		    my $tuple = [$N,@$loc,$func,$set,$blast_output];
		    
		    push(@got,$tuple);
		}
	    }
	}
    }
    return @got;
}

sub group_hits {
    my($hits, $max_gap) = @_;

    my @sorted = sort { $a->[0] <=> $b->[0] } @$hits;
    my @groups = ();
    my $position;
    while (defined(my $hit = shift @sorted))
    {
	my($position,$oligo,$frI,$setI) = @$hit;

	my $group = [$position];
	my $ghits = [$hit];
	while ((@sorted > 0) && (($sorted[0]->[0] - $position) < $max_gap))
	{
	    $hit = shift @sorted;
	    ($position,$oligo,$frI,$setI) = @$hit;
	    push(@$group,$position+1);
	    push(@$ghits, $hit);
	}

	push(@groups,[$group, $ghits]);
    }
    return @groups;
}

sub assign_functions_to_PEGs_in_DNA {
    my($self,$motif_sz,$seq,$min_hits,$max_gap,$blast) = @_;

    $blast = 0 unless defined($blast);
    $min_hits = 3 unless defined($min_hits);
    $max_gap  = 200 unless defined($max_gap);

    my $fr_hash   = $self->{friH};
    my $set_hash  = $self->{setiH};

    my %hits;
    my @ans;
    my $matches = $self->process_prot_seq($motif_sz, $seq);
    push(@ans,&process_hits($self,$motif_sz, $matches,1,length($seq),3 * $motif_sz, $min_hits, $max_gap,$blast,$seq));
    undef %hits;
    $matches = $self->process_prot_seq($motif_sz, &FIG::reverse_comp($seq));
    push(@ans,&process_hits($self,$motif_sz, $matches,length($seq),1,3 * $motif_sz, $min_hits, $max_gap,$blast,$seq));
    return \@ans;
}    

sub process_prot_seq {
    my($self, $motif_sz, $seq) = @_;

    my $ans = [];
    my $ln = length($seq);
    my($i,$off);
    for ($off=0; ($off < 3); $off++)
    {
	my $ln_tran = int(($ln - $off)/3) * 3;
	next if $off > $ln;
	my $tran = uc &FIG::translate(substr($seq,$off,$ln_tran));

	next if $tran eq '';
	
	my $matches = $self->match_seq($motif_sz, $tran);
	
	push(@$ans, map { $_->[0] = ((3 * $_->[0]) + $off); $_ } @$matches);
    }
    return $ans;
}

use Sim;

sub blast_data {
    my($self,$id,$seq,$func,$blast,$tool) = @_;

    if ($tool eq "blastp")   
    { 
	return &blast_data1($self,$id,$seq,$func,$blast,$tool);
    }

    if ($id =~ /^(\d+)_(\d+)$/)
    {
	my($b,$e) = ($1 < $2) ? ($1,$2) : ($2,$1);
	my $b_adj = (($b - 5000) > 0) ? $b-5000 : 1;
	my $e_adj = (($b + 5000) <= length($seq)) ? $b+5000 : length($seq);
	my $seq1 = substr($seq,$b_adj-1, ($e_adj - $b_adj)+1);
	my $blast_out = &blast_data1($self,$id,$seq1,$func,$blast,$tool);
	foreach $_ (@$blast_out)
	{
	    $_->[2] += $b_adj - 1;
	    $_->[3] += $b_adj - 1;
	    $_->[8] = length($seq);
	}
	return $blast_out;
    }
    else
    {
	return &blast_data1($self,$id,$seq,$func,$blast,$tool);
    }
}

sub blast_data1 {
    my($self,$id,$seq,$func,$blast,$tool) = @_;


    if (! $tool) { $tool = 'blastx' }
    my $fig = $self->{fig};

    my @blastout = ();
    if ($tool ne 'blastn')
    {
	my $ffs = $self->{ffs};
	my @fams = $ffs->families_implementing_role($func);
	foreach my $fam (@fams)
	{
	    my $subD = substr($fam,-3);
	    my $pegs_in_fam = "$FIG_Config::FigfamsData/FIGFAMS/$subD/$fam/PEGs.fasta";
	    push(@blastout,map { [$_->id2,$_->iden,$_->b1,$_->e1,$_->b2,$_->e2,$_->psc,$_->bsc,$_->ln1,$_->ln2,$fam] } 
		 $fig->blast($id,$seq,$pegs_in_fam,0.1,"-FF -p $tool -b $blast"));
	}
    }
    else
    {
	push(@blastout,map { [$_->id2,$_->iden,$_->b1,$_->e1,$_->b2,$_->e2,$_->psc,$_->bsc,$_->ln1,$_->ln2,$self->{what}] } 
	     $fig->blast($id,$seq,$self->{blastdb},0.1,"-FF -p $tool -b $blast"));
    }
    @blastout = sort { $b->[7] <=> $a->[7] }  @blastout;
    if (@blastout > $blast) { $#blastout = $blast-1 }
    return \@blastout;
}

#
# Turn an argument list into a $self ref and an argument hash.
# Code lifted from ClientThing.
#
sub _handle_args
{
    my $self = shift;
    my $args = $_[0];
    if (defined $args)
    {
        if (scalar @_ gt 1)
	{
            # Here we have multiple arguments. We check the first one for a
            # leading hyphen.
            if ($args =~ /^-/) {
                # This means we have hash-form parameters.
                my %args = @_;
                $args = \%args;
            } else {
                # This means we have list-form parameters.
                my @args = @_;
                $args = \@args;
            }
        } else {
            # Here we have a single argument. If it's a scalar, we convert it
            # to a singleton list.
            if (! ref $args) {
                $args = [$args];
            }
        }
    }
    return($self, $args);
}





1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3