[Bio] / FigKernelPackages / ProtSims.pm Repository:
ViewVC logotype

View of /FigKernelPackages/ProtSims.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Wed Aug 19 16:12:10 2009 UTC (10 years, 3 months ago) by overbeek
Branch: MAIN
Changes since 1.2: +3 -3 lines
fix passing tuples

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
package ProtSims;

use strict;
use Data::Dumper;
use Carp;
use FIG_Config;
use gjoseqlib;

sub blastP {
    my($q,$db,$min_hits) = @_;

    my(@q,@db);

    my $qF;
    if (ref $q)
    {
	$qF = "/tmp/query.$$";
	&gjoseqlib::print_alignment_as_fasta($qF,$q);
    }
    else
    {
	$qF = $q;
	if (-e $qF)
	{
	    if (! ((-e "$qF.psq") && ((-M "$qF.psq") < (-M $qF))))
	    {
		system "$FIG_Config::ext_bin/formatdb -i $qF";
	    }
	    @q = &gjoseqlib::read_fasta($qF);
	    $q = \@q;
	}
	else
	{
	    die "$qF is missing";
	}
    }

    my $dbF;
    if (ref $db)
    {
	$dbF = "/tmp/db.$$";
	&gjoseqlib::print_alignment_as_fasta($dbF,$db);
	system "$FIG_Config::ext_bin/formatdb -i $dbF";
    }
    else
    {
	$dbF = $db;
	if (-e $dbF)
	{
	    if (! ((-e "$dbF.psq") && ((-M "$dbF.psq") < (-M $dbF))))
	    {
		system "$FIG_Config::ext_bin/formatdb -i $dbF";
	    }
	    @db = &gjoseqlib::read_fasta($dbF);
	    $db = \@db;
	}
	else
	{
	    die "$dbF is missing";
	}
    }

    my $tmpF = "/tmp/sim.out.$$";
    system "$FIG_Config::ext_bin/blat $dbF $qF -prot -out=blast8 $tmpF > /dev/null";

    my @sims1 = map { chomp; [split(/\s+/,$_)] } `cat $tmpF`;
    unlink $tmpF;
#    print STDERR &Dumper(\@sims1);

    my @rerun = ();
    my @sims  = ();
    my $qI    = 0;
    my $simI  = 0;
    while ($qI < @$q)
    {
	my $peg = $q->[$qI]->[0];
#	print STDERR "processing $peg\n";
	my $hits = 0;
	my $simI1 = $simI;
	while (($simI1 < @sims1) && ($sims1[$simI1]->[0] eq $peg)) 
	{
	    if (($simI1 == $#sims1) || ($sims1[$simI1]->[1] ne $sims1[$simI1+1]->[1]))
	    {
		$hits++;
	    }
	    $simI1++;
	}
#	print STDERR "hits=$hits min_hits=$min_hits\n";
	if ($hits >= $min_hits)
	{
	    push(@sims,@sims1[$simI..$simI1]);
	}
	else
	{
	    push(@rerun,$q->[$qI]);
	}
	$simI = $simI1;
	$qI++;
    }
#    print STDERR &Dumper(\@rerun);

    if (@rerun > 0)
    {
	my $tmpQ = "/tmp/tmpQ.$$";
	&gjoseqlib::print_alignment_as_fasta($tmpQ,\@rerun);
	my @blastout = map { chomp; [split(/\s+/,$_)] } 
	               `$FIG_Config::ext_bin/blastall -m 8 -i $tmpQ -d $dbF -FF -p blastp`;
	push(@sims,@blastout);
	unlink $tmpQ;
    }

    my %lnQ   = map { $_->[0] => length($_->[2]) } @$q;
    my %lnDB  = map { $_->[0] => length($_->[2]) } @$db;
    @sims = map { push(@$_,$lnQ{$_->[0]},$lnDB{$_->[1]}); bless($_,'Sim') } @sims;

    if ($qF      eq "/tmp/query.$$")   { unlink $qF  }
    if ($dbF     eq "/tmp/db.$$")      { unlink $dbF }
    return sort { ($a->id1 cmp $b->id1) or ($b->bsc <=> $a->bsc) or ($a->id2 cmp $b->id2) } @sims;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3