[Bio] / FigKernelScripts / patric_call_proteins.pl Repository:
ViewVC logotype

View of /FigKernelScripts/patric_call_proteins.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Tue Jun 7 18:23:22 2016 UTC (3 years, 5 months ago) by olson
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +3 -3 lines
reduce fig obj creation

use strict;
use Kmers;
use FIG;
use Data::Dumper;
use Getopt::Long;
use File::Temp;

my $sims_cutoff = 1e-10;
my $iden;
my $iden2;
my $md5_to_fam_file;
my $ff_dir;
my $out;

my $rc = GetOptions("ff=s" => \$ff_dir,
		    "md5-to-fam=s" => \$md5_to_fam_file,
		    "sims-cutoff=s" => \$sims_cutoff,
		    "iden=s" => \$iden,
		    "iden2=s" => \$iden2,
		    "out=s" => \$out,
		   );

if (!$rc || @ARGV != 1)
{
    die "Usage: patric_call_proteins [-ff figfam-directory] proteins.fasta\n";
}

my $out_fh;
if ($out)
{
    open($out_fh, ">", $out) or die "Cannot write output file $out: $!";
}
else
{
    $out_fh = \*STDOUT;
}
    
my $fasta = shift;

my $tmp_fasta;
if ($fasta eq '-')
{
    $tmp_fasta = File::Temp->new();
    my $buf;
    while (read(STDIN, $buf, 4096))
    {
	print $tmp_fasta $buf;
    }
    close($tmp_fasta);
    $fasta = "$tmp_fasta";
}

-f $fasta or die "$fasta does not exist\n";

my $fig = FIG->new;

my $annO = AnnoUsingFFDir->new($ff_dir, $fig);

my %md5_to_fam;
if ($md5_to_fam_file)
{
    open(F, "<", $md5_to_fam_file) or die "Cannot open $md5_to_fam_file: $!";
    while (<F>)
    {
	chomp;
	my($md5, $fam) = split(/\t/);
	push(@{$md5_to_fam{$md5}}, $fam);
    }
    close(F);
}

my($kfunc, $kfam, $kscore, $pegs, $nomatch) = Kmers::patric_figfam_call($ff_dir, $fig, $fasta, $annO, \%md5_to_fam, $sims_cutoff, $iden, $iden2);

for my $peg (@$pegs)
{
    print $out_fh join("\t", $peg, $kfam->{$peg}, $kfunc->{$peg}, @{$kscore->{$peg}}[0..2]), "\n";
}
for my $peg (@$nomatch)
{
    print $out_fh "$peg\n";
}

close($out_fh) if $out;

package AnnoUsingFFDir;
use FIG;
use strict;
use Kmers;
use Data::Dumper;

sub new
{
    my($class, $ff_dir, $fig) = @_;

    -d $ff_dir or die "Invalid ff dir $ff_dir";

    my $kmer = 8;
    my $friDB = "$ff_dir/FRI.db";
    my $setIDB = "$ff_dir/setI.db";
    my $table = "$ff_dir/Merged/$kmer/table.binary";

    my $kmers = Kmers->new(-fig => $fig, -frIdb => $friDB, -setIdb => $setIDB, -table => $table);
    my $self = {
	kmers => $kmers,
    };
    return bless $self, $class;
}

sub assign_function_to_prot
{
    my($self, %args) = @_;

    my $in = delete $args{-input};

    my $handle =  Getter->new($self->{kmers}, $in, \%args);
    return $handle;
}

package Getter;
use FIG;
use strict;

sub new
{
    my($class, $kmers, $inp, $args) = @_;

    my $self = {
	kmers => $kmers,
	input => $inp,
	args => $args,
    };
    return bless $self, $class;
}

sub get_next
{
    my($self) = @_;


    while (my($id, $seqp, $com) = &FIG::read_fasta_record($self->{input}))
    {
	my @res = $self->{kmers}->assign_functions_to_prot_set(-seqs => [[$id, $com, $$seqp]], %{$self->{args}});
	if (@res)
	{
	    return $res[0];
	}
    }

    return undef;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3