[Bio] / FigKernelPackages / FFs.pm Repository:
ViewVC logotype

View of /FigKernelPackages/FFs.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.49 - (download) (as text) (annotate)
Wed Sep 15 21:48:19 2010 UTC (9 years, 2 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011
Changes since 1.48: +13 -2 lines
Clean up errors.

# -*- perl -*-
########################################################################
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
########################################################################

package FFs;
no warnings 'redefine';

use Sim;
use strict;
use DB_File;
use FIG;
use SeedUtils;
use ANNOserver;

use FF;
use Tracer;

use Data::Dumper;
use Carp;
use Digest::MD5;

# This is the constructor.  Presumably, $class is 'FFs'.  
#

sub new {
    my($class,$fam_data,$fig) = @_;

    my $figfams = {};

    defined($fam_data) || return undef;
    $figfams->{dir} = $fam_data;
    $figfams->{blast_dir} = $fam_data;
    $figfams->{fig} = $fig;    #(defined $fig ? $fig : new FIG);

    $figfams->{function2families} = &SeedUtils::open_berk_table("$fam_data/function2families.db", -results_as_list => 1);
    $figfams->{function2index} = &SeedUtils::open_berk_table("$fam_data/function2index.db");
    $figfams->{role2families} = &SeedUtils::open_berk_table("$fam_data/role2families.db",  -results_as_list => 1);
    $figfams->{genome2families} = &SeedUtils::open_berk_table("$fam_data/genome2families.db",  -results_as_list => 1);
    $figfams->{peg2family} = &SeedUtils::open_berk_table("$fam_data/peg2family.db");
    $figfams->{family2function} = &SeedUtils::open_berk_table("$fam_data/family2function.db");
    $figfams->{family2pegs} = &SeedUtils::open_berk_table("$fam_data/family2pegs.db", -results_as_list => 1);

    bless $figfams,$class;
    return $figfams;
}

#sub DESTROY {
#    my ($self) = @_;
#    delete $self->{fig};
#}


sub PDB_connections {
    my($self,$fam,$raw) = @_;

    return [];
#     $self->check_db_PDB_connections;
#     my $sims = $self->{PDB_connections_db}->{$fam};
#     my @sims = map { $_ =~ /pdb\|([0-9a-zA-Z]+)/; [$1,[split(/\t/,$_)]] } split(/\n/,$sims);
#     if (! $raw)  { @sims = map { $_->[0] } grep { ($_->[1]->[11] > 0.5) && ((($_->[1]->[4] - $_->[1]->[3]) / $_->[1]->[5]) > 0.8) } @sims}
#     return \@sims;
}

sub figfam
{
    my($self, $figfam_id) = @_;
    return FF->new($figfam_id, $self);
}

sub families_with_function {
    my($self,$function) = @_;

    return @{$self->{function2families}->{$function}};
}

sub families_implementing_role {
    my($self,$role) = @_;
    return @{$self->{role2families}->{$role}};
}

sub family_containing_peg {
    my($self,$peg) = @_;

    return $self->{peg2family}->{$peg};
}

sub families_containing_peg {
    my($self,$peg) = @_;

    return ($self->family_containing_peg($peg));
}

sub families_in_genome {
    my($self,$genome) = @_;

    return @{$self->{genome2families}->{$genome}};
}

sub all_families {
    my($self) = @_;

    return sort keys %{$self->{family2function}};
}

sub place_in_family {
    my($self,$seq) = @_;

    my $anno = new ANNOserver();

    my $handle = $anno->assign_function_to_prot(-hitThreshold => 3, -seqHitThreshold => 2, -kmer => 8, -input => [['id', undef, $seq]]);
    my $res = $handle->get_next();

    if (!@$res || !defined($res->[1]))
    {
	return undef;
    }

    my $function = $res->[1];

    my ($figfam_id, $sims) = $self->place_seq_and_function_in_family($seq, $function);

    return ($figfam_id ? FF->new($figfam_id, $self) : undef, $sims);
}

sub place_seq_and_function_in_family
{
    my($self, $seq, $function) = @_;

    my $index = $self->index_for_function($function);
    if (!defined($index) || $index eq '')
    {
	warn "No index found for $function\n";
	return undef;
    }

    my $sub = $index % 1000;
    my $blast_db = "$self->{blast_dir}/FamFuncBlastD/$sub/$index.fasta";

    if (! -f $blast_db)
    {
	warn "No blast db found for function $function index $index\n";
	return undef;
    }

    my $fh;
    my $tmp = "$FIG_Config::temp/seq.$$";
    open($fh, ">", $tmp) or die "Cannot write $tmp: $!";
    print $fh ">id\n$seq\n";
    close($fh);
    
    open($fh, "$FIG_Config::ext_bin/blastall -FF -m8 -d $blast_db -i $tmp -e 1.0e-20 -p blastp|")
	or die "Cannot run blastall: $!";

    my @sims;
    my $id1_len = length($seq);
    while (<$fh>)
    {
	chomp;
	my(@a) = split(/\t/);
	if ($a[1] =~ /^(FIG\d+):(\d+):(.*)$/)
	{
	    push(@a, $id1_len, $2);
	    $a[1] = "$1:$3";
	}
	my $sim = [@a];
	bless($sim, 'Sim');
	push(@sims, $sim);
    }
    close($fh);
    my $fam_id;
    if (@sims)
    {
	if ($sims[0]->id2 =~ /^(FIG\d+):/)
	{
	    $fam_id = $1;
	}
    }
    else
    {
	warn "No sims for $function\n";
    }
    
    return ($fam_id, \@sims);
}

sub index_for_function
{
    my($self,$function) = @_;

    return $self->{function2index}->{$function};
}


=head3
usage: $figfams->family_functions();

returns a hash of all the functions for all figfams from the family.functions file

=cut

sub family_functions {
    my($self) = @_;
    return $self->{family2function};
}

sub family_pegs {
    my($self, $fam) = @_;
    return $self->{family2pegs}->{$fam};
}

sub family_function {
    my($self, $fam) = @_;
    return $self->{family2function}->{$fam};
}


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3