[Bio] / FigKernelPackages / FFs.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/FFs.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.49 - (view) (download) (as text)

1 : overbeek 1.1 # -*- perl -*-
2 :     ########################################################################
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     ########################################################################
18 :    
19 :     package FFs;
20 : parrello 1.36 no warnings 'redefine';
21 : overbeek 1.1
22 : olson 1.48 use Sim;
23 : overbeek 1.1 use strict;
24 :     use DB_File;
25 : arodri7 1.34 use FIG;
26 : olson 1.48 use SeedUtils;
27 :     use ANNOserver;
28 : overbeek 1.1
29 : gdpusch 1.9 use FF;
30 : parrello 1.27 use Tracer;
31 : gdpusch 1.9
32 : overbeek 1.1 use Data::Dumper;
33 :     use Carp;
34 : arodri7 1.26 use Digest::MD5;
35 : overbeek 1.1
# Constructor.
#
# usage: my $figfams = FFs->new($fam_data, $fig);
#
#   $class    - class to bless the new object into (normally 'FFs').
#   $fam_data - directory holding the FIGfam Berkeley DB tables; it is also
#               recorded as the directory under which blast databases are
#               looked up.
#   $fig      - optional FIG object; stored exactly as given (no FIG object
#               is created when it is omitted).
#
# Returns the new object, or undef when $fam_data is not defined.
sub new {
    my ($class, $fam_data, $fig) = @_;

    my $figfams = {};

    defined($fam_data) || return undef;

    $figfams->{dir}       = $fam_data;
    $figfams->{blast_dir} = $fam_data;
    $figfams->{fig}       = $fig;

    # Tables that are opened with the -results_as_list option.
    my %wants_list = map { $_ => 1 }
        qw(function2families role2families genome2families family2pegs);

    # Each table lives in "$fam_data/<name>.db" and is stored on the object
    # under <name>. The tables are opened in this fixed order.
    my @table_names = qw(
        function2families
        function2index
        role2families
        genome2families
        peg2family
        family2function
        family2pegs
    );

    for my $name (@table_names) {
        my @options = $wants_list{$name} ? (-results_as_list => 1) : ();
        $figfams->{$name} = SeedUtils::open_berk_table("$fam_data/$name.db", @options);
    }

    bless $figfams, $class;
    return $figfams;
}
60 :    
61 : paczian 1.39 #sub DESTROY {
62 :     # my ($self) = @_;
63 :     # delete $self->{fig};
64 :     #}
65 : parrello 1.36
66 : arodri7 1.37
# PDB connections for a family.
#
# usage: my $connections = $figfams->PDB_connections($fam, $raw);
#
# The lookup is currently disabled: both arguments are ignored and a
# reference to a new, empty list is returned on every call. The previous
# implementation is kept below, commented out, for reference.
sub PDB_connections {
    my ($self, $fam, $raw) = @_;

    my @connections = ();
    return \@connections;

    #   $self->check_db_PDB_connections;
    #   my $sims = $self->{PDB_connections_db}->{$fam};
    #   my @sims = map { $_ =~ /pdb\|([0-9a-zA-Z]+)/; [$1,[split(/\t/,$_)]] } split(/\n/,$sims);
    #   if (! $raw)  { @sims = map { $_->[0] } grep { ($_->[1]->[11] > 0.5) && ((($_->[1]->[4] - $_->[1]->[3]) / $_->[1]->[5]) > 0.8) } @sims}
    #   return \@sims;
}
77 :    
# Build the FF object for one family.
#
# usage: my $ff = $figfams->figfam($figfam_id);
#
# Returns whatever FF->new returns when given the family ID and this
# FFs object.
sub figfam {
    my $self      = shift;
    my $figfam_id = shift;

    my $family = FF->new($figfam_id, $self);
    return $family;
}
83 : arodri7 1.40
# Families recorded for a function.
#
# usage: my @fams = $figfams->families_with_function($function);
#
#   $function - key into the function2families table.
#
# Returns the list stored under that key (its element count in scalar
# context). When the table has no entry for $function the result is an
# empty list; dereferencing the missing entry directly would die under
# "use strict" with "Can't use an undefined value as an ARRAY reference".
sub families_with_function {
    my ($self, $function) = @_;

    my $entry    = $self->{function2families}->{$function};
    my @families = $entry ? @{$entry} : ();

    return @families;
}
89 :    
# Families recorded for a role.
#
# usage: my @fams = $figfams->families_implementing_role($role);
#
#   $role - key into the role2families table.
#
# Returns the list stored under that key (its element count in scalar
# context). A role with no entry yields an empty list rather than dying
# on an undefined array reference under "use strict".
sub families_implementing_role {
    my ($self, $role) = @_;

    my $entry    = $self->{role2families}->{$role};
    my @families = $entry ? @{$entry} : ();

    return @families;
}
94 :    
# Family recorded for a PEG.
#
# usage: my $fam = $figfams->family_containing_peg($peg);
#
# Returns the value stored for $peg in the peg2family table.
sub family_containing_peg {
    my $self = shift;
    my $peg  = shift;

    my $peg_to_family = $self->{peg2family};
    return $peg_to_family->{$peg};
}
100 : overbeek 1.4
# List form of family_containing_peg.
#
# usage: my @fams = $figfams->families_containing_peg($peg);
#
# Returns a one-element list holding the result of
# $self->family_containing_peg($peg). When that lookup yields nothing the
# result is an empty list, so "if (@fams)" and "foreach" behave sensibly
# instead of seeing a single undef element. In scalar context the result
# is still the family itself (or undef).
sub families_containing_peg {
    my ($self, $peg) = @_;

    my $family = $self->family_containing_peg($peg);

    return defined($family) ? ($family) : ();
}
106 :    
# Families recorded for a genome.
#
# usage: my @fams = $figfams->families_in_genome($genome);
#
#   $genome - key into the genome2families table.
#
# Returns the list stored under that key (its element count in scalar
# context). A genome with no entry yields an empty list rather than dying
# on an undefined array reference under "use strict".
sub families_in_genome {
    my ($self, $genome) = @_;

    my $entry    = $self->{genome2families}->{$genome};
    my @families = $entry ? @{$entry} : ();

    return @families;
}
112 :    
# Every key of the family2function table, in default (string) sort order.
#
# usage: my @fams = $figfams->all_families();
sub all_families {
    my $self = shift;

    my $family_to_function = $self->{family2function};
    return sort keys %{$family_to_function};
}
118 :    
# Place a protein sequence into a family.
#
# usage: my ($ff, $sims) = $figfams->place_in_family($seq);
#
#   $seq - protein sequence string.
#
# A function is first requested from ANNOserver's assign_function_to_prot
# (hit threshold 3, sequence hit threshold 2, kmer size 8), and element 1
# of the first result is taken as the function. The sequence and function
# are then handed to place_seq_and_function_in_family.
#
# Returns undef when the server yields no result or no function.
# Otherwise returns the pair ($ff, $sims): $ff is an FF object for the
# chosen family, or undef when no family was chosen, and $sims is whatever
# place_seq_and_function_in_family returned as its second value.
sub place_in_family {
    my ($self, $seq) = @_;

    my $anno   = ANNOserver->new();
    my $handle = $anno->assign_function_to_prot(
        -hitThreshold    => 3,
        -seqHitThreshold => 2,
        -kmer            => 8,
        -input           => [ [ 'id', undef, $seq ] ],
    );
    my $res = $handle->get_next();

    # Check $res itself before dereferencing it: an undefined result would
    # otherwise die under "use strict" instead of reporting "no placement".
    if (!$res || !@$res || !defined($res->[1])) {
        return undef;
    }

    my $function = $res->[1];

    my ($figfam_id, $sims) = $self->place_seq_and_function_in_family($seq, $function);

    return ($figfam_id ? FF->new($figfam_id, $self) : undef, $sims);
}
138 :    
# Place a sequence into one of the families recorded for a function.
#
# usage: my ($fam_id, $sims) =
#            $figfams->place_seq_and_function_in_family($seq, $function);
#
#   $seq      - protein sequence string.
#   $function - function string; index_for_function maps it to the index
#               that names the per-function blast database
#               "<blast_dir>/FamFuncBlastD/<index % 1000>/<index>.fasta".
#
# The sequence is written to a temporary FASTA file and searched with
# blastall (blastp, -m8 tabular output, e-value cutoff 1.0e-20) against
# that database. Each output row becomes a Sim object. When a row's second
# column looks like "FIG<digits>:<digits>:<rest>", the query length and
# the embedded number are appended to the row and the column is rewritten
# to "FIG<digits>:<rest>".
# NOTE(review): the embedded number is presumably the subject sequence
# length - confirm against the code that builds the blast databases.
#
# Returns undef (after a warning) when the function has no index or the
# blast database file does not exist. Otherwise returns ($fam_id, \@sims),
# where $fam_id is the "FIG<digits>" prefix of the first row's id2, or
# undef when there are no rows or the first row has no such prefix.
#
# Dies when the temporary file cannot be written or blastall cannot be
# started.
sub place_seq_and_function_in_family {
    my ($self, $seq, $function) = @_;

    my $index = $self->index_for_function($function);
    if (!defined($index) || $index eq '') {
        warn "No index found for $function\n";
        return undef;
    }

    my $sub      = $index % 1000;
    my $blast_db = "$self->{blast_dir}/FamFuncBlastD/$sub/$index.fasta";

    if (!-f $blast_db) {
        warn "No blast db found for function $function index $index\n";
        return undef;
    }

    # Write the query sequence. Buffered write errors only surface at
    # close, so that is checked as well.
    my $tmp = "$FIG_Config::temp/seq.$$";
    open(my $seq_fh, ">", $tmp) or die "Cannot write $tmp: $!";
    print $seq_fh ">id\n$seq\n";
    if (!close($seq_fh)) {
        my $err = $!;
        unlink($tmp);
        die "Cannot write $tmp: $err";
    }

    # List-form pipe open: the arguments go straight to blastall without
    # passing through a shell, so unusual characters in the paths are safe.
    my @blast_cmd = (
        "$FIG_Config::ext_bin/blastall",
        '-FF', '-m8',
        '-d', $blast_db,
        '-i', $tmp,
        '-e', '1.0e-20',
        '-p', 'blastp',
    );
    my $blast_fh;
    if (!open($blast_fh, "-|", @blast_cmd)) {
        my $err = $!;
        unlink($tmp);
        die "Cannot run blastall: $err";
    }

    my @sims;
    my $id1_len = length($seq);
    while (my $line = <$blast_fh>) {
        chomp $line;
        my @fields = split(/\t/, $line);
        if (defined($fields[1]) && $fields[1] =~ /^(FIG\d+):(\d+):(.*)$/) {
            push(@fields, $id1_len, $2);
            $fields[1] = "$1:$3";
        }
        push(@sims, bless([@fields], 'Sim'));
    }
    close($blast_fh)
        or warn "blastall against $blast_db did not exit cleanly (status $?)\n";

    # The query file is only needed while blastall runs; remove it so
    # repeated calls do not leave files behind in the temp directory.
    unlink($tmp);

    my $fam_id;
    if (@sims) {
        if ($sims[0]->id2 =~ /^(FIG\d+):/) {
            $fam_id = $1;
        }
    }
    else {
        warn "No sims for $function\n";
    }

    return ($fam_id, \@sims);
}
199 :    
# Index recorded for a function.
#
# usage: my $index = $figfams->index_for_function($function);
#
# Returns the value stored for $function in the function2index table.
sub index_for_function {
    my $self     = shift;
    my $function = shift;

    my $function_to_index = $self->{function2index};
    return $function_to_index->{$function};
}
206 :    
207 : arodri7 1.7
=head3 family_functions

usage: my $functions = $figfams->family_functions();

Returns the family2function table held by this object, as a single
reference (not a copy and not a flattened list). The keys are presumably
family IDs and the values their functions.

=cut

sub family_functions {
    my $self = shift;

    my $family_to_function = $self->{family2function};
    return $family_to_function;
}
219 : arodri7 1.7
# PEGs recorded for a family.
#
# usage: my $pegs = $figfams->family_pegs($fam);
#
# Returns the value stored for $fam in the family2pegs table, unchanged.
sub family_pegs {
    my $self = shift;
    my $fam  = shift;

    my $family_to_pegs = $self->{family2pegs};
    return $family_to_pegs->{$fam};
}
224 : arodri7 1.7
# Function recorded for a family.
#
# usage: my $function = $figfams->family_function($fam);
#
# Returns the value stored for $fam in the family2function table.
sub family_function {
    my $self = shift;
    my $fam  = shift;

    my $family_to_function = $self->{family2function};
    return $family_to_function->{$fam};
}
229 :    
230 : olson 1.48
231 : overbeek 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3