[Bio] / FigKernelPackages / FigFam.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/FigFam.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : overbeek 1.1 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 :     package FigFam;
19 :    
20 :     use strict;
21 :     use FIG;
22 :     use SameFunc;
23 :     use Data::Dumper;
24 :    
25 :     # This is the constructor. Presumably, $class is 'FigFam'.
26 :     #
27 :     # $fam_id is the ID of the family (of the form FIGnnnnnnn). It is required.
28 :     #
29 :     # $fam_dir is optional. If it exists,
30 :     # it gives the directory that is (or will) contain the FigFams. This directory will be
31 :     # created, if it does not exist. If it is not specified, $FIG_Config::data/FIGFAMS will be
32 :     # used.
33 :     #
34 :     # $fam_file is also optional. If it is given, it contains a 2-column version of the FIG families.
35 :     # If the $fam_dir/yyy/FIGxxxxyyy directory (which is where the dikrectory for the specific family
36 :     # will go) does not exist, the specification of the PEGs in $fam_file will be used to initialize
37 :     # the family.
38 :     #
39 :    
40 :     sub new {
41 :     my($class,$fig,$fam_id,$fam_dir,$fam_file) = @_;
42 :    
43 :     ($fam_id =~ /^FIG\d{3}(\d{3})$/) || die "$fam_id is not a valid FIG family ID";
44 :    
45 :     my $yyy = $1;
46 :     my $fam = {};
47 :     $fam->{id} = $fam_id;
48 :     $fam->{fig} = $fig;
49 :    
50 :     if (! defined($fam_dir))
51 :     {
52 :     $fam_dir = "$FIG_Config::data/FIGFAMS";
53 :     }
54 :    
55 :     my $dir = "$fam_dir/$yyy/$fam_id";
56 :     &FIG::verify_dir($dir);
57 :     if ((! -s "$dir/PEGs") && defined($fam_file))
58 :     {
59 :     system("grep $fam_id \"$fam_file\" > \"$dir/PEGs\"");
60 :     my @first = `head -n 1 \"$dir/PEGs\"`;
61 :     if ($first[0] =~ /^\S+\t\S+\t(\S.*\S)/)
62 :     {
63 :     open(FUNC,">$dir/function") || die "could not open $dir/function";
64 :     print FUNC "$1\n";
65 :     close(FUNC);
66 :     }
67 :     else
68 :     {
69 :     die "$dir has no valid function file";
70 :     }
71 :     }
72 :     open(FUNC,"<$dir/function") || die "could not open $dir/function";
73 :     my $func = <FUNC>;
74 :     chomp $func;
75 :     close(FUNC);
76 :     $fam->{function} = $func;
77 :    
78 :     my($peg,$pegs);
79 :     my $pegsL = [sort { &FIG::by_fig_id($a,$b) } map { $_ =~ /^\S+\t(\S+)/; $1 } `cat \"$dir/PEGs\"`];
80 :    
81 :     $fam->{pegsL} = $pegsL;
82 :     my $pegsH = {};
83 :     foreach $peg (@$pegsL)
84 :     {
85 :     $pegsH->{$peg} = 1;
86 :     }
87 :     $fam->{pegsH} = $pegsH;
88 :    
89 :     if (! -s "$dir/PEGs.fasta.pin")
90 :     {
91 :     open(FASTA,">$dir/PEGs.fasta") || die "could not open $dir/PEGs.fasta";
92 :     foreach $peg (@$pegsL)
93 :     {
94 :     my $seq = $fig->get_translation($peg);
95 :     if ($seq)
96 :     {
97 :     print FASTA ">$peg\n$seq\n";
98 :     }
99 :     }
100 :     close(FASTA);
101 :    
102 :     system "$FIG_Config::ext_bin/formatdb -i \"$dir/PEGs.fasta\" -p";
103 :     }
104 :    
105 :     if (! -s "$dir/bounds")
106 :     {
107 :     open(BOUNDS,">$dir/bounds") || die "could not open $dir/bounds";
108 :     foreach $peg (@$pegsL)
109 :     {
110 :     my @sims = $fig->sims($peg,1000,1.0e-20,"fig");
111 :    
112 :     my($i,$hits,$bad,$bad_sc,$last_good,$func2);
113 :     for ($i=0, $hits=0, $bad="", $bad_sc=0, $last_good=1; ($i < @sims) && (! $bad); $i++)
114 :     {
115 :     my $sim = $sims[$i];
116 :     my $id2 = $sim->id2;
117 :     if ($pegsH->{$id2})
118 :     {
119 :     $last_good = $sim->psc;
120 :     $hits++;
121 :     }
122 :     else
123 :     {
124 :     $func2 = $fig->function_of($id2);
125 :     if (! &ok_func($fig,$func,$func2,$id2))
126 :     {
127 :     $bad = $id2;
128 :     $bad_sc = $sim->psc;
129 :     }
130 :     }
131 :     }
132 :    
133 :     if ($hits > 0)
134 :     {
135 :     print BOUNDS join("\t",($peg,$hits,$last_good,$bad,$bad_sc)),"\n";
136 :     }
137 :     }
138 :     close(BOUNDS);
139 :     }
140 :     open(BOUNDS,"<$dir/bounds") || die "could not open $dir/bounds";
141 :    
142 :     my $bounds = {};
143 :     my $x;
144 :     while (defined($x = <BOUNDS>))
145 :     {
146 :     chomp $x;
147 :     my @flds = split(/\t/,$x);
148 :     my $peg =shift @flds;
149 :     $bounds->{$peg} = [@flds];
150 :     }
151 :     close(BOUNDS);
152 :     $fam->{bounds} = $bounds;
153 :    
154 :     if (! -d "$dir/blastout/Split")
155 :     {
156 :     &FIG::run("split_and_trim_sequences $dir/Split blastout=$dir/Split/blastout < $dir/PEGs.fasta");
157 :     }
158 :    
159 :     bless $fam,$class;
160 :     return $fam;
161 :     }
162 :    
163 :     sub ok_func {
164 :     my($fig,$func,$func2,$id2) = @_;
165 :    
166 :     my $i;
167 :     if (&SameFunc::same_func($func,$func2)) { return 1 }
168 :     if (! &FIG::hypo($func2)) { return 0 }
169 :     my @sims = $fig->sims($id2,5,1.0e-30,"fig");
170 :     for ($i=0; ($i < @sims) && ($func eq $fig->function_of($sims[$i]->id2)); $i++) {}
171 :     return ($i == 5);
172 :     }
173 :    
174 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3