[Bio] / FigKernelPackages / NCBI_genetic_code.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/NCBI_genetic_code.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : olson 1.3
2 :     #
3 :     # This is a SAS component
4 :     #
5 :    
6 : golsen 1.2 #
7 : golsen 1.5 # Copyright (c) 2003-2014 University of Chicago and Fellowship
8 : golsen 1.2 # for Interpretations of Genomes. All Rights Reserved.
9 :     #
10 :     # This file is part of the SEED Toolkit.
11 :     #
12 :     # The SEED Toolkit is free software. You can redistribute
13 :     # it and/or modify it under the terms of the SEED Toolkit
14 :     # Public License.
15 :     #
16 :     # You should have received a copy of the SEED Toolkit Public License
17 :     # along with this program; if not write to the University of Chicago
18 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
19 :     # Genomes at veronika@thefig.info or download a copy from
20 :     # http://www.theseed.org/LICENSE.TXT.
21 :     #
22 :    
23 : golsen 1.1 package NCBI_genetic_code;
24 : golsen 1.2
25 : golsen 1.1 #
26 :     # Access to the numbered genetic codes used by NCBI. A code is returned
27 :     # as a reference to a hash that has both uppercase and lowercase translations.
28 :     #
29 : golsen 1.4 # There are two access methods: by hash lookup and by function:
30 : golsen 1.1 #
31 : golsen 1.4 # \%genetic_code = $NCBI_genetic_code::genetic_code{ $n };
32 :     # \%genetic_code = NCBI_genetic_code::genetic_code( $n );
33 : golsen 1.1 #
34 : golsen 1.4 # $genetic_code_name = $NCBI_genetic_code::genetic_code_name{ $n };
35 :     # $genetic_code_name = NCBI_genetic_code::genetic_code_name( $n );
36 :     #
37 :     # \%is_start_codon = $NCBI_genetic_code::is_start_codon{ $n };
38 :     # \%is_start_codon = NCBI_genetic_code::is_start_codon( $n );
39 :     #
40 :     # \%is_stop_codon = $NCBI_genetic_code::is_stop_codon{ $n };
41 :     # \%is_stop_codon = NCBI_genetic_code::is_stop_codon( $n );
42 : golsen 1.1 #
43 : golsen 1.5 # The only difference in behaviour is that when no parameter (or a false
44 :     # value) is supplied, the subroutines default to code number 0: the same
45 :     # translation as code number 11, but with only the three most common
45 :     # bacterial and archaeal initiator codons (ATG, GTG and TTG):
46 : golsen 1.1 #
47 : golsen 1.5 # \%genetic_code = NCBI_genetic_code::genetic_code();
48 : golsen 1.1 #
49 : golsen 1.5 # All other invalid code numbers return an undefined value.
50 : golsen 1.1 #
51 :    
52 :     use strict;
53 :    
#  The four nucleotides, in the order used by the Base1/Base2/Base3 rows of
#  the tables in the DATA section.  The order here must match those tables.
my @nt = qw( T C A G );

#  All 64 codons, with the first position varying slowest and the third
#  position varying fastest (TTT, TTC, TTA, TTG, TCT, ... GGG), so that the
#  i-th codon corresponds to the i-th symbol of an AAs or Starts row.
my @triplets;
foreach my $first ( @nt )
{
    foreach my $second ( @nt )
    {
        foreach my $third ( @nt )
        {
            push @triplets, $first . $second . $third;
        }
    }
}
62 :    
#  Package tables, each keyed by the NCBI translation table number:
#
#     %genetic_code       reference to a hash: codon => amino acid, with both
#                         uppercase and lowercase codons as keys
#     %genetic_code_name  descriptive name of the code (a string)
#     %is_start_codon     reference to a hash: codon => 1 for each initiator
#     %is_stop_codon      reference to a hash: codon => 1 for each terminator
#
our %genetic_code;
our %genetic_code_name;
our %is_start_codon;
our %is_stop_codon;

#  Fill the tables from the records in the DATA section.  Each record is a
#  header line carrying the name and "(transl_table=N)", followed by an
#  "AAs = ..." row and a "Starts = ..." row with one symbol per codon, in the
#  order of @triplets.  The Base1/Base2/Base3 rows are not parsed.
#
#  Lines are read into a lexical variable, not $_:  while ( <DATA> ) assigns
#  to the global $_ without localizing it, so loading this module could
#  overwrite the caller's $_ (or a list element aliased by an enclosing
#  foreach loop).

my $code_num;    #  Table number taken from the most recent header line
while ( defined( my $line = <DATA> ) )
{
    chomp $line;

    #  Header, e.g. "11. The Bacterial and Plant Plastid Code (transl_table=11)"
    if ( $line =~ /\d+\.\s+(.*)\s+\(transl_table=(\d+)/ )
    {
        $code_num = $2;
        $genetic_code_name{ $code_num } = $1;
    }

    #  A table row cannot be filed until a header has supplied its number
    next if ! defined $code_num;

    #  Amino acid row:  the i-th symbol is the translation of the i-th codon;
    #  '*' marks a terminator.
    if ( $line =~ /AAs\s+=\s+(\S+)/ )
    {
        my @aas = split //, $1;
        my %code;
        my %stops;
        foreach my $codon ( @triplets )
        {
            my $aa = shift( @aas );
            last if ! defined $aa;    #  Short row: leave the rest undefined

            $code{ $codon }    = $aa;
            $code{ lc $codon } = lc $aa;
            if ( $aa eq '*' ) { $stops{ $codon } = $stops{ lc $codon } = 1 }
        }
        $genetic_code{ $code_num }  = \%code;
        $is_stop_codon{ $code_num } = \%stops;
    }

    #  Initiator row:  'M' in the i-th position marks the i-th codon as a
    #  start codon; any other symbol is ignored.
    if ( $line =~ /Starts\s+=\s+(\S+)/ )
    {
        my @flags = split //, $1;
        my %starts;
        foreach my $codon ( @triplets )
        {
            my $flag = shift( @flags );
            last if ! defined $flag;  #  Short row: no more initiators

            if ( $flag eq 'M' )
            {
                $starts{ $codon } = $starts{ lc $codon } = 1;
            }
        }
        $is_start_codon{ $code_num } = \%starts;
    }
}
108 :    
#  Return the translation table for NCBI genetic code $n, as a reference to
#  a hash from codon to amino acid.  A missing or false $n selects table 0;
#  a number for which no table is stored gives undef.
sub genetic_code
{
    my ( $n ) = @_;
    return $genetic_code{ $n ? $n : '0' };
}
#  Return the descriptive name (a string) of NCBI genetic code $n.  A missing
#  or false $n selects table 0; a number for which no name is stored gives
#  undef.
sub genetic_code_name
{
    my ( $n ) = @_;
    return $genetic_code_name{ $n ? $n : '0' };
}
#  Return the initiator codons of NCBI genetic code $n, as a reference to a
#  hash whose keys are the start codons (uppercase and lowercase).  A missing
#  or false $n selects table 0; a number for which no table is stored gives
#  undef.
sub is_start_codon
{
    my ( $n ) = @_;
    return $is_start_codon{ $n ? $n : '0' };
}
#  Return the terminator codons of NCBI genetic code $n, as a reference to a
#  hash whose keys are the stop codons (uppercase and lowercase).  A missing
#  or false $n selects table 0; a number for which no table is stored gives
#  undef.
sub is_stop_codon
{
    my ( $n ) = @_;
    return $is_stop_codon{ $n ? $n : '0' };
}
113 : golsen 1.4
114 :     1;
115 : golsen 1.1
116 : golsen 1.4 __DATA__
117 : golsen 1.1
118 : golsen 1.5 0. The Standard Code with the most common bacterial and archaeal initiators (transl_table=0)
119 :    
120 :     AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
121 :     Starts = ---M-------------------------------M---------------M------------
122 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
123 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
124 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
125 :    
126 : golsen 1.1 1. The Standard Code (transl_table=1)
127 :    
128 :     AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
129 :     Starts = ---M---------------M---------------M----------------------------
130 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
131 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
132 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
133 :    
134 :     2. The Vertebrate Mitochondrial Code (transl_table=2)
135 :    
136 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
137 :     Starts = --------------------------------MMMM---------------M------------
138 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
139 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
140 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
141 :    
142 :     3. The Yeast Mitochondrial Code (transl_table=3)
143 :    
144 :     AAs = FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
145 :     Starts = ----------------------------------MM----------------------------
146 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
147 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
148 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
149 :    
150 :     4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code (transl_table=4)
151 :    
152 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
153 :     Starts = --MM---------------M------------MMMM---------------M------------
154 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
155 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
156 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
157 :    
158 :     5. The Invertebrate Mitochondrial Code (transl_table=5)
159 :    
160 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
161 :     Starts = ---M----------------------------MMMM---------------M------------
162 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
163 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
164 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
165 :    
166 :     6. The Ciliate, Dasycladacean and Hexamita Nuclear Code (transl_table=6)
167 :    
168 :     AAs = FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
169 :     Starts = -----------------------------------M----------------------------
170 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
171 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
172 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
173 :    
174 :     9. The Echinoderm and Flatworm Mitochondrial Code (transl_table=9)
175 :    
176 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
177 :     Starts = -----------------------------------M---------------M------------
178 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
179 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
180 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
181 :    
182 :     10. The Euplotid Nuclear Code (transl_table=10)
183 :    
184 :     AAs = FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
185 :     Starts = -----------------------------------M----------------------------
186 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
187 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
188 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
189 :    
190 :     11. The Bacterial and Plant Plastid Code (transl_table=11)
191 :    
192 :     AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
193 :     Starts = ---M---------------M------------MMMM---------------M------------
194 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
195 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
196 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
197 :    
198 :     12. The Alternative Yeast Nuclear Code (transl_table=12)
199 :    
200 :     AAs = FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
201 :     Starts = -------------------M---------------M----------------------------
202 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
203 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
204 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
205 :    
206 :     13. The Ascidian Mitochondrial Code (transl_table=13)
207 :    
208 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
209 :     Starts = ---M------------------------------MM---------------M------------
210 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
211 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
212 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
213 :    
214 :     14. The Alternative Flatworm Mitochondrial Code (transl_table=14)
215 :    
216 :     AAs = FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
217 :     Starts = -----------------------------------M----------------------------
218 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
219 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
220 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
221 :    
222 :     15. Blepharisma Nuclear Code (transl_table=15)
223 :    
224 :     AAs = FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
225 :     Starts = -----------------------------------M----------------------------
226 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
227 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
228 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
229 :    
230 :     16. Chlorophycean Mitochondrial Code (transl_table=16)
231 :    
232 :     AAs = FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
233 :     Starts = -----------------------------------M----------------------------
234 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
235 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
236 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
237 :    
238 :     21. Trematode Mitochondrial Code (transl_table=21)
239 :    
240 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
241 :     Starts = -----------------------------------M---------------M------------
242 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
243 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
244 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
245 :    
246 :     22. Scenedesmus obliquus mitochondrial Code (transl_table=22)
247 :    
248 :     AAs = FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
249 :     Starts = -----------------------------------M----------------------------
250 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
251 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
252 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
253 :    
254 :     23. Thraustochytrium Mitochondrial Code (transl_table=23)
255 :    
256 :     AAs = FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
257 :     Starts = --------------------------------M--M---------------M------------
258 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
259 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
260 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3