[Bio] / FigKernelPackages / NCBI_genetic_code.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/NCBI_genetic_code.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : olson 1.3
2 :     #
3 :     # This is a SAS component
4 :     #
5 :    
6 : golsen 1.2 #
7 :     # Copyright (c) 2003-2007 University of Chicago and Fellowship
8 :     # for Interpretations of Genomes. All Rights Reserved.
9 :     #
10 :     # This file is part of the SEED Toolkit.
11 :     #
12 :     # The SEED Toolkit is free software. You can redistribute
13 :     # it and/or modify it under the terms of the SEED Toolkit
14 :     # Public License.
15 :     #
16 :     # You should have received a copy of the SEED Toolkit Public License
17 :     # along with this program; if not write to the University of Chicago
18 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
19 :     # Genomes at veronika@thefig.info or download a copy from
20 :     # http://www.theseed.org/LICENSE.TXT.
21 :     #
22 :    
23 : golsen 1.1 package NCBI_genetic_code;
24 : golsen 1.2
25 : golsen 1.1 #
26 :     # Access to the numbered genetic codes used by NCBI. A code is returned
27 :     # as a reference to a hash that has both uppercase and lowercase translations.
28 :     #
29 : golsen 1.4 # There are two access methods: by hash lookup and by function:
30 : golsen 1.1 #
31 : golsen 1.4 # \%genetic_code = $NCBI_genetic_code::genetic_code{ $n };
32 :     # \%genetic_code = NCBI_genetic_code::genetic_code( $n );
33 : golsen 1.1 #
34 : golsen 1.4 # $genetic_code_name = $NCBI_genetic_code::genetic_code_name{ $n };
35 :     # $genetic_code_name = NCBI_genetic_code::genetic_code_name( $n );
36 :     #
37 :     # \%is_start_codon = $NCBI_genetic_code::is_start_codon{ $n };
38 :     # \%is_start_codon = NCBI_genetic_code::is_start_codon( $n );
39 :     #
40 :     # \%is_stop_codon = $NCBI_genetic_code::is_stop_codon{ $n };
41 :     # \%is_stop_codon = NCBI_genetic_code::is_stop_codon( $n );
42 : golsen 1.1 #
43 :     # The only difference in behaviour is that the subroutines default to code
44 :     # number 1 when the parameter is omitted or false (undef, 0 or ''):
45 :     #
46 : golsen 1.2 # \%genetic_code_1 = NCBI_genetic_code::genetic_code();
47 : golsen 1.1 #
48 :     # All other invalid numbers return an undefined value.
49 :     #
50 :    
51 :     use strict;
52 :    
#  The four nucleotides, in the order NCBI uses for the columns of its
#  genetic code tables.
my @nt = qw( T C A G );  # The order here must match that in the tables

#  All 64 codons: the first position varies slowest and the third position
#  fastest (TTT, TTC, TTA, TTG, TCT, ... GGG).  This is the same order as the
#  Base1/Base2/Base3 rows in the DATA section, so the i-th codon here pairs
#  with the i-th character of each AAs and Starts line.
my @triplets;
foreach my $first ( @nt )
{
    foreach my $second ( @nt )
    {
        foreach my $third ( @nt )
        {
            push @triplets, $first . $second . $third;
        }
    }
}
61 :    
#  Package-level lookup tables, each keyed by the NCBI translation table
#  number.  They are filled in once, at load time, from the DATA section.
our %genetic_code;        #  number => { codon => amino acid }, upper and lower case
our %genetic_code_name;   #  number => descriptive name of the code
our %is_start_codon;      #  number => { codon => 1 } for codons marked M in Starts
our %is_stop_codon;       #  number => { codon => 1 } for codons translated as '*'

#  Parse the DATA section.  Each table is introduced by a header line of the
#  form "N. Name (transl_table=N)", followed by an "AAs = ..." line and a
#  "Starts = ..." line whose characters pair up, in order, with the codons in
#  @triplets (declared earlier in this file).  The Base1/Base2/Base3 lines
#  match none of the patterns and are ignored.
#
#  The line is read into a lexical rather than $_ because while ( <FH> ) does
#  not localize $_; loading this module at run time from inside a for/map/grep
#  block would otherwise overwrite the caller's aliased data.
my $code_num;
while ( defined( my $line = <DATA> ) )
{
    chomp $line;

    #  Header line: remember which code the following lines belong to.
    if ( $line =~ /\d+\.\s+(.*)\s+\(transl_table=(\d+)/ )
    {
        $code_num = $2;
        $genetic_code_name{ $code_num } = $1;
    }

    #  Amino acid line: build the translation table and the stop codon set.
    if ( $line =~ /AAs\s+=\s+(\S+)/ )
    {
        my @aas = split //, $1;
        my %code;
        my %stops;
        foreach my $codon ( @triplets )
        {
            my $aa = shift( @aas );
            $code{ $codon }    = $aa;
            $code{ lc $codon } = lc $aa;
            if ( $aa eq '*' ) { $stops{ $codon } = $stops{ lc $codon } = 1 }
        }
        $genetic_code{ $code_num }  = \%code;
        $is_stop_codon{ $code_num } = \%stops;
    }

    #  Start codon line: an 'M' marks a codon that can initiate translation.
    if ( $line =~ /Starts\s+=\s+(\S+)/ )
    {
        my @data = split //, $1;
        my %starts;
        foreach my $codon ( @triplets )
        {
            if ( shift( @data ) eq 'M' )
            {
                $starts{ $codon } = $starts{ lc $codon } = 1;
            }
        }
        $is_start_codon{ $code_num } = \%starts;
    }
}
107 :    
#  Return the codon => amino acid hash reference for NCBI genetic code $n.
#  A missing or false argument selects code 1; an unknown number gives undef.
sub genetic_code
{
    my $code_id = shift;
    $code_id = '1' unless $code_id;
    return $genetic_code{ $code_id };
}
#  Return the descriptive name of NCBI genetic code $n.
#  A missing or false argument selects code 1; an unknown number gives undef.
sub genetic_code_name
{
    my $code_id = shift;
    $code_id = '1' unless $code_id;
    return $genetic_code_name{ $code_id };
}
#  Return the hash reference of start codons (codon => 1) for NCBI code $n.
#  A missing or false argument selects code 1; an unknown number gives undef.
sub is_start_codon
{
    my $code_id = shift;
    $code_id = '1' unless $code_id;
    return $is_start_codon{ $code_id };
}
#  Return the hash reference of stop codons (codon => 1) for NCBI code $n.
#  A missing or false argument selects code 1; an unknown number gives undef.
sub is_stop_codon
{
    my $code_id = shift;
    $code_id = '1' unless $code_id;
    return $is_stop_codon{ $code_id };
}
112 :    
113 :     1;
114 : golsen 1.1
115 : golsen 1.4 __DATA__
116 : golsen 1.1
117 :     1. The Standard Code (transl_table=1)
118 :    
119 :     AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
120 :     Starts = ---M---------------M---------------M----------------------------
121 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
122 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
123 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
124 :    
125 :     2. The Vertebrate Mitochondrial Code (transl_table=2)
126 :    
127 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
128 :     Starts = --------------------------------MMMM---------------M------------
129 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
130 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
131 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
132 :    
133 :     3. The Yeast Mitochondrial Code (transl_table=3)
134 :    
135 :     AAs = FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
136 :     Starts = ----------------------------------MM----------------------------
137 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
138 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
139 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
140 :    
141 :     4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code (transl_table=4)
142 :    
143 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
144 :     Starts = --MM---------------M------------MMMM---------------M------------
145 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
146 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
147 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
148 :    
149 :     5. The Invertebrate Mitochondrial Code (transl_table=5)
150 :    
151 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
152 :     Starts = ---M----------------------------MMMM---------------M------------
153 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
154 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
155 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
156 :    
157 :     6. The Ciliate, Dasycladacean and Hexamita Nuclear Code (transl_table=6)
158 :    
159 :     AAs = FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
160 :     Starts = -----------------------------------M----------------------------
161 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
162 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
163 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
164 :    
165 :     9. The Echinoderm and Flatworm Mitochondrial Code (transl_table=9)
166 :    
167 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
168 :     Starts = -----------------------------------M---------------M------------
169 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
170 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
171 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
172 :    
173 :     10. The Euplotid Nuclear Code (transl_table=10)
174 :    
175 :     AAs = FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
176 :     Starts = -----------------------------------M----------------------------
177 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
178 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
179 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
180 :    
181 :     11. The Bacterial and Plant Plastid Code (transl_table=11)
182 :    
183 :     AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
184 :     Starts = ---M---------------M------------MMMM---------------M------------
185 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
186 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
187 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
188 :    
189 :     12. The Alternative Yeast Nuclear Code (transl_table=12)
190 :    
191 :     AAs = FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
192 :     Starts = -------------------M---------------M----------------------------
193 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
194 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
195 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
196 :    
197 :     13. The Ascidian Mitochondrial Code (transl_table=13)
198 :    
199 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
200 :     Starts = ---M------------------------------MM---------------M------------
201 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
202 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
203 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
204 :    
205 :     14. The Alternative Flatworm Mitochondrial Code (transl_table=14)
206 :    
207 :     AAs = FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
208 :     Starts = -----------------------------------M----------------------------
209 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
210 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
211 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
212 :    
213 :     15. Blepharisma Nuclear Code (transl_table=15)
214 :    
215 :     AAs = FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
216 :     Starts = -----------------------------------M----------------------------
217 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
218 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
219 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
220 :    
221 :     16. Chlorophycean Mitochondrial Code (transl_table=16)
222 :    
223 :     AAs = FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
224 :     Starts = -----------------------------------M----------------------------
225 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
226 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
227 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
228 :    
229 :     21. Trematode Mitochondrial Code (transl_table=21)
230 :    
231 :     AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
232 :     Starts = -----------------------------------M---------------M------------
233 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
234 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
235 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
236 :    
237 :     22. Scenedesmus obliquus mitochondrial Code (transl_table=22)
238 :    
239 :     AAs = FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
240 :     Starts = -----------------------------------M----------------------------
241 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
242 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
243 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
244 :    
245 :     23. Thraustochytrium Mitochondrial Code (transl_table=23)
246 :    
247 :     AAs = FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
248 :     Starts = --------------------------------M--M---------------M------------
249 :     Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
250 :     Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
251 :     Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3