11 |
# map from model's compound abbreviations to KEGG compound ids |
# map from model's compound abbreviations to KEGG compound ids |
12 |
# and reaction abbreviations to KEGG reaction ids |
# and reaction abbreviations to KEGG reaction ids |
13 |
|
|
14 |
my (%compound_abbrev_2_name, %compound_id_2_abbrev, %reaction_abbrev_2_id, %reaction_abbrev_2_name, %reaction_2_substrates, %reaction_2_products); |
our (%compound_abbrev_2_name, %compound_abbrev_2_id, %compound_id_2_abbrev, %reaction_abbrev_2_id, %reaction_abbrev_2_name, %reaction_2_substrates, %reaction_2_products, %reversibility); |
15 |
|
|
16 |
open(MODEL_COMPOUNDS, "<$FIG_Config::global/Models/$genome_id/compound_name") or die("Run parse_model first"); |
open(MODEL_COMPOUNDS, "<$FIG_Config::global/Models/$genome_id/compound_name") or die("Run parse_model first"); |
17 |
open(MODEL_REACTIONS, "<$FIG_Config::global/Models/$genome_id/reaction") or die("Run parse_model first"); |
open(MODEL_REACTIONS, "<$FIG_Config::global/Models/$genome_id/reaction") or die("Run parse_model first"); |
21 |
open(COMPOUND_OUT, ">$FIG_Config::global/Models/$genome_id/compound_to_kegg"); |
open(COMPOUND_OUT, ">$FIG_Config::global/Models/$genome_id/compound_to_kegg"); |
22 |
open(REAC_OUT, ">$FIG_Config::global/Models/$genome_id/reaction_to_kegg"); |
open(REAC_OUT, ">$FIG_Config::global/Models/$genome_id/reaction_to_kegg"); |
23 |
|
|
|
my %reversibility; |
|
|
|
|
24 |
while (my $line = <MODEL_REACTIONS>) |
while (my $line = <MODEL_REACTIONS>) |
25 |
{ |
{ |
26 |
chomp $line; |
chomp $line; |
48 |
@cids = $fig->ids_of_compound_like_name($cabbrev); |
@cids = $fig->ids_of_compound_like_name($cabbrev); |
49 |
} |
} |
50 |
|
|
51 |
|
$compound_abbrev_2_id{$cabbrev} = []; |
52 |
|
|
53 |
if (@cids > 0) |
if (@cids > 0) |
54 |
{ |
{ |
55 |
foreach my $cid (@cids) |
foreach my $cid (@cids) |
56 |
{ |
{ |
57 |
$compound_id_2_abbrev{$cid} = $cabbrev; |
$compound_id_2_abbrev{$cid} = $cabbrev; |
58 |
|
push @{$compound_abbrev_2_id{$cabbrev}}, $cid; |
59 |
} |
} |
60 |
} |
} |
61 |
} |
} |
81 |
} |
} |
82 |
} |
} |
83 |
|
|
84 |
my %cabbrevs_used_in_reactions; |
our %cabbrevs_used_in_reactions; |
85 |
|
|
86 |
foreach my $rabbrev (keys %reaction_abbrev_2_name) |
foreach my $rabbrev (keys %reaction_abbrev_2_name) |
87 |
{ |
{ |
111 |
print "$rabbrev, $rname, ($reversibility{$rabbrev}): No reaction id matched on KEGG name\n"; |
print "$rabbrev, $rname, ($reversibility{$rabbrev}): No reaction id matched on KEGG name\n"; |
112 |
|
|
113 |
# Now see if the name is associated with an EC number |
# Now see if the name is associated with an EC number |
114 |
my @tmp = `grep -i "$rname" $FIG_Config::data/KEGG/ECtable`; |
my @ec_info = `grep -i "$rname" $FIG_Config::data/KEGG/ECtable`; |
115 |
|
my %check_reactions; |
116 |
|
|
117 |
if (@tmp == 0) |
if (@ec_info == 0) |
118 |
{ |
{ |
119 |
#try removing anything in parentheses to get rid of comments |
#try removing anything in parentheses to get rid of comments |
120 |
my $tmp_name = $rname; |
my $tmp_name = $rname; |
125 |
if ($tmp_name ne $rname) |
if ($tmp_name ne $rname) |
126 |
{ |
{ |
127 |
print "\t Trying $tmp_name\n"; |
print "\t Trying $tmp_name\n"; |
128 |
@tmp = `grep -i "$tmp_name" $FIG_Config::data/KEGG/ECtable`; |
@ec_info = `grep -i "$tmp_name" $FIG_Config::data/KEGG/ECtable`; |
129 |
} |
} |
130 |
} |
} |
131 |
|
|
132 |
if (@tmp > 0) |
if (@ec_info > 0) |
133 |
{ |
{ |
134 |
my %reaction_scores; |
foreach my $ec_line (@ec_info) |
|
|
|
|
ec_loop: foreach my $ec_line (@tmp) |
|
135 |
{ |
{ |
136 |
$ec_line =~ /\s+(\d+\.\d+\.\d+\.\d+)/; |
$ec_line =~ /\s+(\d+\.\d+\.\d+\.\d+)/; |
137 |
my $ec = $1; |
my $ec = $1; |
139 |
my @reactions = $fig->catalyzes($ec); |
my @reactions = $fig->catalyzes($ec); |
140 |
print "\t Found reactions: @reactions\n"; |
print "\t Found reactions: @reactions\n"; |
141 |
|
|
142 |
if (@reactions > 0) |
foreach my $reaction (@reactions) |
143 |
|
{ |
144 |
|
$check_reactions{$reaction} = 1; |
145 |
|
} |
146 |
|
} |
147 |
|
|
148 |
|
} |
149 |
|
else |
150 |
|
{ |
151 |
|
print "\t No EC found\n"; |
152 |
|
|
153 |
|
# try matching on compounds |
154 |
|
my @compounds = keys %{$reaction_2_substrates{$rabbrev}}; |
155 |
|
push @compounds, keys %{$reaction_2_products{$rabbrev}}; |
156 |
|
my %cids; |
157 |
|
foreach my $cabbrev (@compounds) |
158 |
|
{ |
159 |
|
my @cids = @{$compound_abbrev_2_id{$cabbrev}}; |
160 |
|
map { $cids{$_} = 1 } @cids; |
161 |
|
} |
162 |
|
delete @cids{ ('C00001', 'C00002', 'C00003', 'C00004', 'C00005', 'C00006', 'C00007', 'C00008', |
163 |
|
'C00009', 'C00010', 'C00011', 'C00012', 'C00013', 'C00014', 'C00015', 'C00016', |
164 |
|
'C00017', 'C00018', 'C00019', 'C00020', 'C00021', 'C00080') }; |
165 |
|
print "\t Found @{[keys %cids]}\n"; |
166 |
|
|
167 |
|
my %no_reactions; |
168 |
|
|
169 |
|
foreach my $cid (keys %cids) |
170 |
|
{ |
171 |
|
my @rids = $fig->comp2react($cid); |
172 |
|
|
173 |
|
foreach my $rid (@rids) |
174 |
|
{ |
175 |
|
if (! exists $check_reactions{$rid} && ! exists $no_reactions{$rid}) |
176 |
|
{ |
177 |
|
my $main = 0; |
178 |
|
my @tuples = $fig->reaction2comp($rid, 0); |
179 |
|
push @tuples, $fig->reaction2comp($rid, 1); |
180 |
|
|
181 |
|
for my $tuple (@tuples) |
182 |
|
{ |
183 |
|
if (@$tuple[0] eq $cid && @$tuple[2]) |
184 |
|
{ |
185 |
|
$main = 1; |
186 |
|
last; |
187 |
|
} |
188 |
|
} |
189 |
|
|
190 |
|
$main ? $check_reactions{$rid} = 1 : $no_reactions{$rid} = 1; |
191 |
|
} |
192 |
|
} |
193 |
|
} |
194 |
|
} |
195 |
|
|
196 |
|
my %reaction_scores = %{score_reactions($rabbrev, keys %check_reactions)}; |
197 |
|
|
198 |
|
my @reaction_list = sort { $reaction_scores{$a} <=> $reaction_scores{$b} } keys %reaction_scores; |
199 |
|
|
200 |
|
if (@reaction_list > 0) |
201 |
|
{ |
202 |
|
$reaction_abbrev_2_id{$rabbrev} = \@reaction_list; |
203 |
|
print "\t reaction list: @reaction_list\n"; |
204 |
|
|
205 |
|
if (@reaction_list == 1) |
206 |
|
{ |
207 |
|
my $kegg_rev = $fig->reversible($reaction_list[0]); |
208 |
|
if ($kegg_rev == $reversibility{$rabbrev}) |
209 |
|
{ |
210 |
|
print REAC_OUT "$rabbrev\t@reaction_list\t1\n"; |
211 |
|
} |
212 |
|
else |
213 |
|
{ |
214 |
|
print REAC_OUT "$rabbrev\t@reaction_list\t0\n"; |
215 |
|
} |
216 |
|
} |
217 |
|
else |
218 |
|
{ |
219 |
|
print REAC_OUT "$rabbrev\t@reaction_list\n"; |
220 |
|
} |
221 |
|
} |
222 |
|
else |
223 |
|
{ |
224 |
|
print "\t No matching reactions\n"; |
225 |
|
print REAC_OUT "$rabbrev\n"; |
226 |
|
} |
227 |
|
|
228 |
|
} |
229 |
|
} |
230 |
|
|
231 |
|
close(REAC_OUT); |
232 |
|
|
233 |
|
print "\n\nNumber of reactions with matches: ", scalar(keys(%reaction_abbrev_2_id)), "\n"; |
234 |
|
|
235 |
|
foreach my $cabbrev (keys %cabbrevs_used_in_reactions) |
236 |
|
{ |
237 |
|
my @cids = keys %{$cabbrevs_used_in_reactions{$cabbrev}}; |
238 |
|
print COMPOUND_OUT "$cabbrev\t@cids\n"; |
239 |
|
} |
240 |
|
|
241 |
|
sub get_reaction_id |
242 |
|
{ |
243 |
|
my $match_name = shift; |
244 |
|
|
245 |
|
my ($reaction_id, $entry, $name); |
246 |
|
|
247 |
|
open(KEGG_REACTIONS, "<$FIG_Config::data/KEGG/reaction") or die("Couldn't open KEGG reaction file"); |
248 |
|
|
249 |
|
$/ = "\n///\n"; |
250 |
|
while (<KEGG_REACTIONS>) |
251 |
|
{ |
252 |
|
if ($_ =~ /ENTRY\s+(\S+)\s+Reaction\nNAME\s+(\S[^\n]+\n)((\s+(\S[^\n]+\n))*)/s) |
253 |
|
{ |
254 |
|
$reaction_id = $1; |
255 |
|
|
256 |
|
if ($3) |
257 |
|
{ |
258 |
|
$name = "$2 $3"; |
259 |
|
} |
260 |
|
else |
261 |
|
{ |
262 |
|
$name = $2; |
263 |
|
} |
264 |
|
$name =~ s/(\s+\$|\s+)/ /g; |
265 |
|
chop $name; |
266 |
|
|
267 |
|
if ($name eq $match_name) |
268 |
|
{ |
269 |
|
$/ = "\n"; |
270 |
|
return $reaction_id; |
271 |
|
} |
272 |
|
} |
273 |
|
} |
274 |
|
|
275 |
|
$/ = "\n"; |
276 |
|
return; |
277 |
|
} |
278 |
|
|
279 |
|
sub score_reactions |
280 |
{ |
{ |
281 |
|
my $rabbrev = shift @_; |
282 |
|
my %reaction_scores; |
283 |
|
|
284 |
# try to select reaction based on compounds |
# try to select reaction based on compounds |
285 |
foreach $reaction_id (@reactions) |
foreach my $reaction_id (@_) |
286 |
{ |
{ |
287 |
my $kegg_rev = $fig->reversible($reaction_id); |
my $kegg_rev = $fig->reversible($reaction_id); |
288 |
my $better_reversed = 0; |
my $better_reversed = 0; |
432 |
print "\t\t Model and KEGG reactions have different directions\n"; |
print "\t\t Model and KEGG reactions have different directions\n"; |
433 |
} |
} |
434 |
|
|
435 |
last ec_loop; |
last; |
436 |
} |
} |
437 |
else |
else |
438 |
{ |
{ |
439 |
$reaction_scores{$reaction_id} = $score; |
$reaction_scores{$reaction_id} = $score; |
440 |
} |
} |
441 |
} |
} |
|
} |
|
|
} |
|
442 |
|
|
443 |
my @reaction_list = sort { $reaction_scores{$a} <=> $reaction_scores{$b} } keys %reaction_scores; |
return \%reaction_scores; |
|
|
|
|
if (@reaction_list > 0) |
|
|
{ |
|
|
$reaction_abbrev_2_id{$rabbrev} = \@reaction_list; |
|
|
print "\t reaction list: @reaction_list\n"; |
|
|
|
|
|
if (@reaction_list == 1) |
|
|
{ |
|
|
my $kegg_rev = $fig->reversible($reaction_list[0]); |
|
|
if ($kegg_rev == $reversibility{$rabbrev}) |
|
|
{ |
|
|
print REAC_OUT "$rabbrev\t@reaction_list\t1\n"; |
|
|
} |
|
|
else |
|
|
{ |
|
|
print REAC_OUT "$rabbrev\t@reaction_list\t0\n"; |
|
|
} |
|
|
} |
|
|
else |
|
|
{ |
|
|
print REAC_OUT "$rabbrev\t@reaction_list\n"; |
|
|
} |
|
|
} |
|
|
else |
|
|
{ |
|
|
print "\t No matching reactions\n"; |
|
|
print REAC_OUT "$rabbrev\n"; |
|
|
} |
|
|
} |
|
|
else |
|
|
{ |
|
|
print "\t No EC found\n"; |
|
|
print REAC_OUT "$rabbrev\n"; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
close(REAC_OUT); |
|
|
|
|
|
print "\n\nNumber of reactions with matches: ", scalar(keys(%reaction_abbrev_2_id)), "\n"; |
|
|
|
|
|
foreach my $cabbrev (keys %cabbrevs_used_in_reactions) |
|
|
{ |
|
|
my @cids = keys %{$cabbrevs_used_in_reactions{$cabbrev}}; |
|
|
print COMPOUND_OUT "$cabbrev\t@cids\n"; |
|
|
} |
|
|
|
|
|
sub get_reaction_id |
|
|
{ |
|
|
my $match_name = shift; |
|
|
|
|
|
my ($reaction_id, $entry, $name); |
|
|
|
|
|
open(KEGG_REACTIONS, "<$FIG_Config::data/KEGG/reaction") or die("Couldn't open KEGG reaction file"); |
|
|
|
|
|
$/ = "\n///\n"; |
|
|
while (<KEGG_REACTIONS>) |
|
|
{ |
|
|
if ($_ =~ /ENTRY\s+(\S+)\s+Reaction\nNAME\s+(\S[^\n]+\n)((\s+(\S[^\n]+\n))*)/s) |
|
|
{ |
|
|
$reaction_id = $1; |
|
|
|
|
|
if ($3) |
|
|
{ |
|
|
$name = "$2 $3"; |
|
|
} |
|
|
else |
|
|
{ |
|
|
$name = $2; |
|
|
} |
|
|
$name =~ s/(\s+\$|\s+)/ /g; |
|
|
chop $name; |
|
|
|
|
|
if ($name eq $match_name) |
|
|
{ |
|
|
$/ = "\n"; |
|
|
return $reaction_id; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
$/ = "\n"; |
|
|
return; |
|
444 |
} |
} |