1 |
# -*- perl -*- |
# -*- perl -*- |
2 |
|
|
3 |
########################################### |
########################################### |
4 |
# usage: parse_model <genome_id> <model_id> |
# usage: parse_model <genome_id> <model_id> <kegg organism id> |
5 |
# models for a given genome should be stored in Data/Global/Models/<genome_id>/ |
# models for a given genome should be stored in Data/Global/Models/<genome_id>/ |
6 |
# models should be prepended with the model id (e.g., iSB615). Three files are expected: |
# models should be prepended with the model id (e.g., iSB615). Three files are expected: |
7 |
# ${model_id}-metabolites.txt: contains two-column, tab-separated file with compound ids |
# ${model_id}-metabolites.txt: contains two-column, tab-separated file with compound ids |
15 |
use strict; |
use strict; |
16 |
use FIG; |
use FIG; |
17 |
|
|
18 |
my ($genome_id, $model_id) = @ARGV; |
my ($genome_id, $model_id, $kegg_org_id) = @ARGV; |
19 |
|
|
20 |
|
unless ($genome_id && $model_id && $kegg_org_id) |
21 |
|
{ |
22 |
|
print "usage: parse_model <genome_id> <model_id> <kegg organism id>\n"; |
23 |
|
exit(-1); |
24 |
|
} |
25 |
|
|
26 |
my $fig = new FIG; |
my $fig = new FIG; |
27 |
|
|
28 |
open(MODEL_METABOLITES, "<$FIG_Config::global/Models/$genome_id/${model_id}-metabolites.txt") |
open(MODEL_METABOLITES, "<$FIG_Config::global/Models/$genome_id/${model_id}-metabolites.txt") |
171 |
close(REACTION_TO_ROLE); |
close(REACTION_TO_ROLE); |
172 |
close(REACTION_TO_COMPOUND); |
close(REACTION_TO_COMPOUND); |
173 |
|
|
174 |
# extra - use hash for tracking gene/peg correspondences since genes occur in multiple reactions |
# use hash for tracking gene/peg and gene/reaction correspondences since genes occur in multiple reactions |
175 |
my %gene_2_peg; |
my (%gene_2_pegs, %gene_2_reactions); |
176 |
|
|
177 |
while (<MODEL_GRA>) |
while (<MODEL_GRA>) |
178 |
{ |
{ |
181 |
|
|
182 |
if (defined $reaction_abbrev_2_role{$rabbrev}) |
if (defined $reaction_abbrev_2_role{$rabbrev}) |
183 |
{ |
{ |
184 |
while ($genes =~ /(SA\d\d\d\d)/g) |
chomp $genes; |
185 |
{ |
$genes =~ s/\(//g; |
186 |
my $gene = $1; |
$genes =~ s/\)//g; |
187 |
|
$genes =~ s/and//gi; |
188 |
|
$genes =~ s/or//gi; |
189 |
|
$genes =~ s/^\s+//; |
190 |
|
$genes =~ s/\s+$//; |
191 |
|
|
192 |
print GENE_TO_REACTION "$gene\t$rabbrev\n"; |
foreach my $gene (split /\s+/ , $genes) |
193 |
|
{ |
194 |
|
print "Checking $gene ...\n"; |
195 |
|
push @{$gene_2_reactions{$gene}}, $rabbrev; |
196 |
my $rname = $reaction_abbrev_2_role{$rabbrev}; |
my $rname = $reaction_abbrev_2_role{$rabbrev}; |
197 |
print GENE_TO_ROLE "$gene\t$rname\n"; # this table is redundant |
print GENE_TO_ROLE "$gene\t$rname\n"; # this table is redundant |
198 |
|
|
199 |
# extra - look for pegs based on gene |
if (! defined($gene_2_pegs{$gene})) |
|
if (! defined($gene_2_peg{$gene})) |
|
200 |
{ |
{ |
201 |
my $peg_count = 0; |
$gene_2_pegs{$gene} = (); |
202 |
my ($peg_index_data,undef) = $fig->search_index($gene); |
# search on kegg gene identifiers because the seem to limit results to the correct peg |
203 |
|
my ($peg_index_data,undef) = $fig->search_index("$kegg_org_id:".$gene); |
204 |
|
|
205 |
foreach my $peg (map { $_->[0] } @$peg_index_data) |
foreach my $peg (map { $_->[0] } @$peg_index_data) |
206 |
{ |
{ |
207 |
if ($peg =~ /$genome_id/ ) |
if ($peg =~ /$genome_id/ ) |
208 |
{ |
{ |
209 |
$peg_count++; |
push @{$gene_2_pegs{$gene}}, $peg; |
|
print GENE_TO_PEG "$gene\t$peg\t$peg_count\n"; |
|
210 |
} |
} |
211 |
} |
} |
|
|
|
|
if ($peg_count == 0) |
|
|
{ |
|
|
print "Warning, no pegs found for gene $gene\n"; |
|
|
} |
|
|
|
|
|
$gene_2_peg{$gene} = 1; |
|
212 |
} |
} |
213 |
} |
} |
214 |
} |
} |
218 |
} |
} |
219 |
} |
} |
220 |
|
|
221 |
|
foreach my $gene (keys %gene_2_pegs) |
222 |
|
{ |
223 |
|
if (defined $gene_2_pegs{$gene}) |
224 |
|
{ |
225 |
|
print GENE_TO_PEG "$gene\t@{$gene_2_pegs{$gene}}\n"; |
226 |
|
} |
227 |
|
else |
228 |
|
{ |
229 |
|
print "Can't find peg for $gene\n"; |
230 |
|
} |
231 |
|
} |
232 |
|
|
233 |
|
foreach my $gene (keys %gene_2_reactions) |
234 |
|
{ |
235 |
|
print GENE_TO_REACTION "$gene\t@{$gene_2_reactions{$gene}}\n"; |
236 |
|
} |
237 |
|
|
238 |
close(GENE_TO_REACTION); |
close(GENE_TO_REACTION); |
239 |
close(GENE_TO_ROLE); |
close(GENE_TO_ROLE); |
240 |
close(GENE_TO_PEG); |
close(GENE_TO_PEG); |