[Bio] / DeJonghStuff / parse_model.pl Repository:
ViewVC logotype

Annotation of /DeJonghStuff/parse_model.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : dejongh 1.1 # -*- perl -*-
2 :    
3 :     ###########################################
4 : dejongh 1.2 # usage: parse_model <genome_id> <model_id> <kegg organism id>
5 : dejongh 1.1 # models for a given genome should be stored in Data/Global/Models/<genome_id>/
6 :     # models should be prepended with the model id (e.g., iSB615). Three files are expected:
7 :     # ${model_id}-metabolites.txt: contains two-column, tab-separated file with compound ids
8 :     # in the first column and compound names in the second column
9 :     # ${model_id}-reactions.txt: contains three-column, tab-separated file with reaction ids
10 :     # in the first column, reaction names in the second column, and reaction equations in
11 :     # the third column
12 :     # ${model_id}-gene-reaction-assoc.txt: contains two-column, tab-separated file with
13 :     # reaction ids in the first column and list of gene ids in the second column
14 :    
15 :     use strict;
16 :     use FIG;
17 :    
18 : dejongh 1.2 my ($genome_id, $model_id, $kegg_org_id) = @ARGV;
19 :    
20 :     unless ($genome_id && $model_id && $kegg_org_id)
21 :     {
22 :     print "usage: parse_model <genome_id> <model_id> <kegg organism id>\n";
23 :     exit(-1);
24 :     }
25 :    
26 : dejongh 1.1 my $fig = new FIG;
27 :    
28 :     open(MODEL_METABOLITES, "<$FIG_Config::global/Models/$genome_id/${model_id}-metabolites.txt")
29 :     or die("Couldn't open $genome_id/${model_id}-metabolites.txt");
30 :     open(MODEL_REACTIONS, "<$FIG_Config::global/Models/$genome_id/${model_id}-reactions.txt")
31 :     or die("Couldn't open $genome_id/${model_id}-reactions.txt");
32 :     open(MODEL_GRA, "<$FIG_Config::global/Models/$genome_id/${model_id}-gene-reaction-assoc.txt")
33 :     or die("Couldn't open $genome_id/${model_id}-gene-reaction-assoc.txt");
34 :     open(COMPOUND, ">$FIG_Config::global/Models/$genome_id/compound");
35 :     open(COMPOUND_NAME, ">$FIG_Config::global/Models/$genome_id/compound_name");
36 :     open(REACTION_TO_ROLE, ">$FIG_Config::global/Models/$genome_id/reaction_to_role");
37 :     open(REACTION, ">$FIG_Config::global/Models/$genome_id/reaction");
38 :     open(REACTION_TO_COMPOUND, ">$FIG_Config::global/Models/$genome_id/reaction_to_compound");
39 :     open(GENE_TO_ROLE, ">$FIG_Config::global/Models/$genome_id/gene_to_role");
40 :     open(GENE_TO_REACTION, ">$FIG_Config::global/Models/$genome_id/gene_to_reaction");
41 :     open(GENE_TO_PEG, ">$FIG_Config::global/Models/$genome_id/gene_to_peg");
42 :    
43 :     my (%compound_abbrev_2_name, %reaction_abbrev_2_role);
44 :    
45 :     while (my $metabolite = <MODEL_METABOLITES>)
46 :     {
47 :     chomp($metabolite);
48 :     if (my ($abbrev, $name) = split /\t/, $metabolite)
49 :     {
50 :     $abbrev =~ s/\"//g;
51 :     $name =~ s/\"//g;
52 :     $name =~ s/'/\\'/g;
53 :    
54 :     if (! defined($compound_abbrev_2_name{$abbrev}))
55 :     {
56 :     $compound_abbrev_2_name{$abbrev} = $name;
57 :     print COMPOUND "$abbrev\n";
58 :     print COMPOUND_NAME "$abbrev\t1\t$compound_abbrev_2_name{$abbrev}\n";
59 :     }
60 :     else
61 :     {
62 :     print "Warning, found multiple names for $abbrev\n";
63 :     }
64 :     }
65 :     else
66 :     {
67 :     print "Can't parse metabolite: $metabolite\n";
68 :     }
69 :     }
70 :    
71 :     close(COMPOUND);
72 :     close(COMPOUND_NAME);
73 :    
74 :     while (my $reaction = <MODEL_REACTIONS>)
75 :     {
76 :     chomp($reaction);
77 :    
78 :     if (my ($rabbrev, $rname, $equation) = split /\t/, $reaction)
79 :     {
80 :     $rabbrev =~ s/\"//g;
81 :     $rname =~ s/\"//g;
82 :     $equation =~ s/\"//g;
83 :    
84 :     if (! defined($reaction_abbrev_2_role{$rabbrev}))
85 :     {
86 :     $reaction_abbrev_2_role{$rabbrev} = $rname;
87 :     print REACTION_TO_ROLE "$rabbrev\t$rname\n";
88 :     }
89 :     else
90 :     {
91 :     print "Warning, found multiple names for $rabbrev\n";
92 :     }
93 :    
94 :     my ($loc, $substrate_list, $direction, $product_list);
95 :    
96 :     if ($equation =~ /^\[(.)\]( : )?(.*)(-->|<==>)(.*)/)
97 :     {
98 :     $loc = $1;
99 :     ($substrate_list, $direction, $product_list) = ($3, $4, $5);
100 :     }
101 :     elsif ($equation =~ /(.*)(-->|<==>)(.*)/)
102 :     {
103 :     $loc = "";
104 :     ($substrate_list, $direction, $product_list) = ($1, $2, $3);
105 :     }
106 :     else
107 :     {
108 :     print "Warning, unparseable equation: $equation\n";
109 :     next;
110 :     }
111 :    
112 :     if ($direction eq "-->")
113 :     {
114 :     print REACTION "$rabbrev\t0\n";
115 :     }
116 :     else
117 :     {
118 :     print REACTION "$rabbrev\t1\n";
119 :     }
120 :    
121 :     $substrate_list =~ s/\s//g;
122 :     $product_list =~ s/\s//g;
123 :    
124 :     for (my $setn = 1; $setn <= 2; $setn++)
125 :     {
126 :     my $compound_list = ($setn == 1 ? $substrate_list : $product_list);
127 :    
128 :     foreach my $cabbrev_info (split /\+/, $compound_list)
129 :     {
130 :     my ($cid, $stoich, $cloc);
131 :    
132 :     if ($cabbrev_info =~ /(.+)\[(.)\]$/)
133 :     {
134 :     $cabbrev_info = $1;
135 :     $cloc = $2;
136 :     }
137 :     else
138 :     {
139 :     $cloc = $loc;
140 :     }
141 :    
142 :     if ($cabbrev_info =~ /\((\d.*)\)(.+)/)
143 :     {
144 :     $stoich = $1;
145 :     $cid = $2;
146 :     }
147 :     else
148 :     {
149 :     $stoich = "1";
150 :     $cid = $cabbrev_info;
151 :     }
152 :    
153 :     if (defined($compound_abbrev_2_name{$cid}))
154 :     {
155 :     print REACTION_TO_COMPOUND "$rabbrev\t$cid\t$setn\t$stoich\t\t$cloc\n";
156 :     }
157 :     else
158 :     {
159 :     print "Warning, undefined compound $cid in reaction $equation\n";
160 :     }
161 :     }
162 :     }
163 :     }
164 :     else
165 :     {
166 :     print "Warning, unparseable reaction: $reaction\n";
167 :     }
168 :     }
169 :    
170 :     close(REACTION);
171 :     close(REACTION_TO_ROLE);
172 :     close(REACTION_TO_COMPOUND);
173 :    
174 : dejongh 1.2 # use hash for tracking gene/peg and gene/reaction correspondences since genes occur in multiple reactions
175 :     my (%gene_2_pegs, %gene_2_reactions);
176 : dejongh 1.1
177 :     while (<MODEL_GRA>)
178 :     {
179 :     my ($rabbrev, $genes) = split "\t", $_;
180 :     my @rids;
181 :    
182 :     if (defined $reaction_abbrev_2_role{$rabbrev})
183 :     {
184 : dejongh 1.2 chomp $genes;
185 :     $genes =~ s/\(//g;
186 :     $genes =~ s/\)//g;
187 :     $genes =~ s/and//gi;
188 :     $genes =~ s/or//gi;
189 :     $genes =~ s/^\s+//;
190 :     $genes =~ s/\s+$//;
191 :    
192 :     foreach my $gene (split /\s+/ , $genes)
193 : dejongh 1.1 {
194 : dejongh 1.2 print "Checking $gene ...\n";
195 :     push @{$gene_2_reactions{$gene}}, $rabbrev;
196 : dejongh 1.1 my $rname = $reaction_abbrev_2_role{$rabbrev};
197 :     print GENE_TO_ROLE "$gene\t$rname\n"; # this table is redundant
198 :    
199 : dejongh 1.2 if (! defined($gene_2_pegs{$gene}))
200 : dejongh 1.1 {
201 : dejongh 1.2 $gene_2_pegs{$gene} = ();
202 :     # search on kegg gene identifiers because the seem to limit results to the correct peg
203 :     my ($peg_index_data,undef) = $fig->search_index("$kegg_org_id:".$gene);
204 : dejongh 1.1
205 :     foreach my $peg (map { $_->[0] } @$peg_index_data)
206 :     {
207 :     if ($peg =~ /$genome_id/ )
208 :     {
209 : dejongh 1.2 push @{$gene_2_pegs{$gene}}, $peg;
210 : dejongh 1.1 }
211 :     }
212 :     }
213 :     }
214 :     }
215 :     else
216 :     {
217 :     print "Warning, unknown reaction abbreviation in GRA file: $rabbrev\n";
218 :     }
219 :     }
220 :    
221 : dejongh 1.2 foreach my $gene (keys %gene_2_pegs)
222 :     {
223 :     if (defined $gene_2_pegs{$gene})
224 :     {
225 :     print GENE_TO_PEG "$gene\t@{$gene_2_pegs{$gene}}\n";
226 :     }
227 :     else
228 :     {
229 :     print "Can't find peg for $gene\n";
230 :     }
231 :     }
232 :    
233 :     foreach my $gene (keys %gene_2_reactions)
234 :     {
235 :     print GENE_TO_REACTION "$gene\t@{$gene_2_reactions{$gene}}\n";
236 :     }
237 :    
238 : dejongh 1.1 close(GENE_TO_REACTION);
239 :     close(GENE_TO_ROLE);
240 :     close(GENE_TO_PEG);
241 :    
242 :     exit;
243 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3