[Bio] / DeJonghStuff / parse_model.pl Repository:
ViewVC logotype

Annotation of /DeJonghStuff/parse_model.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : dejongh 1.1 # -*- perl -*-
2 :    
3 :     ###########################################
4 :     # usage: parse_model <genome_id> <model_id>
5 :     # models for a given genome should be stored in Data/Global/Models/<genome_id>/
6 :     # models should be prepended with the model id (e.g., iSB615). Three files are expected:
7 :     # ${model_id}-metabolites.txt: contains two-column, tab-separated file with compound ids
8 :     # in the first column and compound names in the second column
9 :     # ${model_id}-reactions.txt: contains three-column, tab-separated file with reaction ids
10 :     # in the first column, reaction names in the second column, and reaction equations in
11 :     # the third column
12 :     # ${model_id}-gene-reaction-assoc.txt: contains two-column, tab-separated file with
13 :     # reaction ids in the first column and list of gene ids in the second column
14 :    
15 :     use strict;
16 :     use FIG;
17 :    
18 :     my ($genome_id, $model_id) = @ARGV;
19 :     my $fig = new FIG;
20 :    
21 :     open(MODEL_METABOLITES, "<$FIG_Config::global/Models/$genome_id/${model_id}-metabolites.txt")
22 :     or die("Couldn't open $genome_id/${model_id}-metabolites.txt");
23 :     open(MODEL_REACTIONS, "<$FIG_Config::global/Models/$genome_id/${model_id}-reactions.txt")
24 :     or die("Couldn't open $genome_id/${model_id}-reactions.txt");
25 :     open(MODEL_GRA, "<$FIG_Config::global/Models/$genome_id/${model_id}-gene-reaction-assoc.txt")
26 :     or die("Couldn't open $genome_id/${model_id}-gene-reaction-assoc.txt");
27 :     open(COMPOUND, ">$FIG_Config::global/Models/$genome_id/compound");
28 :     open(COMPOUND_NAME, ">$FIG_Config::global/Models/$genome_id/compound_name");
29 :     open(REACTION_TO_ROLE, ">$FIG_Config::global/Models/$genome_id/reaction_to_role");
30 :     open(REACTION, ">$FIG_Config::global/Models/$genome_id/reaction");
31 :     open(REACTION_TO_COMPOUND, ">$FIG_Config::global/Models/$genome_id/reaction_to_compound");
32 :     open(GENE_TO_ROLE, ">$FIG_Config::global/Models/$genome_id/gene_to_role");
33 :     open(GENE_TO_REACTION, ">$FIG_Config::global/Models/$genome_id/gene_to_reaction");
34 :     open(GENE_TO_PEG, ">$FIG_Config::global/Models/$genome_id/gene_to_peg");
35 :    
36 :     my (%compound_abbrev_2_name, %reaction_abbrev_2_role);
37 :    
38 :     while (my $metabolite = <MODEL_METABOLITES>)
39 :     {
40 :     chomp($metabolite);
41 :     if (my ($abbrev, $name) = split /\t/, $metabolite)
42 :     {
43 :     $abbrev =~ s/\"//g;
44 :     $name =~ s/\"//g;
45 :     $name =~ s/'/\\'/g;
46 :    
47 :     if (! defined($compound_abbrev_2_name{$abbrev}))
48 :     {
49 :     $compound_abbrev_2_name{$abbrev} = $name;
50 :     print COMPOUND "$abbrev\n";
51 :     print COMPOUND_NAME "$abbrev\t1\t$compound_abbrev_2_name{$abbrev}\n";
52 :     }
53 :     else
54 :     {
55 :     print "Warning, found multiple names for $abbrev\n";
56 :     }
57 :     }
58 :     else
59 :     {
60 :     print "Can't parse metabolite: $metabolite\n";
61 :     }
62 :     }
63 :    
64 :     close(COMPOUND);
65 :     close(COMPOUND_NAME);
66 :    
67 :     while (my $reaction = <MODEL_REACTIONS>)
68 :     {
69 :     chomp($reaction);
70 :    
71 :     if (my ($rabbrev, $rname, $equation) = split /\t/, $reaction)
72 :     {
73 :     $rabbrev =~ s/\"//g;
74 :     $rname =~ s/\"//g;
75 :     $equation =~ s/\"//g;
76 :    
77 :     if (! defined($reaction_abbrev_2_role{$rabbrev}))
78 :     {
79 :     $reaction_abbrev_2_role{$rabbrev} = $rname;
80 :     print REACTION_TO_ROLE "$rabbrev\t$rname\n";
81 :     }
82 :     else
83 :     {
84 :     print "Warning, found multiple names for $rabbrev\n";
85 :     }
86 :    
87 :     my ($loc, $substrate_list, $direction, $product_list);
88 :    
89 :     if ($equation =~ /^\[(.)\]( : )?(.*)(-->|<==>)(.*)/)
90 :     {
91 :     $loc = $1;
92 :     ($substrate_list, $direction, $product_list) = ($3, $4, $5);
93 :     }
94 :     elsif ($equation =~ /(.*)(-->|<==>)(.*)/)
95 :     {
96 :     $loc = "";
97 :     ($substrate_list, $direction, $product_list) = ($1, $2, $3);
98 :     }
99 :     else
100 :     {
101 :     print "Warning, unparseable equation: $equation\n";
102 :     next;
103 :     }
104 :    
105 :     if ($direction eq "-->")
106 :     {
107 :     print REACTION "$rabbrev\t0\n";
108 :     }
109 :     else
110 :     {
111 :     print REACTION "$rabbrev\t1\n";
112 :     }
113 :    
114 :     $substrate_list =~ s/\s//g;
115 :     $product_list =~ s/\s//g;
116 :    
117 :     for (my $setn = 1; $setn <= 2; $setn++)
118 :     {
119 :     my $compound_list = ($setn == 1 ? $substrate_list : $product_list);
120 :    
121 :     foreach my $cabbrev_info (split /\+/, $compound_list)
122 :     {
123 :     my ($cid, $stoich, $cloc);
124 :    
125 :     if ($cabbrev_info =~ /(.+)\[(.)\]$/)
126 :     {
127 :     $cabbrev_info = $1;
128 :     $cloc = $2;
129 :     }
130 :     else
131 :     {
132 :     $cloc = $loc;
133 :     }
134 :    
135 :     if ($cabbrev_info =~ /\((\d.*)\)(.+)/)
136 :     {
137 :     $stoich = $1;
138 :     $cid = $2;
139 :     }
140 :     else
141 :     {
142 :     $stoich = "1";
143 :     $cid = $cabbrev_info;
144 :     }
145 :    
146 :     if (defined($compound_abbrev_2_name{$cid}))
147 :     {
148 :     print REACTION_TO_COMPOUND "$rabbrev\t$cid\t$setn\t$stoich\t\t$cloc\n";
149 :     }
150 :     else
151 :     {
152 :     print "Warning, undefined compound $cid in reaction $equation\n";
153 :     }
154 :     }
155 :     }
156 :     }
157 :     else
158 :     {
159 :     print "Warning, unparseable reaction: $reaction\n";
160 :     }
161 :     }
162 :    
163 :     close(REACTION);
164 :     close(REACTION_TO_ROLE);
165 :     close(REACTION_TO_COMPOUND);
166 :    
167 :     # extra - use hash for tracking gene/peg correspondences since genes occur in multiple reactions
168 :     my %gene_2_peg;
169 :    
170 :     while (<MODEL_GRA>)
171 :     {
172 :     my ($rabbrev, $genes) = split "\t", $_;
173 :     my @rids;
174 :    
175 :     if (defined $reaction_abbrev_2_role{$rabbrev})
176 :     {
177 :     while ($genes =~ /(SA\d\d\d\d)/g)
178 :     {
179 :     my $gene = $1;
180 :    
181 :     print GENE_TO_REACTION "$gene\t$rabbrev\n";
182 :     my $rname = $reaction_abbrev_2_role{$rabbrev};
183 :     print GENE_TO_ROLE "$gene\t$rname\n"; # this table is redundant
184 :    
185 :     # extra - look for pegs based on gene
186 :     if (! defined($gene_2_peg{$gene}))
187 :     {
188 :     my $peg_count = 0;
189 :     my ($peg_index_data,undef) = $fig->search_index($gene);
190 :    
191 :     foreach my $peg (map { $_->[0] } @$peg_index_data)
192 :     {
193 :     if ($peg =~ /$genome_id/ )
194 :     {
195 :     $peg_count++;
196 :     print GENE_TO_PEG "$gene\t$peg\t$peg_count\n";
197 :     }
198 :     }
199 :    
200 :     if ($peg_count == 0)
201 :     {
202 :     print "Warning, no pegs found for gene $gene\n";
203 :     }
204 :    
205 :     $gene_2_peg{$gene} = 1;
206 :     }
207 :     }
208 :     }
209 :     else
210 :     {
211 :     print "Warning, unknown reaction abbreviation in GRA file: $rabbrev\n";
212 :     }
213 :     }
214 :    
215 :     close(GENE_TO_REACTION);
216 :     close(GENE_TO_ROLE);
217 :     close(GENE_TO_PEG);
218 :    
219 :     exit;
220 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3