[Bio] / Sprout / ModelSaplingLoader.pm Repository:
ViewVC logotype

Annotation of /Sprout/ModelSaplingLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 : parrello 1.13 #use FIGMODEL;
20 : parrello 1.1
21 :     package ModelSaplingLoader;
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use base 'BaseSaplingLoader';
26 :    
27 : parrello 1.14 =head1 Sapling Model Load Group Class
28 : parrello 1.1
29 :     =head2 Introduction
30 :    
31 : parrello 1.14 The Model Load Group includes a small set of tables that describe reactions and compounds
32 :     and how they relate to the models in the main model database.
33 : parrello 1.1
34 :     =head3 new
35 :    
36 : parrello 1.20 my $sl = ModelSaplingLoader->new($erdb, $options);
37 : parrello 1.1
38 : parrello 1.14 Construct a new ModelSaplingLoader object.
39 : parrello 1.1
40 :     =over 4
41 :    
42 :     =item erdb
43 :    
44 : parrello 1.4 L<Sapling> object for the database being loaded.
45 : parrello 1.1
46 :     =item options
47 :    
48 :     Reference to a hash of command-line options.
49 :    
50 :     =back
51 :    
52 :     =cut
53 :    
54 :     sub new {
55 :     # Get the parameters.
56 :     my ($class, $erdb, $options) = @_;
57 :     # Create the table list.
58 : parrello 1.14 my @tables = qw(Compound Reaction EcNumber Model Media IsTriggeredBy
59 :     IsCategorizedInto IsConsistentWith IsModeledBy Involves
60 : parrello 1.18 IsRequiredBy Complex IsSetOf IsExemplarOf);
61 : parrello 1.1 # Create the BaseSaplingLoader object.
62 :     my $retVal = BaseSaplingLoader::new($class, $erdb, $options, @tables);
63 : parrello 1.14 # Create the reaction tracking hash.
64 :     $retVal->{reactions} = {};
65 : parrello 1.1 # Return it.
66 :     return $retVal;
67 :     }
68 :    
69 :     =head2 Public Methods
70 :    
71 :     =head3 Generate
72 :    
73 :     $sl->Generate();
74 :    
75 : parrello 1.14 Generate the data for the model files.
76 : parrello 1.1
77 :     =cut
78 :    
79 :     sub Generate {
80 :     # Get the parameters.
81 :     my ($self) = @_;
82 :     # Get the database object.
83 :     my $erdb = $self->db();
84 :     # Is this the global section?
85 :     if ($self->global()) {
86 : parrello 1.14 # Load the tables from the model dump files.
87 :     $self->LoadModelFiles();
88 : parrello 1.1 } else {
89 :     # Get the section ID.
90 :     my $genomeID = $self->section();
91 :     #NO GENOME SPECIFIC MODEL STUFF
92 :     }
93 :     }
94 :    
95 : parrello 1.14 =head3 LoadModelFiles
96 : parrello 1.1
97 : parrello 1.14 $sl->LoadModelFiles();
98 : parrello 1.1
99 : parrello 1.14 Load the data from the six model dump files.
100 :    
101 :     =cut
102 :    
103 : parrello 1.15 # hash of ubiquitous compounds.
104 :     use constant UBIQUITOUS => {
105 : parrello 1.16 cpd00001 => 'OH-',
106 : parrello 1.15 cpd00002 => 'ATP',
107 : parrello 1.16 cpd00003 => 'Nicotinamideadeninedinucleotide',
108 :     cpd00004 => 'Nicotinamideadeninedinucleotide-reduced',
109 :     cpd00005 => 'Nicotinamideadeninedinucleotidephosphate-reduced',
110 :     cpd00006 => 'Nicotinamideadeninedinucleotidephosphate',
111 :     cpd00007 => 'Oxygen',
112 : parrello 1.15 cpd00008 => 'ADP',
113 : parrello 1.16 cpd00009 => 'Orthophosphoric acid',
114 : parrello 1.15 cpd00010 => 'CoenzymeA',
115 : parrello 1.16 cpd00011 => 'Carbon dioxide',
116 : parrello 1.15 cpd00012 => 'PPi',
117 : parrello 1.16 cpd00018 => 'AMP',
118 :     cpd00020 => 'Pyruvic Acid',
119 : parrello 1.15 cpd00022 => 'Acetyl-CoA',
120 : parrello 1.16 cpd00025 => 'Hydrogen peroxide',
121 :     cpd00067 => 'H+',
122 : parrello 1.15 cpd00971 => 'Sodium',
123 :     cpd15352 => '2-Demethylmenaquinone',
124 : parrello 1.16 cpd15353 => '2-Demethylmenaquinol',
125 :     cpd15499 => 'Menaquinol',
126 :     cpd15500 => 'Menaquinone',
127 :     cpd15560 => 'Ubiquinone-8',
128 :     cpd15561 => 'Ubiquinol-8',
129 : parrello 1.15 };
130 :    
131 : parrello 1.14 sub LoadModelFiles {
132 :     # Get the parameters.
133 :     my ($self) = @_;
134 : parrello 1.19 # Get the sapling database.
135 :     my $erdb = $self->db();
136 : parrello 1.14 # Get the model dump file directory.
137 : parrello 1.19 my $dir = $erdb->LoadDirectory() . "/models";
138 : parrello 1.14 # First we read the compounds.
139 :     my $ih = $self->CheckFile("$dir/CompoundName.txt", qw(CompoundID Name));
140 :     while (! eof $ih) {
141 :     # Get the next compound.
142 :     my ($id, $label) = $self->ReadLine($ih);
143 :     # Create a compound record for it.
144 : parrello 1.15 $self->PutE(Compound => $id, label => $label, ubiquitous => (UBIQUITOUS->{$id} ? 1 : 0));
145 : parrello 1.14 }
146 :     # Next, the compound-reactions relationship. We create the reactions here, too.
147 :     $ih = $self->CheckFile("$dir/CompoundReaction.txt", qw(CompoundID ReactionID
148 : parrello 1.15 Stoichiometry Cofactor));
149 : parrello 1.14 while (! eof $ih) {
150 :     # Get the next link.
151 : parrello 1.15 my ($compound, $reaction, $stoich, $cofactor) = $self->ReadLine($ih);
152 : parrello 1.14 # Insure the reaction exists.
153 :     $self->CreateReaction($reaction);
154 :     # Check for product or substrate.
155 :     my $product;
156 :     if ($stoich < 0) {
157 :     $product = 0;
158 :     $stoich = -$stoich;
159 :     } else {
160 :     $product = 1;
161 :     }
162 :     # Connect the reaction to the compound.
163 :     $self->PutR(Involves => $reaction, $compound, product => $product,
164 : parrello 1.15 stoichiometry => $stoich, cofactor => $cofactor);
165 : parrello 1.14 }
166 : parrello 1.20 # Before we go on, we need to get a map of the modelSEED role IDs to
167 :     # the SEED role IDs. This is found in the Role.txt file, along with the
168 :     # exemplar data.
169 :     my %roleHash;
170 :     $ih = $self->CheckFile("$dir/Role.txt", qw(RoleID Name ExemplarID));
171 :     while (! eof $ih) {
172 :     # Get the next role's record.
173 :     my ($roleID, $role, $exemplarList) = $self->ReadLine($ih);
174 :     # Map the role ID to the role name (which is the SEED's ID).
175 :     $roleHash{$roleID} = $role;
176 :     # If there is are exemplars, store them.
177 :     if ($exemplarList && $exemplarList ne 'NONE') {
178 :     for my $exemplar (split /\s*,\s*/, $exemplarList) {
179 :     $self->PutR(IsExemplarOf => $exemplar, $role);
180 :     }
181 :     }
182 :     }
183 :     # The next step is to create the complexes. We load into memory a
184 :     # hash mapping the complexes to their reactions. This is later
185 :     # used to insure we have reaction-to-role coverage.
186 :     my %cpxHash;
187 :     $ih = $self->CheckFile("$dir/ReactionComplex.txt", qw(ReactionID ComplexID));
188 :     while (! eof $ih) {
189 :     # Get the next reaction/complex pair.
190 :     my ($rxn, $cpx) = $self->ReadLine($ih);
191 :     # Is this a new complex?
192 :     if (! exists $cpxHash{$cpx}) {
193 :     # Yes. Create a record for it.
194 :     $self->PutE(Complex => $cpx);
195 :     $cpxHash{$cpx} = [];
196 :     }
197 :     # Insure the reaction exists.
198 :     $self->CreateReaction($rxn);
199 :     # Connect the complex to the reaction.
200 :     $self->PutR(IsSetOf => $cpx, $rxn);
201 :     push @{$cpxHash{$cpx}}, $rxn;
202 :     }
203 :     # Here we connect the complexes to the roles. Along the way, we
204 :     # create a hash listing of all of a reaction's roles. That hash
205 :     # will be used to check for missing reaction/role links later on.
206 :     my %rxnHash;
207 :     $ih = $self->CheckFile("$dir/ComplexRole.txt", qw(RoleID ComplexID));
208 :     while (! eof $ih) {
209 :     # Get the next role/complex pair.
210 :     my ($roleID, $cpx) = $self->ReadLine($ih);
211 :     # Connect the role to the complex.
212 :     $self->PutR(IsTriggeredBy => $cpx, $roleHash{$roleID}, optional => 0);
213 :     # Denote that this role is connected to the complex's reactions.
214 :     for my $rxn (@{$cpxHash{$cpx}}) {
215 :     push @{$rxnHash{$rxn}}, $roleID;
216 :     }
217 :     }
218 :     # We don't need the complex hash any more. Instead, we're going to
219 :     # use it to track single-reaction complexes we create.
220 :     %cpxHash = ();
221 :     # Now we fill in the missing reaction-to-role connections.
222 : parrello 1.14 $ih = $self->CheckFile("$dir/ReactionRole.txt", qw(ReactionID Role));
223 :     while (! eof $ih) {
224 : parrello 1.20 # Get the next reaction/role pair.
225 : parrello 1.14 my ($reaction, $role) = $self->ReadLine($ih);
226 :     # Insure the reaction exists.
227 :     $self->CreateReaction($reaction);
228 : parrello 1.20 # Is this reaction already connected to this role?
229 :     my $roleList = $rxnHash{$reaction};
230 :     if (! $roleList || ! (grep { $roleHash{$_} eq $role } @{$rxnHash{$reaction}})) {
231 :     # No, so we have to do it the hard way.
232 :     if (! exists $cpxHash{$reaction}) {
233 :     # Here the reaction has not had a complex created, so we
234 :     # must create one.
235 :     $self->Add(pseudoComplex => 1);
236 :     $self->PutE(Complex => $reaction);
237 :     $self->PutR(IsSetOf => $reaction, $reaction);
238 :     }
239 :     # Connect the reaction's complex to this role.
240 :     $self->PutR(IsTriggeredBy => $reaction, $role);
241 :     $self->Add(missingTrigger => 1);
242 :     }
243 : parrello 1.14 }
244 :     # Now we create the models.
245 : parrello 1.17 $ih = $self->CheckFile("$dir/ModelGenome.txt", qw(ModelID Name GenomeID));
246 : parrello 1.14 while (! eof $ih) {
247 :     # Get the next model.
248 : parrello 1.17 my ($model, $name, $genome) = $self->ReadLine($ih);
249 : parrello 1.14 # Create the model.
250 :     $self->PutE(Model => $model);
251 :     # Connect it to the genome. Again, the genomes are created elsewhere.
252 :     $self->PutR(IsModeledBy => $genome, $model);
253 :     }
254 :     # Next we connect the reactions to models.
255 :     $ih = $self->CheckFile("$dir/ModelReaction.txt", qw(ModelID ReactionID));
256 :     while (! eof $ih) {
257 :     # Get the next line.
258 :     my ($model, $reaction) = $self->ReadLine($ih);
259 :     # Only proceed if a reaction is present.
260 :     if ($reaction) {
261 :     # Insure the reaction exists.
262 :     $self->CreateReaction($reaction);
263 :     # Connect the reaction to the model.
264 :     $self->PutR(IsRequiredBy => $reaction, $model);
265 : parrello 1.1 }
266 :     }
267 :     }
268 :    
269 : parrello 1.19 =head3 CheckFile
270 : parrello 1.14
271 :     my $ih = $sl->CheckFile($fileName, @fieldNames);
272 :    
273 :     Read the header record of the specified file and verify that the field names match
274 :     the names in the input list. If they do not, an error will be thrown; if they do, an
275 :     open file handle will be returned, positioned on the first data record.
276 :    
277 :     =over 4
278 :    
279 :     =item fileName
280 :    
281 :     Name for the input file. The file is in standard tab-delimited format. The first record
282 :     contains the field names and the remaining records contain the data.
283 :    
284 :     =item fieldNames
285 :    
286 :     List of the field names expected, in order.
287 :    
288 :     =item RETURN
289 :    
290 :     Returns the open file handle if successful. If there is a mismatch, throws an error.
291 :    
292 :     =back
293 :    
294 :     =cut
295 :    
296 :     sub CheckFile {
297 :     # Get the parameters.
298 :     my ($self, $fileName, @fieldNames) = @_;
299 :     # Open the file.
300 :     my $retVal = Open(undef, "<$fileName");
301 :     $self->Add(files => 1);
302 :     # Read in the file header.
303 :     my @actualFields = Tracer::GetLine($retVal);
304 :     # This will be set to TRUE if there's a mismatch.
305 :     my $error = 0;
306 :     for (my $i = 0; $i <= $#fieldNames; $i++) {
307 :     if ($fieldNames[$i] ne $actualFields[$i]) {
308 :     Trace("Field match error: expected $fieldNames[$i], found $actualFields[$i].") if T(0);
309 :     $error = 1;
310 : parrello 1.1 }
311 :     }
312 : parrello 1.14 # Was there an error?
313 :     if ($error) {
314 :     # Yes, so abort.
315 :     Confess("Invalid field name header in $fileName.");
316 :     } else {
317 :     # No, so trace the open.
318 :     Trace("Processing $fileName.") if T(ERDBLoadGroup => 2);
319 :     }
320 :     # Return the file handle.
321 :     return $retVal;
322 : parrello 1.1 }
323 :    
324 : parrello 1.14 =head3 ReadLine
325 :    
326 :     my @fields = $sl->ReadLine($ih);
327 : parrello 1.1
328 : parrello 1.14 Read a line of data from an input file.
329 :    
330 :     =over 4
331 :    
332 :     =item ih
333 :    
334 :     Open file handle for the input file.
335 :    
336 :     =item RETURN
337 :    
338 :     Returns a list of the field values for the next record in the file.
339 :    
340 :     =back
341 :    
342 :     =cut
343 :    
344 :     sub ReadLine {
345 :     # Get the parameters.
346 :     my ($self, $ih) = @_;
347 :     # Read the line.
348 :     my @retVal = Tracer::GetLine($ih);
349 :     # Count this record.
350 :     $self->Track(records => $retVal[0], 1000);
351 :     # Return the data.
352 :     return @retVal;
353 :     }
354 :    
355 :    
356 :     =head3 CheckReaction
357 :    
358 :     $sl->CheckReaction($reaction);
359 :    
360 :     Insure we have created a rectord for the specified reaction.
361 :    
362 :     =over 4
363 :    
364 :     =item reaction
365 :    
366 :     ID of the reaction in question.
367 :    
368 :     =back
369 :    
370 :     =cut
371 :    
372 :     sub CreateReaction {
373 :     # Get the parameters.
374 :     my ($self, $reaction) = @_;
375 :     # Get the reaction hash.
376 :     my $reactionH = $self->{reactions};
377 :     # See if this reaction is new.
378 :     if (! $reactionH->{$reaction}) {
379 :     # It is, so create it.
380 :     $self->PutE(Reaction => $reaction);
381 :     # Insure we don't create it again.
382 :     $reactionH->{$reaction} = 1;
383 :     }
384 : parrello 1.10 }
385 :    
386 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3