[Bio] / Sprout / ModelSaplingLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/ModelSaplingLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.13, Mon Jan 24 18:13:56 2011 UTC revision 1.17, Mon May 2 17:28:09 2011 UTC
# Line 22  Line 22 
22      use strict;      use strict;
23      use Tracer;      use Tracer;
24      use ERDB;      use ERDB;
     use HTTP::Date;  
25      use base 'BaseSaplingLoader';      use base 'BaseSaplingLoader';
26    
27  =head1 Sapling ModelLoader Load Group Class  =head1 Sapling Model Load Group Class
28    
29  =head2 Introduction  =head2 Introduction
30    
31  The ModelLoader Load Group includes all of the major mdl tables.  The Model Load Group includes a small set of tables that describe reactions and compounds
32    and how they relate to the models in the main model database.
33    
34  =head3 new  =head3 new
35    
36      my $sl = ModelLoaderSaplingLoader->new($erdb, $options, @tables);      my $sl = ModelSaplingLoader->new($erdb, $options, @tables);
37    
38  Construct a new ModelLoaderSaplingLoader object.  Construct a new ModelSaplingLoader object.
39    
40  =over 4  =over 4
41    
# Line 59  Line 59 
59      # Get the parameters.      # Get the parameters.
60      my ($class, $erdb, $options) = @_;      my ($class, $erdb, $options) = @_;
61      # Create the table list.      # Create the table list.
62      my @tables = sort qw(StructuralCue IsFoundIn Compound HasAliasOf AliasType      my @tables = qw(Compound Reaction EcNumber Model Media IsTriggeredBy
63                           HasReactionAliasOf Reaction Involves IsTriggeredBy                      IsCategorizedInto IsConsistentWith IsModeledBy Involves
64                           RoleSet IsCombinationOf IsCategorizedInto EcNumber                      IsRequiredBy);
                          IsConsistentWith IsStimulatedBy  FeatureSet IsSetOf  
                          IsRequiredBy Model IsModeledBy ModelUser); # Encompasses);  
65      # Create the BaseSaplingLoader object.      # Create the BaseSaplingLoader object.
66      my $retVal = BaseSaplingLoader::new($class, $erdb, $options, @tables);      my $retVal = BaseSaplingLoader::new($class, $erdb, $options, @tables);
67        # Create the reaction tracking hash.
68        $retVal->{reactions} = {};
69      # Return it.      # Return it.
70      return $retVal;      return $retVal;
71  }  }
# Line 76  Line 76 
76    
77      $sl->Generate();      $sl->Generate();
78    
79  Generate the data for the mdl files.  Generate the data for the model files.
80    
81  =cut  =cut
82    
# Line 85  Line 85 
85      my ($self) = @_;      my ($self) = @_;
86      # Get the database object.      # Get the database object.
87      my $erdb = $self->db();      my $erdb = $self->db();
     # Get the source object.  
     my $fig = $self->source();  
88      # Is this the global section?      # Is this the global section?
     #my $model = new FIGMODEL;  
89      if ($self->global()) {      if ($self->global()) {
90          #Loading the compound entities and relationships          # Load the tables from the model dump files.
91      #    $self->LoadCompounds($model);          $self->LoadModelFiles();
         #Loading the reaction, and EC number entities and relationships  
     #    $self->LoadReactions($model);  
92      } else {      } else {
93          # Get the section ID.          # Get the section ID.
94          my $genomeID = $self->section();          my $genomeID = $self->section();
# Line 101  Line 96 
96      }      }
97  }  }
98    
99  sub LoadCompounds {  =head3 LoadModelFiles
     my ($self,$model) = @_;  
100    
101      #First getting the compound table which has a list of all compounds in the database as well as alot of compound information      $sl->LoadModelFiles();
     my $CompoundTable = $model->database()->GetDBTable("COMPOUNDS");  
102    
103      #Adding the universal AliasType that will always exist  Load the data from the five model dump files.
     $self->PutE(AliasType => "NAME", source => "NONE");  
     $self->PutE(AliasType => "SEARCHNAME", source => "NONE");  
     $self->PutE(AliasType => "KEGG", source => "http://www.kegg.com/");  
     $self->PutE(AliasType => "ARGONNE", source => "OBSOLETE IDs");  
104    
105      #Cycling through the compound list and adding each individual compound entity  =cut
     for (my $i=0; $i < $CompoundTable->size(); $i++) {  
         my $Row = $CompoundTable->get_row($i);  
         if (defined($Row) && defined($Row->{"DATABASE"}->[0]) && $Row->{"DATABASE"}->[0] =~ m/cpd\d\d\d\d\d/) {  
             my $Object = FIGMODELObject->load($model->{"compound directory"}->[0].$Row->{"DATABASE"}->[0],"\t");  
             my @LoadingArray = ($Row->{"DATABASE"}->[0],$Row->{"DATABASE"}->[0],0,"",0,"",0,10000000,10000000,"");  
             $self->Track(compoundRow => $LoadingArray[0], 1000);  
             if (defined($Row->{"NAME"}->[0])) {  
                 $LoadingArray[1] = $Row->{"NAME"}->[0];  
             }  
             if (defined($Object->{"MASS"}->[0])) {  
                 $LoadingArray[2] = $Object->{"MASS"}->[0];  
             }  
             if (defined($Object->{"FORMULA"}->[0])) {  
                 $LoadingArray[3] = $Object->{"FORMULA"}->[0];  
             }  
             if (defined($Object->{"CHARGE"}->[0])) {  
                 $LoadingArray[4] = $Object->{"CHARGE"}->[0];  
             }  
             if (defined($Object->{"CHARGE"}->[0]) && defined($Object->{"FORMULA"}->[0])) {  
                 $LoadingArray[5] = FIGMODEL::ConvertToNeutralFormula($Object->{"FORMULA"}->[0],$Object->{"CHARGE"}->[0]);  
             }  
             if (-e $model->{"Argonne molfile directory"}->[0].$Row->{"DATABASE"}->[0].".mol" || -e $model->{"Argonne molfile directory"}->[0]."pH7/".$Row->{"DATABASE"}->[0].".mol") {  
                 $LoadingArray[6] = 1;  
             }  
             if (defined($Object->{"DELTAG"}->[0])) {  
                 $LoadingArray[7] = $Object->{"DELTAG"}->[0];  
             }  
             if (defined($Object->{"DELTAGERR"}->[0])) {  
                 $LoadingArray[8] = $Object->{"DELTAGERR"}->[0];  
             }  
             if (defined($Row->{"STRINGCODE"}->[0])) {  
                 $LoadingArray[9] = $Row->{"STRINGCODE"}->[0];  
             }  
             $self->PutE(Compound => $LoadingArray[0], label => $LoadingArray[1], mass => $LoadingArray[2], chargedFormula => $LoadingArray[3], charge => $LoadingArray[4],neutralFormula => $LoadingArray[5], hasMolfile => $LoadingArray[6], deltaG => $LoadingArray[7], deltaGerr => $LoadingArray[8], stringcode => $LoadingArray[9]);  
             #Adding the relationship between compound and structural cue  
             if (defined($Object->{"STRUCTURAL_CUES"})) {  
                 foreach my $Cue (@{$Object->{"STRUCTURAL_CUES"}}) {  
                     my @TempArray = split(/:/,$Cue);  
                     if (@TempArray == 2) {  
                         $self->PutR(IsFoundIn => $TempArray[0],$LoadingArray[0],count => $TempArray[1]);  
                     }  
                 }  
             }  
             #Adding the relationship between compound and alias type  
             if (defined($Object->{"NAME"})) {  
                 foreach my $Name (@{$Object->{"NAME"}}) {  
                     $self->PutR(HasAliasOf => $LoadingArray[0],"NAME",alias => $Name);  
                     my @SearchNames = FIGMODEL::ConvertToSearchNames($Name);  
                     foreach my $SearchName (@SearchNames) {  
                         $self->PutR(HasAliasOf => $LoadingArray[0],"SEARCHNAME",alias => $SearchName);  
                     }  
                 }  
             }  
             if (defined($Object->{"DBLINKS"})) {  
                 foreach my $Link (@{$Object->{"DBLINKS"}}) {  
                     my @TempArray = split(/:/,$Link);  
                     if (@TempArray >= 2) {  
                         if ($TempArray[0] ne "KEGG" && $TempArray[0] ne "ARGONNE") {  
                             $self->PutE(AliasType => $TempArray[0], source => "PUBLISHED MODEL");  
                         }  
                         if ($TempArray[0] eq "ARGONNE") {  
                             splice(@TempArray,1,1);  
                         }  
                         for (my $j=1; $j < @TempArray; $j++) {  
                             $self->PutR(HasAliasOf => $LoadingArray[0],$TempArray[0],alias => $TempArray[$j]);  
                         }  
                     }  
                 }  
             }  
         }  
     }  
 }  
106    
107  sub LoadReactions {  # hash of ubiquitous compounds.
108      my ($self,$model) = @_;  use constant UBIQUITOUS => {
109      #First getting the reaction table which has a list of all reactions in the database as well as alot of reaction information      cpd00001 => 'OH-',
110      my $ReactionTable = $model->database()->GetDBTable("REACTIONS");      cpd00002 => 'ATP',
111        cpd00003 => 'Nicotinamideadeninedinucleotide',
112        cpd00004 => 'Nicotinamideadeninedinucleotide-reduced',
113        cpd00005 => 'Nicotinamideadeninedinucleotidephosphate-reduced',
114        cpd00006 => 'Nicotinamideadeninedinucleotidephosphate',
115        cpd00007 => 'Oxygen',
116        cpd00008 => 'ADP',
117        cpd00009 => 'Orthophosphoric acid',
118        cpd00010 => 'CoenzymeA',
119        cpd00011 => 'Carbon dioxide',
120        cpd00012 => 'PPi',
121        cpd00018 => 'AMP',
122        cpd00020 => 'Pyruvic Acid',
123        cpd00022 => 'Acetyl-CoA',
124        cpd00025 => 'Hydrogen peroxide',
125        cpd00067 => 'H+',
126        cpd00971 => 'Sodium',
127        cpd15352 => '2-Demethylmenaquinone',
128        cpd15353 => '2-Demethylmenaquinol',
129        cpd15499 => 'Menaquinol',
130        cpd15500 => 'Menaquinone',
131        cpd15560 => 'Ubiquinone-8',
132        cpd15561 => 'Ubiquinol-8',
133    };
134    
135      #Cycling through the compound list and adding each individual compound entity  sub LoadModelFiles {
136      my $Count = 0;      # Get the parameters.
137      for (my $i=0; $i < $ReactionTable->size(); $i++) {      my ($self) = @_;
138          my $Row = $ReactionTable->get_row($i);      # Get the model dump file directory.
139          if (defined($Row) && defined($Row->{"DATABASE"}->[0]) && $Row->{"DATABASE"}->[0] =~ m/rxn\d\d\d\d\d/) {      my $dir = $self->db()->LoadDirectory() . "/models";
140              my $Object = FIGMODELObject->load($model->{"reaction directory"}->[0].$Row->{"DATABASE"}->[0],"\t");      # First we read the compounds.
141              my @LoadingArray = ($Row->{"DATABASE"}->[0],"","","","","",10000000,10000000,10000000,"R");      my $ih = $self->CheckFile("$dir/CompoundName.txt", qw(CompoundID Name));
142              $self->Track(reactionRow => $LoadingArray[0], 1000);      while (! eof $ih) {
143              if (defined($Object->{"DEFINITION"}->[0])) {          # Get the next compound.
144                  $LoadingArray[1] = $Object->{"DEFINITION"}->[0];          my ($id, $label) = $self->ReadLine($ih);
145              }          # Create a compound record for it.
146              if (defined($Object->{"EQUATION"}->[0])) {          $self->PutE(Compound => $id, label => $label, ubiquitous => (UBIQUITOUS->{$id} ? 1 : 0));
147                  my ($Direction,$Equation,$ReverseEquation,$FullEquation,$EquationCompartment,$Error) = $model->ConvertEquationToCode($Object->{"EQUATION"}->[0],{"Test" => "Test"});      }
148                  $LoadingArray[2] = $Equation;      # Next, the compound-reactions relationship. We create the reactions here, too.
149              }      $ih = $self->CheckFile("$dir/CompoundReaction.txt", qw(CompoundID ReactionID
150              if (defined($Object->{"OPERATOR"}->[0])) {                             Stoichiometry Cofactor));
151                  $LoadingArray[3] = $Object->{"OPERATOR"}->[0];      while (! eof $ih) {
152              }          # Get the next link.
153              if (defined($Object->{"PATHWAY"}->[0])) {          my ($compound, $reaction, $stoich, $cofactor) = $self->ReadLine($ih);
154                  $LoadingArray[4] = join(";",@{$Object->{"PATHWAY"}});          # Insure the reaction exists.
155              }          $self->CreateReaction($reaction);
156              if (defined($Row->{"KEGG MAPS"}->[0])) {          # Check for product or substrate.
157                  $LoadingArray[5] = join(";",@{$Row->{"KEGG MAPS"}});          my $product;
158              }          if ($stoich < 0) {
159              if (defined($Object->{"DELTAG"}->[0])) {              $product = 0;
160                  $LoadingArray[6] = $Object->{"DELTAG"}->[0];              $stoich = -$stoich;
             }  
             if (defined($Object->{"MMDELTAG"}->[0])) {  
                 $LoadingArray[7] = $Object->{"MMDELTAG"}->[0];  
             }  
             if (defined($Object->{"DELTAGERR"}->[0])) {  
                 $LoadingArray[8] = $Object->{"DELTAGERR"}->[0];  
             }  
             if (defined($Object->{"THERMODYNAMIC REVERSIBILITY"}->[0])) {  
                 my $code = $Object->{"THERMODYNAMIC REVERSIBILITY"}->[0];  
                 if ($code eq '<=>') {  
                     $LoadingArray[9] = "R";  
                 } elsif ($code eq '<=') {  
                     $LoadingArray[9] = "B";  
                 } elsif ($code eq '=>') {  
                     $LoadingArray[9] = "F";  
                 }  
             }  
             $self->PutE(Reaction => $LoadingArray[0], definition => $LoadingArray[1], code => $LoadingArray[2], operator => $LoadingArray[3], pathway => $LoadingArray[4],keggMap => $LoadingArray[5], deltaG => $LoadingArray[6], MMdeltaG => $LoadingArray[7], deltaGerr => $LoadingArray[8], predictedReversibility => $LoadingArray[9]);  
             #Adding relationship between compounds and reactions  
             my ($Reactants,$Products) = $model->GetReactionSubstrateData($LoadingArray[0]);  
             if (! defined $Reactants) {  
                 $self->Add(missingReactants => 1);  
161              } else {              } else {
162                  for (my $j=0; $j < @{$Reactants}; $j++) {              $product = 1;
                     $self->PutR(Involves => $LoadingArray[0],$Reactants->[$j]->{"DATABASE"}->[0],product => 0,stoichiometry => $Reactants->[$j]->{"COEFFICIENT"}->[0],main => 1,compartment => $Reactants->[$j]->{"COMPARTMENT"}->[0],discriminator => $Count);  
                     #TODO:Properly identify main reactants in reactions  
                     $Count++;  
163                  }                  }
164            # Connect the reaction to the compound.
165            $self->PutR(Involves => $reaction, $compound, product => $product,
166                        stoichiometry => $stoich, cofactor => $cofactor);
167        }
168        # Now we connect the reactions to roles.
169        $ih = $self->CheckFile("$dir/ReactionRole.txt", qw(ReactionID Role));
170        while (! eof $ih) {
171            # Get the next reaction/role pair. We don't need to create roles here because
172            # they are built in the Feature module.
173            my ($reaction, $role) = $self->ReadLine($ih);
174            # Insure the reaction exists.
175            $self->CreateReaction($reaction);
176            # Connect the reaction to the role.
177            $self->PutR(IsTriggeredBy => $reaction, $role);
178        }
179        # Now we create the models.
180        $ih = $self->CheckFile("$dir/ModelGenome.txt", qw(ModelID Name GenomeID));
181        while (! eof $ih) {
182            # Get the next model.
183            my ($model, $name, $genome) = $self->ReadLine($ih);
184            # Create the model.
185            $self->PutE(Model => $model);
186            # Connect it to the genome. Again, the genomes are created elsewhere.
187            $self->PutR(IsModeledBy => $genome, $model);
188        }
189        # Next we connect the reactions to models.
190        $ih = $self->CheckFile("$dir/ModelReaction.txt", qw(ModelID ReactionID));
191        while (! eof $ih) {
192            # Get the next line.
193            my ($model, $reaction) = $self->ReadLine($ih);
194            # Only proceed if a reaction is present.
195            if ($reaction) {
196                # Insure the reaction exists.
197                $self->CreateReaction($reaction);
198                # Connect the reaction to the model.
199                $self->PutR(IsRequiredBy => $reaction, $model);
200              }              }
             if (! defined $Products) {  
                 $self->Add(missingProducts => 1);  
             } else {  
                 for (my $j=0; $j < @{$Products}; $j++) {  
                     $self->PutR(Involves => $LoadingArray[0],$Products->[$j]->{"DATABASE"}->[0],product => 1,stoichiometry => $Products->[$j]->{"COEFFICIENT"}->[0],main => 1,compartment => $Products->[$j]->{"COMPARTMENT"}->[0],discriminator => $Count);  
                     #TODO:Properly identify main reactants in reactions  
                     $Count++;  
201                  }                  }
202              }              }
203              #Adding relationship between EC number and reactions  
204              if (defined($Object->{"ENZYME"})) {  =head3 CheckFile
205                  foreach my $Enzyme (@{$Object->{"ENZYME"}}) {  
206                     if ($Enzyme =~ /,/) {      my $ih = $sl->CheckFile($fileName, @fieldNames);
207                      #TODO:Invalid enzyme  
208    Read the header record of the specified file and verify that the field names match
209    the names in the input list. If they do not, an error will be thrown; if they do, an
210    open file handle will be returned, positioned on the first data record.
211    
212    =over 4
213    
214    =item fileName
215    
216    Name for the input file. The file is in standard tab-delimited format. The first record
217    contains the field names and the remaining records contain the data.
218    
219    =item fieldNames
220    
221    List of the field names expected, in order.
222    
223    =item RETURN
224    
225    Returns the open file handle if successful. If there is a mismatch, throws an error.
226    
227    =back
228    
229    =cut
230    
231    sub CheckFile {
232        # Get the parameters.
233        my ($self, $fileName, @fieldNames) = @_;
234        # Open the file.
235        my $retVal = Open(undef, "<$fileName");
236        $self->Add(files => 1);
237        # Read in the file header.
238        my @actualFields = Tracer::GetLine($retVal);
239        # This will be set to TRUE if there's a mismatch.
240        my $error = 0;
241        for (my $i = 0; $i <= $#fieldNames; $i++) {
242            if ($fieldNames[$i] ne $actualFields[$i]) {
243                Trace("Field match error: expected $fieldNames[$i], found $actualFields[$i].") if T(0);
244                $error = 1;
245            }
246        }
247        # Was there an error?
248        if ($error) {
249            # Yes, so abort.
250            Confess("Invalid field name header in $fileName.");
251                     } else {                     } else {
252                      $Enzyme =~ s/\s//g;          # No, so trace the open.
253                      $self->PutE(EcNumber => $Enzyme, obsolete => 0, replacedby => "NONE");          Trace("Processing $fileName.") if T(ERDBLoadGroup => 2);
                     $self->PutR(IsCategorizedInto => $LoadingArray[0],$Enzyme,source => "KEGG");  
                    }  
                 }  
             }  
             #Adding the relationship between reaction and alias type  
             if (defined($Object->{"NAME"})) {  
                 foreach my $Name (@{$Object->{"NAME"}}) {  
                     $self->PutR(HasReactionAliasOf => $LoadingArray[0],"NAME",alias => $Name);  
                 }  
             }  
             if (defined($Object->{"DBLINKS"})) {  
                 foreach my $Link (@{$Object->{"DBLINKS"}}) {  
                     my @TempArray = split(/:/,$Link);  
                     if (@TempArray >= 2) {  
                         if ($TempArray[0] ne "KEGG" && $TempArray[0] ne "ARGONNE") {  
                             $self->PutE(AliasType => $TempArray[0], source => "PUBLISHED MODEL");  
                         }  
                         if ($TempArray[0] eq "ARGONNE") {  
                             splice(@TempArray,1,1);  
                         }  
                         for (my $j=1; $j < @TempArray; $j++) {  
                             $self->PutR(HasReactionAliasOf => $LoadingArray[0],$TempArray[0],alias => $TempArray[$j]);  
                         }  
                     }  
                 }  
             }  
254          }          }
255        # Return the file handle.
256        return $retVal;
257      }      }
258    
259    =head3 ReadLine
260    
261        my @fields = $sl->ReadLine($ih);
262    
263    Read a line of data from an input file.
264    
265    =over 4
266    
267    =item ih
268    
269    Open file handle for the input file.
270    
271    =item RETURN
272    
273    Returns a list of the field values for the next record in the file.
274    
275    =back
276    
277    =cut
278    
279    sub ReadLine {
280        # Get the parameters.
281        my ($self, $ih) = @_;
282        # Read the line.
283        my @retVal = Tracer::GetLine($ih);
284        # Count this record.
285        $self->Track(records => $retVal[0], 1000);
286        # Return the data.
287        return @retVal;
288  }  }
289    
290    
291  sub FixDate {  =head3 CreateReaction
292      my ($date) = @_;  
293      my $retVal = ($date eq 'NA' ? 0 : $date);      $sl->CreateReaction($reaction);
294      return $retVal;  
295    Insure we have created a record for the specified reaction.
296    
297    =over 4
298    
299    =item reaction
300    
301    ID of the reaction in question.
302    
303    =back
304    
305    =cut
306    
307    sub CreateReaction {
308        # Get the parameters.
309        my ($self, $reaction) = @_;
310        # Get the reaction hash.
311        my $reactionH = $self->{reactions};
312        # See if this reaction is new.
313        if (! $reactionH->{$reaction}) {
314            # It is, so create it.
315            $self->PutE(Reaction => $reaction);
316            # Insure we don't create it again.
317            $reactionH->{$reaction} = 1;
318        }
319  }  }
320    
321  1;  1;

Legend:
Removed from v.1.13  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3