--- ModelSaplingLoader.pm 2009/05/28 18:08:56 1.3 +++ ModelSaplingLoader.pm 2011/05/02 17:28:09 1.17 @@ -16,32 +16,32 @@ # Genomes at veronika@thefig.info or download a copy from # http://www.theseed.org/LICENSE.TXT. # -use FIGMODEL; +#use FIGMODEL; package ModelSaplingLoader; use strict; use Tracer; use ERDB; - use HTTP::Date; use base 'BaseSaplingLoader'; -=head1 Sapling ModelLoader Load Group Class +=head1 Sapling Model Load Group Class =head2 Introduction -The ModelLoader Load Group includes all of the major mdl tables. +The Model Load Group includes a small set of tables that describe reactions and compounds +and how they relate to the models in the main model database. =head3 new - my $sl = ModelLoaderSaplingLoader->new($erdb, $options, @tables); + my $sl = ModelSaplingLoader->new($erdb, $options, @tables); -Construct a new ModelLoaderSaplingLoader object. +Construct a new ModelSaplingLoader object. =over 4 =item erdb -[[SaplingPm]] object for the database being loaded. +L object for the database being loaded. =item options @@ -59,9 +59,13 @@ # Get the parameters. my ($class, $erdb, $options) = @_; # Create the table list. - my @tables = sort qw(StructuralCue IsFoundIn Compound HasAliasOf AliasType HasReactionAliasOf Reaction Involves IsTriggeredBy RoleSet IsCombinationOf IsCategorizedInto EcNumber IsConsistentWith IsStimulatedBy FeatureSet IsSetOf IsRequiredBy Model Encompasses IsModeledBy); + my @tables = qw(Compound Reaction EcNumber Model Media IsTriggeredBy + IsCategorizedInto IsConsistentWith IsModeledBy Involves + IsRequiredBy); # Create the BaseSaplingLoader object. my $retVal = BaseSaplingLoader::new($class, $erdb, $options, @tables); + # Create the reaction tracking hash. + $retVal->{reactions} = {}; # Return it. return $retVal; } @@ -72,7 +76,7 @@ $sl->Generate(); -Generate the data for the mdl files. +Generate the data for the model files. =cut @@ -81,21 +85,10 @@ my ($self) = @_; # Get the database object. 
my $erdb = $self->db(); - # Get the source object. - my $fig = $self->source(); # Is this the global section? - my $model = new FIGMODEL; if ($self->global()) { - #Loading the model entities and relationships - $self->LoadModels($model); - #Loading structural cues first - $self->LoadStructuralCues($model); - #Loading the compound entities and relationships - $self->LoadCompounds($model); - #Loading the reaction, and EC number entities and relationships - $self->LoadReactions($model); - #Loading the role set entity and relationships - $self->LoadRoleSets($model); + # Load the tables from the model dump files. + $self->LoadModelFiles(); } else { # Get the section ID. my $genomeID = $self->section(); @@ -103,457 +96,225 @@ } } -#This function loads all of the StructuralCue entities -sub LoadStructuralCues { - my ($self,$model) = @_; - - #Getting the list of structural cue filenames - my @CuesList = glob($model->{"Reaction database directory"}->[0]."cues/"); - my %CuePriorities; - #TODO:NEED TO POPULATE THE PRIORITIES HASH - foreach my $CueFile (@CuesList) { - my $Object = FIGMODELObject->load($CueFile,"\t"); - if (defined($Object) && defined($Object->{"NAME"}->[0])) { - my @LoadingArray = ($Object->{"NAME"}->[0],-1,0,0,"","","",""); - if (defined($CuePriorities{$Object->{"NAME"}->[0]})) { - $LoadingArray[1] = $CuePriorities{$Object->{"NAME"}->[0]}; - } - if (defined($Object->{"SMALL_MOLECULE"}->[0])) { - $LoadingArray[2] = $Object->{"SMALL_MOLECULE"}->[0]; - } - if (defined($Object->{"CHARGE"}->[0])) { - $LoadingArray[3] = $Object->{"CHARGE"}->[0]; - } - if (defined($Object->{"FORMULA"}->[0])) { - $LoadingArray[4] = $Object->{"FORMULA"}->[0]; - } - if (defined($Object->{"ENERGY"}->[0])) { - $LoadingArray[5] = $Object->{"ENERGY"}->[0]; - } - if (defined($Object->{"ERROR"}->[0])) { - $LoadingArray[6] = $Object->{"ERROR"}->[0]; - } - if (defined($Object->{"STRUCTURAL_CUES"}->[0])) { - $LoadingArray[7] = join(";",@{$Object->{"STRUCTURAL_CUES"}}); - } - 
$self->PutE(StructuralCue => $LoadingArray[0], priority => $LoadingArray[1], smallMolecule => $LoadingArray[2], charge => $LoadingArray[3], formula => $LoadingArray[4],deltaG => $LoadingArray[5], deltaGerr => $LoadingArray[6], subgroups => $LoadingArray[7]); - } - } -} +=head3 LoadModelFiles + + $sl->LoadModelFiles(); -sub LoadCompounds { - my ($self,$model) = @_; +Load the data from the five model dump files. - #First getting the compound table which has a list of all compounds in the database as well as alot of compound information - my $CompoundTable = $model->GetDBTable("COMPOUNDS"); +=cut + +# hash of ubiquitous compounds. +use constant UBIQUITOUS => { + cpd00001 => 'OH-', + cpd00002 => 'ATP', + cpd00003 => 'Nicotinamideadeninedinucleotide', + cpd00004 => 'Nicotinamideadeninedinucleotide-reduced', + cpd00005 => 'Nicotinamideadeninedinucleotidephosphate-reduced', + cpd00006 => 'Nicotinamideadeninedinucleotidephosphate', + cpd00007 => 'Oxygen', + cpd00008 => 'ADP', + cpd00009 => 'Orthophosphoric acid', + cpd00010 => 'CoenzymeA', + cpd00011 => 'Carbon dioxide', + cpd00012 => 'PPi', + cpd00018 => 'AMP', + cpd00020 => 'Pyruvic Acid', + cpd00022 => 'Acetyl-CoA', + cpd00025 => 'Hydrogen peroxide', + cpd00067 => 'H+', + cpd00971 => 'Sodium', + cpd15352 => '2-Demethylmenaquinone', + cpd15353 => '2-Demethylmenaquinol', + cpd15499 => 'Menaquinol', + cpd15500 => 'Menaquinone', + cpd15560 => 'Ubiquinone-8', + cpd15561 => 'Ubiquinol-8', +}; - #Adding the universal AliasType that will always exist - $self->PutE(AliasType => "NAME", source => "NONE"); - $self->PutE(AliasType => "SEARCHNAME", source => "NONE"); - $self->PutE(AliasType => "KEGG", source => "http://www.kegg.com/"); - $self->PutE(AliasType => "ARGONNE", source => "OBSOLETE IDs"); - - #Cycling through the compound list and adding each individual compound entity - for (my $i=0; $i < $CompoundTable->size(); $i++) { - my $Row = $CompoundTable->get_row($i); - if (defined($Row) && defined($Row->{"DATABASE"}->[0]) && 
$Row->{"DATABASE"}->[0] =~ m/cpd\d\d\d\d\d/) { - my $Object = FIGMODELObject->load($model->{"compound directory"}->[0].$Row->{"DATABASE"}->[0],"\t"); - my @LoadingArray = ($Row->{"DATABASE"}->[0],$Row->{"DATABASE"}->[0],0,"",0,"",0,10000000,10000000,""); - $self->Track(compoundRow => $LoadingArray[0], 1000); - if (defined($Row->{"NAME"}->[0])) { - $LoadingArray[1] = $Row->{"NAME"}->[0]; - } - if (defined($Object->{"MASS"}->[0])) { - $LoadingArray[2] = $Object->{"MASS"}->[0]; - } - if (defined($Object->{"FORMULA"}->[0])) { - $LoadingArray[3] = $Object->{"FORMULA"}->[0]; - } - if (defined($Object->{"CHARGE"}->[0])) { - $LoadingArray[4] = $Object->{"CHARGE"}->[0]; - } - if (defined($Object->{"CHARGE"}->[0]) && defined($Object->{"FORMULA"}->[0])) { - $LoadingArray[5] = FIGMODEL::ConvertToNeutralFormula($Object->{"FORMULA"}->[0],$Object->{"CHARGE"}->[0]); - } - if (-e $model->{"Argonne molfile directory"}->[0].$Row->{"DATABASE"}->[0].".mol" || -e $model->{"Argonne molfile directory"}->[0]."pH7/".$Row->{"DATABASE"}->[0].".mol") { - $LoadingArray[6] = 1; - } - if (defined($Object->{"DELTAG"}->[0])) { - $LoadingArray[7] = $Object->{"DELTAG"}->[0]; - } - if (defined($Object->{"DELTAGERR"}->[0])) { - $LoadingArray[8] = $Object->{"DELTAGERR"}->[0]; - } - if (defined($Row->{"STRINGCODE"}->[0])) { - $LoadingArray[9] = $Row->{"STRINGCODE"}->[0]; - } - $self->PutE(Compound => $LoadingArray[0], label => $LoadingArray[1], mass => $LoadingArray[2], chargedFormula => $LoadingArray[3], charge => $LoadingArray[4],neutralFormula => $LoadingArray[5], hasMolfile => $LoadingArray[6], deltaG => $LoadingArray[7], deltaGerr => $LoadingArray[8], stringcode => $LoadingArray[9]); - #Adding the relationship between compound and structural cue - if (defined($Object->{"STRUCTURAL_CUES"})) { - foreach my $Cue (@{$Object->{"STRUCTURAL_CUES"}}) { - my @TempArray = split(/:/,$Cue); - if (@TempArray == 2) { - $self->PutR(IsFoundIn => $TempArray[0],$LoadingArray[0],count => $TempArray[1]); - } - } - } - 
#Adding the relationship between compound and alias type - if (defined($Object->{"NAME"})) { - foreach my $Name (@{$Object->{"NAME"}}) { - $self->PutR(HasAliasOf => $LoadingArray[0],"NAME",alias => $Name); - my @SearchNames = FIGMODEL::ConvertToSearchNames($Name); - foreach my $SearchName (@SearchNames) { - $self->PutR(HasAliasOf => $LoadingArray[0],"SEARCHNAME",alias => $SearchName); - } - } - } - if (defined($Object->{"DBLINKS"})) { - foreach my $Link (@{$Object->{"DBLINKS"}}) { - my @TempArray = split(/:/,$Link); - if (@TempArray >= 2) { - if ($TempArray[0] ne "KEGG" && $TempArray[0] ne "ARGONNE") { - $self->PutE(AliasType => $TempArray[0], source => "PUBLISHED MODEL"); - } - if ($TempArray[0] eq "ARGONNE") { - splice(@TempArray,1,1); - } - for (my $j=1; $j < @TempArray; $j++) { - $self->PutR(HasAliasOf => $LoadingArray[0],$TempArray[0],alias => $TempArray[$j]); - } - } - } - } +sub LoadModelFiles { + # Get the parameters. + my ($self) = @_; + # Get the model dump file directory. + my $dir = $self->db()->LoadDirectory() . "/models"; + # First we read the compounds. + my $ih = $self->CheckFile("$dir/CompoundName.txt", qw(CompoundID Name)); + while (! eof $ih) { + # Get the next compound. + my ($id, $label) = $self->ReadLine($ih); + # Create a compound record for it. + $self->PutE(Compound => $id, label => $label, ubiquitous => (UBIQUITOUS->{$id} ? 1 : 0)); + } + # Next, the compound-reactions relationship. We create the reactions here, too. + $ih = $self->CheckFile("$dir/CompoundReaction.txt", qw(CompoundID ReactionID + Stoichiometry Cofactor)); + while (! eof $ih) { + # Get the next link. + my ($compound, $reaction, $stoich, $cofactor) = $self->ReadLine($ih); + # Insure the reaction exists. + $self->CreateReaction($reaction); + # Check for product or substrate. + my $product; + if ($stoich < 0) { + $product = 0; + $stoich = -$stoich; + } else { + $product = 1; } + # Connect the reaction to the compound. 
+ $self->PutR(Involves => $reaction, $compound, product => $product, + stoichiometry => $stoich, cofactor => $cofactor); } -} - -sub LoadReactions { - my ($self,$model) = @_; - #First getting the reaction table which has a list of all reactions in the database as well as alot of reaction information - my $ReactionTable = $model->GetDBTable("REACTIONS"); - - #Cycling through the compound list and adding each individual compound entity - my $Count = 0; - for (my $i=0; $i < $ReactionTable->size(); $i++) { - my $Row = $ReactionTable->get_row($i); - if (defined($Row) && defined($Row->{"DATABASE"}->[0]) && $Row->{"DATABASE"}->[0] =~ m/rxn\d\d\d\d\d/) { - my $Object = FIGMODELObject->load($model->{"reaction directory"}->[0].$Row->{"DATABASE"}->[0],"\t"); - my @LoadingArray = ($Row->{"DATABASE"}->[0],"","","","","",10000000,10000000,10000000,"R"); - $self->Track(reactionRow => $LoadingArray[0], 1000); - if (defined($Object->{"DEFINITION"}->[0])) { - $LoadingArray[1] = $Object->{"DEFINITION"}->[0]; - } - if (defined($Object->{"EQUATION"}->[0])) { - my ($Direction,$Equation,$ReverseEquation,$FullEquation,$EquationCompartment,$Error) = $model->ConvertEquationToCode($Object->{"EQUATION"}->[0],{"Test" => "Test"}); - $LoadingArray[2] = $Equation; - } - if (defined($Object->{"OPERATOR"}->[0])) { - $LoadingArray[3] = $Object->{"OPERATOR"}->[0]; - } - if (defined($Object->{"PATHWAY"}->[0])) { - $LoadingArray[4] = join(";",@{$Object->{"PATHWAY"}}); - } - if (defined($Row->{"KEGG MAPS"}->[0])) { - $LoadingArray[5] = join(";",@{$Row->{"KEGG MAPS"}}); - } - if (defined($Object->{"DELTAG"}->[0])) { - $LoadingArray[6] = $Object->{"DELTAG"}->[0]; - } - if (defined($Object->{"MMDELTAG"}->[0])) { - $LoadingArray[7] = $Object->{"MMDELTAG"}->[0]; - } - if (defined($Object->{"DELTAGERR"}->[0])) { - $LoadingArray[8] = $Object->{"DELTAGERR"}->[0]; - } - if (defined($Object->{"THERMODYNAMIC REVERSIBILITY"}->[0])) { - my $code = $Object->{"THERMODYNAMIC REVERSIBILITY"}->[0]; - if ($code eq '<=>') 
{ - $LoadingArray[9] = "R"; - } elsif ($code eq '<=') { - $LoadingArray[9] = "B"; - } elsif ($code eq '=>') { - $LoadingArray[9] = "F"; - } - } - $self->PutE(Reaction => $LoadingArray[0], definition => $LoadingArray[1], code => $LoadingArray[2], operator => $LoadingArray[3], pathway => $LoadingArray[4],keggMap => $LoadingArray[5], deltaG => $LoadingArray[6], MMdeltaG => $LoadingArray[7], deltaGerr => $LoadingArray[8], predictedReversibility => $LoadingArray[9]); - #Adding relationship between compounds and reactions - my ($Reactants,$Products) = $model->GetReactionSubstrateData($LoadingArray[0]); - if (! defined $Reactants) { - $self->Add(missingReactants => 1); - } else { - for (my $j=0; $j < @{$Reactants}; $j++) { - $self->PutR(Involves => $LoadingArray[0],$Reactants->[$j]->{"DATABASE"}->[0],product => 0,stoichiometry => $Reactants->[$j]->{"COEFFICIENT"}->[0],main => 1,compartment => $Reactants->[$j]->{"COMPARTMENT"}->[0],discriminator => $Count); - #TODO:Properly identify main reactants in reactions - $Count++; - } - } - if (! 
defined $Products) { - $self->Add(missingProducts => 1); - } else { - for (my $j=0; $j < @{$Products}; $j++) { - $self->PutR(Involves => $LoadingArray[0],$Products->[$j]->{"DATABASE"}->[0],product => 1,stoichiometry => $Products->[$j]->{"COEFFICIENT"}->[0],main => 1,compartment => $Products->[$j]->{"COMPARTMENT"}->[0],discriminator => $Count); - #TODO:Properly identify main reactants in reactions - $Count++; - } - } - #Adding relationship between EC number and reactions - if (defined($Object->{"ENZYME"})) { - foreach my $Enzyme (@{$Object->{"ENZYME"}}) { - $self->PutE(EcNumber => $Enzyme, obselete => 0, replacedby => "NONE"); - $self->PutR(IsCategorizedInto => $LoadingArray[0],$Enzyme,source => "KEGG"); - } - } - #Adding the relationship between reaction and alias type - if (defined($Object->{"NAME"})) { - foreach my $Name (@{$Object->{"NAME"}}) { - $self->PutR(HasReactionAliasOf => $LoadingArray[0],"NAME",alias => $Name); - } - } - if (defined($Object->{"DBLINKS"})) { - foreach my $Link (@{$Object->{"DBLINKS"}}) { - my @TempArray = split(/:/,$Link); - if (@TempArray >= 2) { - if ($TempArray[0] ne "KEGG" && $TempArray[0] ne "ARGONNE") { - $self->PutE(AliasType => $TempArray[0], source => "PUBLISHED MODEL"); - } - if ($TempArray[0] eq "ARGONNE") { - splice(@TempArray,1,1); - } - for (my $j=1; $j < @TempArray; $j++) { - $self->PutR(HasReactionAliasOf => $LoadingArray[0],$TempArray[0],alias => $TempArray[$j]); - } - } - } - } + # Now we connect the reactions to roles. + $ih = $self->CheckFile("$dir/ReactionRole.txt", qw(ReactionID Role)); + while (! eof $ih) { + # Get the next reaction/role pair. We don't need to create roles here because + # they are built in the Feature module. + my ($reaction, $role) = $self->ReadLine($ih); + # Insure the reaction exists. + $self->CreateReaction($reaction); + # Connect the reaction to the role. + $self->PutR(IsTriggeredBy => $reaction, $role); + } + # Now we create the models. 
+ $ih = $self->CheckFile("$dir/ModelGenome.txt", qw(ModelID Name GenomeID)); + while (! eof $ih) { + # Get the next model. + my ($model, $name, $genome) = $self->ReadLine($ih); + # Create the model. + $self->PutE(Model => $model); + # Connect it to the genome. Again, the genomes are created elsewhere. + $self->PutR(IsModeledBy => $genome, $model); + } + # Next we connect the reactions to models. + $ih = $self->CheckFile("$dir/ModelReaction.txt", qw(ModelID ReactionID)); + while (! eof $ih) { + # Get the next line. + my ($model, $reaction) = $self->ReadLine($ih); + # Only proceed if a reaction is present. + if ($reaction) { + # Insure the reaction exists. + $self->CreateReaction($reaction); + # Connect the reaction to the model. + $self->PutR(IsRequiredBy => $reaction, $model); } } } -sub LoadRoleSets { - my ($self,$model) = @_; - my $sapling =$self->db(); - my $RoleMappings = $model->GetDBTable("CURATED ROLE MAPPINGS"); - my $NewSet; - my $SetHash; - my $LastComplex = -1; - my $LastReaction = ""; - my $LastRole = ""; - for (my $i=0; $i < $RoleMappings->size(); $i++) { - my $Row = $RoleMappings->get_row($i); - if (defined($Row) && defined($Row->{"REACTION"}->[0]) && defined($Row->{"ROLE"}->[0]) && defined($Row->{"COMPLEX"}->[0]) && defined($Row->{"MASTER"}->[0])) { - if ($Row->{"REACTION"}->[0] ne $LastReaction || $LastComplex ne $Row->{"COMPLEX"}->[0]) { - if (defined($NewSet->{"ROLES"}) && defined($NewSet->{"REACTIONS"})) { - my @RoleList = sort(keys(%{$NewSet->{"ROLES"}})); - my @ReactionList = keys(%{$NewSet->{"REACTIONS"}}); - foreach my $Reaction (@ReactionList) { - $SetHash->{join("+",@RoleList)}->{"REACTIONS"}->{$Reaction}->{"MASTER"}->[0] = $NewSet->{"REACTIONS"}->{$Reaction}->{"MASTER"}->[0]; - if (defined($NewSet->{"REACTIONS"}->{$Reaction}->{"SUBSYSTEMS"})) { - foreach my $Subsystem (@{$NewSet->{"REACTIONS"}->{$Reaction}->{"SUBSYSTEMS"}}) { - if (defined($SetHash->{join("+",@RoleList)}->{"REACTIONS"}->{$Reaction}->{"SUBSYSTEMS"})) { - foreach my 
$OtherSubsystem (@{$SetHash->{join("+",@RoleList)}->{"REACTIONS"}->{$Reaction}->{"SUBSYSTEMS"}}) { - if ($Subsystem eq $OtherSubsystem) { - $Subsystem = ""; - last; - } - } - } - if ($Subsystem ne "") { - push(@{$SetHash->{join("+",@RoleList)}->{"REACTIONS"}->{$Reaction}->{"SUBSYSTEMS"}},$Subsystem); - } - } - } - } - foreach my $Role (@RoleList) { - $SetHash->{join("+",@RoleList)}->{"ROLES"}->{$Role} = 1; - } - } - $NewSet = {}; - } - $NewSet->{"ROLES"}->{$Row->{"ROLE"}->[0]} = 1; - $NewSet->{"REACTIONS"}->{$Row->{"REACTION"}->[0]}->{"MASTER"}->[0] = $Row->{"MASTER"}->[0]; - if (defined($Row->{"SUBSYSTEM"}->[0])) { - push(@{$NewSet->{"REACTIONS"}->{$Row->{"REACTION"}->[0]}->{"SUBSYSTEMS"}},$Row->{"SUBSYSTEM"}->[0]); - } - $LastRole = $Row->{"ROLE"}->[0]; - $LastReaction = $Row->{"REACTION"}->[0]; - $LastComplex = $Row->{"COMPLEX"}->[0]; - } - } +=head3 CheckFile - my @RoleSets = keys(%{$SetHash}); - foreach my $RoleSet (@RoleSets) { - my $Digested = $sapling->DigestKey($RoleSet); - $self->PutE(RoleSet => $Digested, source => "NONE"); - my @ReactionList = keys(%{$SetHash->{$RoleSet}->{"REACTIONS"}}); - foreach my $Reaction (@ReactionList) { - $self->PutR(IsTriggeredBy => $Reaction,$Digested, source => "NONE",master => $SetHash->{$RoleSet}->{"REACTIONS"}->{$Reaction}->{"MASTER"}->[0],subsystem => join(";",@{$SetHash->{$RoleSet}->{"REACTIONS"}->{$Reaction}->{"SUBSYSTEMS"}})); - } - my @RoleList = keys(%{$SetHash->{$RoleSet}->{"ROLES"}}); - foreach my $Role (@RoleList) { - $self->PutR(IsCombinationOf => $Digested,$Role, local => 0); + my $ih = $sl->CheckFile($fileName, @fieldNames); + +Read the header record of the specified file and verify that the field names match +the names in the input list. If they do not, an error will be thrown; if they do, an +open file handle will be returned, positioned on the first data record. + +=over 4 + +=item fileName + +Name for the input file. The file is in standard tab-delimited format. 
The first record +contains the field names and the remaining records contain the data. + +=item fieldNames + +List of the field names expected, in order. + +=item RETURN + +Returns the open file handle if successful. If there is a mismatch, throws an error. + +=back + +=cut + +sub CheckFile { + # Get the parameters. + my ($self, $fileName, @fieldNames) = @_; + # Open the file. + my $retVal = Open(undef, "<$fileName"); + $self->Add(files => 1); + # Read in the file header. + my @actualFields = Tracer::GetLine($retVal); + # This will be set to TRUE if there's a mismatch. + my $error = 0; + for (my $i = 0; $i <= $#fieldNames; $i++) { + if ($fieldNames[$i] ne $actualFields[$i]) { + Trace("Field match error: expected $fieldNames[$i], found $actualFields[$i].") if T(0); + $error = 1; } } + # Was there an error? + if ($error) { + # Yes, so abort. + Confess("Invalid field name header in $fileName."); + } else { + # No, so trace the open. + Trace("Processing $fileName.") if T(ERDBLoadGroup => 2); + } + # Return the file handle. + return $retVal; } -sub LoadModels { - my ($self,$model) = @_; - my $sapling = $self->db(); - ## TRS: Get hash of valid genome IDs. 
- my $genomeHash = $sapling->GenomeHash(); - my $ModelList = $model->GetDBTable("MODEL LIST"); - my $ModelStats = $model->GetDBTable("MODEL STATS"); - for (my $i=0; $i < $ModelStats->size(); $i++) { - my $Row = $ModelStats->get_row($i); - if (defined($Row) && defined($Row->{"Model ID"}->[0])) { - my @LoadingArray = ($Row->{"Model ID"}->[0],"","",0,0,0,"",0,0,0,0,0,0,0,0,0,0,0,0,"","",0,0,"",0,0); - if (defined($Row->{"Genome ID"}->[0]) && $genomeHash->{$Row->{"Genome ID"}->[0]}) { - if (defined($Row->{"Organism name"}->[0])) { - $LoadingArray[1] = $Row->{"Organism name"}->[0]; - } - if (defined($Row->{"Source"}->[0])) { - $LoadingArray[2] = $Row->{"Source"}->[0]; - } - if (defined($Row->{"Total genes"}->[0])) { - $LoadingArray[3] = $Row->{"Total genes"}->[0]; - } - if (defined($Row->{"Gram positive genes"}->[0])) { - $LoadingArray[4] = $Row->{"Gram positive genes"}->[0]; - } - if (defined($Row->{"Gram negative genes"}->[0])) { - $LoadingArray[5] = $Row->{"Gram negative genes"}->[0]; - } - if (defined($Row->{"Class"}->[0])) { - $LoadingArray[6] = $Row->{"Class"}->[0]; - } - if (defined($Row->{"Genes with functions"}->[0])) { - $LoadingArray[7] = $Row->{"Genes with functions"}->[0]; - } - if (defined($Row->{"Genes with reactions"}->[0])) { - $LoadingArray[8] = $Row->{"Genes with reactions"}->[0]; - } - if (defined($Row->{"Subsystem genes"}->[0])) { - $LoadingArray[9] = $Row->{"Subsystem genes"}->[0]; - } - if (defined($Row->{"Subsystem genes with reactions"}->[0])) { - $LoadingArray[10] = $Row->{"Subsystem genes with reactions"}->[0]; - } - if (defined($Row->{"Nonsubsystem genes"}->[0])) { - $LoadingArray[11] = $Row->{"Nonsubsystem genes"}->[0]; - } - if (defined($Row->{"Nonsubsystem genes with reactions"}->[0])) { - $LoadingArray[12] = $Row->{"Nonsubsystem genes with reactions"}->[0]; - } - if (defined($Row->{"Number of reactions"}->[0])) { - $LoadingArray[13] = $Row->{"Number of reactions"}->[0]; - } - if (defined($Row->{"Transport reaction"}->[0])) { - 
$LoadingArray[14] = $Row->{"Transport reaction"}->[0]; - } - if (defined($Row->{"Gap filling reactions"}->[0])) { - $LoadingArray[15] = $Row->{"Gap filling reactions"}->[0]; - } - if (defined($Row->{"Gap filling time"}->[0])) { - $LoadingArray[16] = $Row->{"Gap filling time"}->[0]; - } - if (defined($Row->{"Gap filling objective"}->[0])) { - $LoadingArray[17] = $Row->{"Gap filling objective"}->[0]; - } - if (defined($Row->{"Gap filling minimal objective"}->[0])) { - $LoadingArray[18] = $Row->{"Gap filling minimal objective"}->[0]; - } - if (defined($Row->{"Build date"}->[0])) { - $LoadingArray[19] = $Row->{"Build date"}->[0]; - } - if (defined($Row->{"Gap fill date"}->[0])) { - $LoadingArray[20] = $Row->{"Gap fill date"}->[0]; - } - if (defined($Row->{"Version"}->[0])) { - $LoadingArray[21] = $Row->{"Version"}->[0]; - } - if (defined($Row->{"Gap fill version"}->[0])) { - $LoadingArray[22] = $Row->{"Gap fill version"}->[0]; - } - if ($Row->{"Model ID"}->[0] =~ m/Core\d+\.\d+V\d+\.\d+/) { - $LoadingArray[25] = 1; - } elsif (defined($ModelList->get_row_by_key($Row->{"Model ID"}->[0],"MODEL ID"))) { - if (defined($ModelList->get_row_by_key($Row->{"Model ID"}->[0],"MODEL ID")->{"DIRECTORY"}->[0])) { - $LoadingArray[23] = $ModelList->get_row_by_key($Row->{"Model ID"}->[0],"MODEL ID")->{"DIRECTORY"}->[0]; - } - if (defined($ModelList->get_row_by_key($Row->{"Model ID"}->[0],"MODEL ID")->{"JOB ID"}->[0]) && $ModelList->get_row_by_key($Row->{"Model ID"}->[0],"MODEL ID")->{"JOB ID"}->[0] ne "NONE") { - $LoadingArray[24] = $ModelList->get_row_by_key($Row->{"Model ID"}->[0],"MODEL ID")->{"JOB ID"}->[0]; - } - } - for my $j (3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,21,22,24,25) { - $LoadingArray[$j] = 0 if ($LoadingArray[$j] =~ /[a-z]/i); - } - $self->PutE(Model => $LoadingArray[0], organismName => $LoadingArray[1], source => $LoadingArray[2], totalGenes => $LoadingArray[3], gramPositiveGenes => $LoadingArray[4],gramNegativeGenes => $LoadingArray[5], class => $LoadingArray[6], 
genesWithFunction => $LoadingArray[7], genesWithReactions => $LoadingArray[8], subsystemGenes => $LoadingArray[9], subsystemGenesWithReactions => $LoadingArray[10], nonsubsystemGenes => $LoadingArray[11], nonsubsystemGenesWithReactions => $LoadingArray[12], numberOfReactions => $LoadingArray[13], numberOfTransporters => $LoadingArray[14], gapFillingReactions => $LoadingArray[15], gapFillingTime => $LoadingArray[16], gapFillingObjective => $LoadingArray[17], gapFillingMinimalObjective => $LoadingArray[18], buildDate => $LoadingArray[19], gapFillDate => $LoadingArray[20], version => $LoadingArray[21], gapFillVersion => $LoadingArray[22], directory => $LoadingArray[23], jobid => $LoadingArray[24], obsolete => $LoadingArray[25]); - #Adding the relationship between model and genome - if (defined($Row->{"Genome ID"}->[0])) { - $self->PutR(IsModeledBy => $Row->{"Genome ID"}->[0],$LoadingArray[0]); - } - #If the model is not obsolete, we add the relationships between model and reaction and featureset - if ($LoadingArray[25] == 0) { - #Loading the model from file - my $ModelTable = $model->GetDBModel($LoadingArray[0]); - for (my $j=0; $j < $ModelTable->size(); $j++) { - my $RxnRow = $ModelTable->get_row($j); - if (defined($RxnRow) && defined($RxnRow->{"LOAD"}->[0])) { - my @SubLoadingArray = ("R","c","NONE",10,"V","NONE","UNKNOWN"); - if (defined($RxnRow->{"DIRECTIONALITY"}->[0])) { - my $code = $RxnRow->{"DIRECTIONALITY"}->[0]; - if ($code eq '<=>') { - $SubLoadingArray[0] = 'R'; - } elsif ($code eq '<=') { - $SubLoadingArray[0] = 'B'; - } elsif ($code eq '=>') { - $SubLoadingArray[0] = 'F'; - } - } - if (defined($RxnRow->{"COMPARTMENT"}->[0])) { - $SubLoadingArray[1] = $RxnRow->{"COMPARTMENT"}->[0]; - } - if (defined($RxnRow->{"SUBSYSTEM"})) { - $SubLoadingArray[2] = join(";",@{$RxnRow->{"SUBSYSTEM"}}); - } - if (defined($RxnRow->{"CONFIDENCE"}->[0])) { - $SubLoadingArray[3] = $RxnRow->{"CONFIDENCE"}->[0]; - if ($SubLoadingArray[3] eq 'NONE') { - $SubLoadingArray[3] = 10; 
- } - } - if (defined($RxnRow->{"NOTES"})) { - $SubLoadingArray[5] = join(";",@{$RxnRow->{"NOTES"}}); - } - if (defined($RxnRow->{"ASSOCIATED PEG"})) { - #Adding the role set entities - foreach my $FeatureSet (@{$RxnRow->{"ASSOCIATED PEG"}}) { - if ($FeatureSet =~ m/peg\.\d+/) { - my $DigestedKey = $sapling->DigestKey($FeatureSet); - $self->PutE(FeatureSet => $DigestedKey, source => "NONE"); - #Adding the relationship between the reaction and roleset - $self->PutR(IsStimulatedBy => $RxnRow->{"LOAD"}->[0],$DigestedKey); - #Adding the relationship between the model and featureset - $self->PutR(Encompasses => $LoadingArray[0],$DigestedKey); - #Adding the relationship between FeatureSet and feature - my @PegArray = split(/\+/,$FeatureSet); - foreach my $Feature (@PegArray) { - if ($Feature =~ m/(peg\.\d+)/) { - $self->PutR(IsSetOf => $DigestedKey,"fig|".$Row->{"Genome ID"}->[0].".".$1); - } - } - } - } - $SubLoadingArray[6] = join(";",@{$RxnRow->{"ASSOCIATED PEG"}}); - } - $self->PutR(IsRequiredBy => $RxnRow->{"LOAD"}->[0],$LoadingArray[0], direction => $SubLoadingArray[0], compartment => $SubLoadingArray[1], confidence => $SubLoadingArray[3], class => $SubLoadingArray[4], notes => $SubLoadingArray[5], dependency => $SubLoadingArray[6]); - } - } - #Clearing the model data from memmory - undef $ModelTable; - $model->ClearDBModel($LoadingArray[0],"DELETE"); - } - } - } +=head3 ReadLine + + my @fields = $sl->ReadLine($ih); + +Read a line of data from an input file. + +=over 4 + +=item ih + +Open file handle for the input file. + +=item RETURN + +Returns a list of the field values for the next record in the file. + +=back + +=cut + +sub ReadLine { + # Get the parameters. + my ($self, $ih) = @_; + # Read the line. + my @retVal = Tracer::GetLine($ih); + # Count this record. + $self->Track(records => $retVal[0], 1000); + # Return the data. 
+ return @retVal; +} + + +=head3 CreateReaction + + $sl->CreateReaction($reaction); + +Insure we have created a record for the specified reaction. + +=over 4 + +=item reaction + +ID of the reaction in question. + +=back + +=cut + +sub CreateReaction { + # Get the parameters. + my ($self, $reaction) = @_; + # Get the reaction hash. + my $reactionH = $self->{reactions}; + # See if this reaction is new. + if (! $reactionH->{$reaction}) { + # It is, so create it. + $self->PutE(Reaction => $reaction); + # Insure we don't create it again. + $reactionH->{$reaction} = 1; } }