[Bio] / Sprout / ModelSaplingLoader.pm Repository:
ViewVC logotype

View of /Sprout/ModelSaplingLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (download) (as text) (annotate)
Mon Jan 24 03:52:18 2011 UTC (8 years, 8 months ago) by parrello
Branch: MAIN
Changes since 1.11: +0 -287 lines
More loader fixes.

#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
use FIGMODEL;

package ModelSaplingLoader;
    use strict;
    use Tracer;
    use ERDB;
    use HTTP::Date;
    use base 'BaseSaplingLoader';

=head1 Sapling ModelLoader Load Group Class

=head2 Introduction

The ModelLoader Load Group includes all of the major mdl tables.

=head3 new

    my $sl = ModelSaplingLoader->new($erdb, $options);

Construct a new ModelSaplingLoader object.

=over 4

=item erdb

L<Sapling> object for the database being loaded.

=item options

Reference to a hash of command-line options.

=item tables

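List of tables in this load group. The constructor builds this list itself
and hands it to the base class; it is not supplied by the caller.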

=back

=cut

sub new {
    my ($class, $erdb, $options) = @_;
    # Every table that belongs to this load group, one per line. The list is
    # sorted before it is handed to the base class. (Encompasses is not part
    # of the list.)
    my @groupTables = sort qw(
        StructuralCue
        IsFoundIn
        Compound
        HasAliasOf
        AliasType
        HasReactionAliasOf
        Reaction
        Involves
        IsTriggeredBy
        RoleSet
        IsCombinationOf
        IsCategorizedInto
        EcNumber
        IsConsistentWith
        IsStimulatedBy
        FeatureSet
        IsSetOf
        IsRequiredBy
        Model
        IsModeledBy
        ModelUser
    );
    # The base-class constructor is called as a plain function with our class
    # name as its first argument, so the resulting object is built for $class.
    my $loader = BaseSaplingLoader::new($class, $erdb, $options, @groupTables);
    return $loader;
}

=head2 Public Methods

=head3 Generate

    $sl->Generate();

Generate the data for the mdl files.

=cut

sub Generate {
    # Get the parameters.
    my ($self) = @_;
    # All of the model data is loaded in the global section. Genome sections
    # have no model-specific data, so for them there is nothing to do and the
    # FIGMODEL object is not even constructed.
    if ($self->global()) {
        # Direct method-call syntax is used instead of the ambiguous indirect
        # object form "new FIGMODEL".
        my $model = FIGMODEL->new();
        # Loading the compound entities and relationships
        $self->LoadCompounds($model);
        # Loading the reaction and EC number entities and relationships
        $self->LoadReactions($model);
    }
    return;
}

=head3 LoadCompounds

    $sl->LoadCompounds($model);

Emit the compound records for the load: the fixed alias types, one Compound
entity per row of the COMPOUNDS table whose ID contains C<cpd> followed by
five digits, and the IsFoundIn and HasAliasOf relationships derived from each
compound's data file.

=over 4

=item model

Model object (a FIGMODEL instance when called from L</Generate>) that supplies
the COMPOUNDS table, the compound directory, and the molfile directory.

=back

=cut

sub LoadCompounds {
    my ($self,$model) = @_;

    # The COMPOUNDS table lists every compound in the database together with
    # a lot of per-compound information.
    my $CompoundTable = $model->database()->GetDBTable("COMPOUNDS");

    # These alias types are always emitted, whatever the compound data holds.
    my @universalAliasTypes = (
        ["NAME",       "NONE"],
        ["SEARCHNAME", "NONE"],
        ["KEGG",       "http://www.kegg.com/"],
        ["ARGONNE",    "OBSOLETE IDs"],
    );
    foreach my $aliasType (@universalAliasTypes) {
        my ($typeID, $typeSource) = @{$aliasType};
        $self->PutE(AliasType => $typeID, source => $typeSource);
    }

    # Walk the table row by row; rows without a usable compound ID are skipped.
    for (my $rowIndex = 0; $rowIndex < $CompoundTable->size(); $rowIndex++) {
        my $Row = $CompoundTable->get_row($rowIndex);
        next unless defined($Row) && defined($Row->{"DATABASE"}->[0]);
        my $compoundID = $Row->{"DATABASE"}->[0];
        next unless $compoundID =~ m/cpd\d\d\d\d\d/;

        # The per-compound data file is named after the compound ID.
        my $Object = FIGMODELObject->load($model->{"compound directory"}->[0].$compoundID,"\t");
        $self->Track(compoundRow => $compoundID, 1000);

        # Each attribute falls back to a default when the data is missing.
        # The label defaults to the compound ID itself.
        my $label          = defined($Row->{"NAME"}->[0])         ? $Row->{"NAME"}->[0]         : $compoundID;
        my $mass           = defined($Object->{"MASS"}->[0])      ? $Object->{"MASS"}->[0]      : 0;
        my $chargedFormula = defined($Object->{"FORMULA"}->[0])   ? $Object->{"FORMULA"}->[0]   : "";
        my $charge         = defined($Object->{"CHARGE"}->[0])    ? $Object->{"CHARGE"}->[0]    : 0;
        my $deltaG         = defined($Object->{"DELTAG"}->[0])    ? $Object->{"DELTAG"}->[0]    : 10000000;
        my $deltaGerr      = defined($Object->{"DELTAGERR"}->[0]) ? $Object->{"DELTAGERR"}->[0] : 10000000;
        my $stringcode     = defined($Row->{"STRINGCODE"}->[0])   ? $Row->{"STRINGCODE"}->[0]   : "";

        # The neutral formula needs both the charge and the charged formula.
        my $neutralFormula = "";
        if (defined($Object->{"CHARGE"}->[0]) && defined($Object->{"FORMULA"}->[0])) {
            $neutralFormula = FIGMODEL::ConvertToNeutralFormula($Object->{"FORMULA"}->[0],$Object->{"CHARGE"}->[0]);
        }

        # A molfile may sit directly in the molfile directory or in its pH7
        # subdirectory; either one counts.
        my $molfileDirectory = $model->{"Argonne molfile directory"}->[0];
        my $plainMolfile = $molfileDirectory.$compoundID.".mol";
        my $pH7Molfile = $molfileDirectory."pH7/".$compoundID.".mol";
        my $hasMolfile = (-e $plainMolfile || -e $pH7Molfile) ? 1 : 0;

        $self->PutE(Compound => $compoundID,
                    label => $label,
                    mass => $mass,
                    chargedFormula => $chargedFormula,
                    charge => $charge,
                    neutralFormula => $neutralFormula,
                    hasMolfile => $hasMolfile,
                    deltaG => $deltaG,
                    deltaGerr => $deltaGerr,
                    stringcode => $stringcode);

        # Relationship between structural cue and compound. Only entries of
        # the exact form "cue:count" are used.
        if (defined($Object->{"STRUCTURAL_CUES"})) {
            foreach my $Cue (@{$Object->{"STRUCTURAL_CUES"}}) {
                my @cueParts = split(/:/,$Cue);
                next if @cueParts != 2;
                my ($cueID, $cueCount) = @cueParts;
                $self->PutR(IsFoundIn => $cueID,$compoundID,count => $cueCount);
            }
        }

        # Name aliases: each name is stored as given, followed by every
        # search form derived from it.
        if (defined($Object->{"NAME"})) {
            foreach my $Name (@{$Object->{"NAME"}}) {
                $self->PutR(HasAliasOf => $compoundID,"NAME",alias => $Name);
                foreach my $SearchName (FIGMODEL::ConvertToSearchNames($Name)) {
                    $self->PutR(HasAliasOf => $compoundID,"SEARCHNAME",alias => $SearchName);
                }
            }
        }

        # Database links have the form "TYPE:alias[:alias...]".
        if (defined($Object->{"DBLINKS"})) {
            foreach my $Link (@{$Object->{"DBLINKS"}}) {
                my ($linkType, @linkAliases) = split(/:/,$Link);
                # A link with no alias after the type carries no information.
                next unless @linkAliases >= 1;
                # KEGG and ARGONNE were emitted up front; any other type is
                # declared here as coming from a published model.
                if ($linkType ne "KEGG" && $linkType ne "ARGONNE") {
                    $self->PutE(AliasType => $linkType, source => "PUBLISHED MODEL");
                }
                # For ARGONNE links the first field after the type is dropped.
                if ($linkType eq "ARGONNE") {
                    shift(@linkAliases);
                }
                foreach my $linkAlias (@linkAliases) {
                    $self->PutR(HasAliasOf => $compoundID,$linkType,alias => $linkAlias);
                }
            }
        }
    }
}

=head3 LoadReactions

    $sl->LoadReactions($model);

Emit the reaction records for the load: one Reaction entity per row of the
REACTIONS table whose ID contains C<rxn> followed by five digits, plus the
Involves, IsCategorizedInto and HasReactionAliasOf relationships and the
EcNumber and AliasType entities derived from each reaction's data file.

The statistics C<missingReactants> and C<missingProducts> count reactions
whose substrate data is unavailable; C<invalidEnzymes> counts ENZYME values
that are skipped because they contain a comma or are empty once white space
is removed.

=over 4

=item model

Model object (a FIGMODEL instance when called from L</Generate>) that supplies
the REACTIONS table, the reaction directory, the equation-to-code conversion,
and the substrate data for each reaction.

=back

=cut

sub LoadReactions {
    my ($self,$model) = @_;
    # First getting the reaction table, which lists every reaction in the
    # database along with a lot of reaction information.
    my $ReactionTable = $model->database()->GetDBTable("REACTIONS");

    # Translation from the THERMODYNAMIC REVERSIBILITY operator in the data
    # file to the one-letter code stored in predictedReversibility. Any other
    # operator leaves the default of "R".
    my %ReversibilityCode = ('<=>' => "R", '<=' => "B", '=>' => "F");

    # Cycling through the reaction list and adding each individual reaction
    # entity. $Count is the Involves discriminator; it is never reset, so it
    # keeps increasing across reactions.
    my $Count = 0;
    for (my $i=0; $i < $ReactionTable->size(); $i++) {
        my $Row = $ReactionTable->get_row($i);
        next unless (defined($Row) && defined($Row->{"DATABASE"}->[0]) && $Row->{"DATABASE"}->[0] =~ m/rxn\d\d\d\d\d/);
        my $ReactionID = $Row->{"DATABASE"}->[0];
        # The per-reaction data file is named after the reaction ID.
        my $Object = FIGMODELObject->load($model->{"reaction directory"}->[0].$ReactionID,"\t");
        $self->Track(reactionRow => $ReactionID, 1000);

        # Attribute defaults, used whenever the data file lacks the field.
        my $Definition = "";
        my $Code = "";
        my $Operator = "";
        my $Pathway = "";
        my $KeggMap = "";
        my $DeltaG = 10000000;
        my $MMDeltaG = 10000000;
        my $DeltaGErr = 10000000;
        my $Reversibility = "R";

        if (defined($Object->{"DEFINITION"}->[0])) {
            $Definition = $Object->{"DEFINITION"}->[0];
        }
        if (defined($Object->{"EQUATION"}->[0])) {
            # Only the second value returned by the conversion (the equation
            # code) is needed. If the conversion yields no code, the default
            # empty string is kept so an undefined value is never stored.
            my (undef, $Equation) = $model->ConvertEquationToCode($Object->{"EQUATION"}->[0],{"Test" => "Test"});
            if (defined($Equation)) {
                $Code = $Equation;
            }
        }
        if (defined($Object->{"OPERATOR"}->[0])) {
            $Operator = $Object->{"OPERATOR"}->[0];
        }
        if (defined($Object->{"PATHWAY"}->[0])) {
            $Pathway = join(";",@{$Object->{"PATHWAY"}});
        }
        if (defined($Row->{"KEGG MAPS"}->[0])) {
            $KeggMap = join(";",@{$Row->{"KEGG MAPS"}});
        }
        if (defined($Object->{"DELTAG"}->[0])) {
            $DeltaG = $Object->{"DELTAG"}->[0];
        }
        if (defined($Object->{"MMDELTAG"}->[0])) {
            $MMDeltaG = $Object->{"MMDELTAG"}->[0];
        }
        if (defined($Object->{"DELTAGERR"}->[0])) {
            $DeltaGErr = $Object->{"DELTAGERR"}->[0];
        }
        if (defined($Object->{"THERMODYNAMIC REVERSIBILITY"}->[0])) {
            my $ReversibilityOperator = $Object->{"THERMODYNAMIC REVERSIBILITY"}->[0];
            if (exists($ReversibilityCode{$ReversibilityOperator})) {
                $Reversibility = $ReversibilityCode{$ReversibilityOperator};
            }
        }
        $self->PutE(Reaction => $ReactionID, definition => $Definition, code => $Code, operator => $Operator, pathway => $Pathway,keggMap => $KeggMap, deltaG => $DeltaG, MMdeltaG => $MMDeltaG, deltaGerr => $DeltaGErr, predictedReversibility => $Reversibility);

        # Adding relationship between compounds and reactions. Reactants are
        # written first (product => 0), then products (product => 1).
        my ($Reactants,$Products) = $model->GetReactionSubstrateData($ReactionID);
        my @Sides = ([$Reactants, 0, "missingReactants"], [$Products, 1, "missingProducts"]);
        foreach my $Side (@Sides) {
            my ($Substrates, $IsProduct, $MissingStat) = @{$Side};
            if (! defined $Substrates) {
                $self->Add($MissingStat => 1);
                next;
            }
            foreach my $Substrate (@{$Substrates}) {
                #TODO:Properly identify main reactants in reactions
                $self->PutR(Involves => $ReactionID,$Substrate->{"DATABASE"}->[0],product => $IsProduct,stoichiometry => $Substrate->{"COEFFICIENT"}->[0],main => 1,compartment => $Substrate->{"COMPARTMENT"}->[0],discriminator => $Count);
                $Count++;
            }
        }

        # Adding relationship between EC number and reactions
        if (defined($Object->{"ENZYME"})) {
            foreach my $Enzyme (@{$Object->{"ENZYME"}}) {
                # Work on a copy: the loop variable aliases the element of
                # the loaded object, which must not be modified here.
                my $EcNumber = defined($Enzyme) ? $Enzyme : "";
                $EcNumber =~ s/\s//g;
                if ($EcNumber eq "" || $EcNumber =~ /,/) {
                    #TODO:Invalid enzyme
                    $self->Add(invalidEnzymes => 1);
                    next;
                }
                $self->PutE(EcNumber => $EcNumber, obsolete => 0, replacedby => "NONE");
                $self->PutR(IsCategorizedInto => $ReactionID,$EcNumber,source => "KEGG");
            }
        }

        # Adding the relationship between reaction and alias type
        if (defined($Object->{"NAME"})) {
            foreach my $Name (@{$Object->{"NAME"}}) {
                $self->PutR(HasReactionAliasOf => $ReactionID,"NAME",alias => $Name);
            }
        }
        # Database links have the form "TYPE:alias[:alias...]".
        if (defined($Object->{"DBLINKS"})) {
            foreach my $Link (@{$Object->{"DBLINKS"}}) {
                my @TempArray = split(/:/,$Link);
                if (@TempArray >= 2) {
                    if ($TempArray[0] ne "KEGG" && $TempArray[0] ne "ARGONNE") {
                        $self->PutE(AliasType => $TempArray[0], source => "PUBLISHED MODEL");
                    }
                    # For ARGONNE links the first field after the type is dropped.
                    if ($TempArray[0] eq "ARGONNE") {
                        splice(@TempArray,1,1);
                    }
                    for (my $j=1; $j < @TempArray; $j++) {
                        $self->PutR(HasReactionAliasOf => $ReactionID,$TempArray[0],alias => $TempArray[$j]);
                    }
                }
            }
        }
    }
    return;
}


=head3 FixDate

    my $fixed = FixDate($date);

Return C<0> when the incoming date is the string C<NA>; otherwise return the
date exactly as it was passed in.

=cut

sub FixDate {
    my ($date) = @_;
    # 'NA' is replaced by 0; every other value passes through untouched.
    if ($date eq 'NA') {
        return 0;
    }
    return $date;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3