[Bio] / BioSimScratch / loadBss.py Repository:
ViewVC logotype

View of /BioSimScratch/loadBss.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Thu Jul 31 02:59:25 2003 UTC (16 years, 2 months ago) by efrank
Branch: MAIN
CVS Tags: post-st-migration2, mass-spec-01, sc2004-3, mass-spec-02, post-st-migration, forRick-14Jul04-1, forRick-14Jul04-0, sc2004-4, sc04-finalDay, sc2004-2, sc2004-1, sc2004-0, HEAD
Changes since 1.2: +30 -16 lines
sql:
	modify length of short name to 128 chars
loadBss.py
	modify so that metabolites names can have '-' in them.  really this has to
        do with a hack of looking for "-" as meaning the reaction arrow.  now  look
        for "<->"  "<-" and "->" explicitly

#!/usr/bin/python
#------------------------------------------------------------------------
#
# My first Python program....it's pretty awful!
#
#
#
# class bss
# class Reaction_py
#------------------------------------------------------------------------

import sys
import string
import cx_Oracle

#------------------------------------------------------------------------
class bss:
    "Represents BioSimScratch schema and high level operations upon it."
#------------------------------------------------------------------------
#
#    def setReco( RecoName )
#        """Sets the name of the reconstruction being manipulated.  You
#        can only manipulate one Reconstruction per instance.  This
#        method must be called exactly once OR addReco must be called
#        exactly once.
#
#    def addReco( RecoName, topRecoElemName ):
#        """RecoName is the name of the reconstruction and must not already
#        exist in the reco catalog.  topRecoElemName is the name of the top
#        recoelem node.  If it does not exist, it will be created."""
#
#    def addMetabolite( shortName, longName ):
#        "Presently, the combination (short,long) is a key"
#
#    def addREChild( childName, parentName ):
#        "Add a child to parentName, which must exist already."
#
#    def addRx( reName, rxString ):
#        """Add reaction to RecoElem, reName.  rxString, e.g., 
#        enzyme name: A + 2 B <-> D """
#
#    def addEnzymeRole( key, ECnumber, compoundName):
#        "Binds a compound name to an EC role under a caller-chosen key"
#
#    def dump():
#        "Print self"
#
#    def clear()
#        "Zorch all the structures"
#
#    def writeAndClear():
#        """Want to force no-commit if error.  but then what is diff between
#        bss and the external datastructure?  should I purge my state after
#        the commit?  yes.??"""
#------------------------------------------------------------------------
# Instance data
#------------------------------------------------------------------------
#   self.metabolites[ shortName:string ] =  longName : string
#   self.recos[ recoName:string ]        =  [recoId : int, topNode: string]
#   self.recoElems[ recoElemName:string] =  [myId:int, [kidName:string,..]]
#   self.enzymeRoles[ key : string ]     =
#                         [ ecNumber : string, compName:string, enzRoleId:int]
#   self.reactions  : [ [recoElemName:string, rx:Reaction_py, rxId:int] ...]
#   self.connection : DBI database connection
#   self.inError    : boolean.  says "don't commit!"
#------------------------------------------------------------------------

    #--------------------------------------------------------------------
    def __init__(self ):
    #--------------------------------------------------------------------
        self.metabolites = {}
        self.recos       = {}
        self.recoElems   = {}
        self.enzymeRoles = {}
        self.reactions   = []
        # self.connection  = 
        self.inError     = 0
        self.recoName    = None

    #--------------------------------------------------------------------
    def dump(self ):
    #--------------------------------------------------------------------
        print "metabs   : ", self.metabolites
        print "recos    : ", self.recos
        print "recoElems: ", self.recoElems
        print "enzRoles : ", self.enzymeRoles
        print "reactions: [ ", 
        for r in self.reactions:
            print "[", r[0], ",",  r[1].origString, ",", r[2], "] ",
        print " ]"
        print "inerror  : ", self.inError
        print "recoName : ", self.recoName

    #--------------------------------------------------------------------
    def clear( self ):
    #--------------------------------------------------------------------
        self.metabolites = {}
        self.recos       = {}
        self.recoElems   = {}
        self.enzymeRoles = {}
        self.reactions   = []
        # self.connection  = 
        self.inError     = 0

        print "bss.clear:  zeroed all data structures"
        return
    #--------------------------------------------------------------------

    #--------------------------------------------------------------------
    def addMetabolite(self,  shortName, longName ):
    #--------------------------------------------------------------------
        "Presently, the combination (short,long) is a key"

        if ( self.metabolites.has_key( shortName ) and not
             self.metabolites[ shortName ] == longName ):
            print "bss.addMetabolite: pair already exists", shortName, longName
            return
            
        self.metabolites[ shortName ] = longName
        return
    #
        

    #--------------------------------------------------------------------
    def setReco(self,  recoName):
    #--------------------------------------------------------------------
        if ( self.recoName != None ):
            print "bss.setReco: recoName already set."
            return

        self.recoName = recoName

    #--------------------------------------------------------------------
    def addReco(self,  recoName, topRecoElemName ):
    #--------------------------------------------------------------------
        if ( self.recoName != None):
            print "bss.addReco: recoName already set."
            return
        
        if ( self.recos.has_key( recoName ) ):
            print "bss.addReco: recoName already exists", recoName, topRecoElemName
            return

        self.recos[recoName] = [None, topRecoElemName]
        self.setReco( recoName )
        
        if ( not self.recoElems.has_key( topRecoElemName ) ):
            self.recoElems[topRecoElemName]=[None, []]
        
        # All reconstructions are parented by a great granddaddy of 'em
        # all called "All Reconstructions"

        self.addREChild( topRecoElemName, "All Reconstructions")

        return
    #

    #--------------------------------------------------------------------
    def addREChild(self,  childName, parentName ):
    #--------------------------------------------------------------------
        if ( self.recoElems.has_key( parentName ) ):
            self.recoElems[parentName][1].append( childName )
        else:
            # allow this in case parent is already in db
            self.recoElems[parentName] = [None, [childName]]

        if ( not self.recoElems.has_key( childName ) ):
            self.recoElems[childName]=[None, []]
        
        return
    #

    #--------------------------------------------------------------------
    def addRx(self,  reName, rxString ):
    #--------------------------------------------------------------------
        if ( not self.recoElems.has_key( reName ) ):
            print "bss.addRx: create RecoElem first", reName, rxString
            return

        self.reactions.append( [reName, Reaction_py(rxString), None])
        return
    
    
    #--------------------------------------------------------------------
    def addEnzymeRole(self,  key, ECnumber, compoundName):
    #--------------------------------------------------------------------
    # the key is present because an EC can be fullfilled my many compounds
    # and a compound can fulfill multiple functions.  so we just punt
    # and use a key to keep track of these relations.
    
        if (self.enzymeRoles.has_key( key )):
            print "bss.addEnzymeRole: (ECnumber,name) already used: ",
            print self.enzymeRoles[ key ]
            return

        # don't knoenzymeRoleIdD yet from DB
        self.enzymeRoles[ key ] = [ ECnumber, compoundName, None ]

        return
    
    #--------------------------------------------------------------------
    def writeAndClear(self ):
    #--------------------------------------------------------------------

       self.connection = cx_Oracle.connect( "biosimscratch/bss@db02")

       # Execute all table modifications, but do not commit.  We
       # may discover errors here, e.g., constraint violation that
       # would mean that data in our structures is either self inconsistent
       # or inconsistent with what is in the DB.  If so, the self.inError
       # will be set and we'll roll back the transaction below....need
       # to learn about python exceptions.

       self.__dbWriteMetabolites()
       self.__dbWriteEnzymeRoles()
       self.__dbWriteRecoElems()
       self.__dbWriteRecos()
       self.__dbWriteReactions()

       if ( self.inError ):
           print "bss.writeAndClear: transaction abandoned"
           self.connection.rollback()
       else:
           print "bss.writeAndClear: commiting transaction"
           self.dump()
           self.connection.commit()
           self.clear()

       self.connection.close()
       return
     
    #--------------------------------------------------------------------
    def __dbWriteMetabolites( self ):
    #--------------------------------------------------------------------
        """Insert every (shortName, longName) pair into ECMetabolite.

        Nothing is committed here.  On the first failing insert
        self.inError is set to 1, the offending pair is printed and the
        method returns.  On success self.inError is left alone so that an
        error recorded by an earlier step is not erased.
        """
        c = self.connection.cursor()

        try:
            for shortName in self.metabolites.keys():
                longName = self.metabolites[ shortName ]
                try:
                    c.execute( """INSERT INTO ECMetabolite
                               VALUES ( :p_Arg_1, :p_Arg_2 ) """,
                               p_Arg_1=shortName,
                               p_Arg_2=longName )
                except Exception:
                    self.inError = 1
                    print "bss.__dbWriteMetabolites: Oracle error writing (",
                    print shortName, " , ",
                    print longName, " ) "
                    return
        finally:
            # Runs on the early return above as well as on normal exit.
            c.close()

        print "bss.__dbWriteMetabolites: metabolites valid"
        return
   
    #---------------------------------------------------------------------
    def __dbWriteEnzymeRoles( self ):
    #---------------------------------------------------------------------
        """Insert every enzyme role into ECEnzymeRole and record its new id.

        The id comes from EnzymeRoleIdSequence and is stored back into the
        third slot of the self.enzymeRoles entry.  Nothing is committed
        here.  On the first failure self.inError is set to 1 and the method
        returns; on success self.inError is left alone so that an error
        recorded by an earlier step is not erased.
        """
        c = self.connection.cursor()

        try:
            for k in self.enzymeRoles.keys():
                (ec, compName, enzRoleId) = self.enzymeRoles[ k ]
                try:
                    c.execute( """INSERT INTO ECEnzymeRole
                                  VALUES ( EnzymeRoleIdSequence.nextval,
                                           :p_Arg_1, :p_Arg_2 ) """,
                               p_Arg_1=ec,
                               p_Arg_2=compName )
                    # Read back the id the sequence just handed out.
                    c.execute( """SELECT EnzymeRoleIdSequence.currval
                                  FROM dual""")
                    enzRoleId = int( c.fetchall()[0][0] )
                    self.enzymeRoles[ k ] = [ ec, compName, enzRoleId ]
                except Exception:
                    self.inError = 1
                    print "bss.__dbWriteEnzymeRoles: Oracle error writing (",
                    print ec, " , ", compName, " ) "
                    return
        finally:
            # Runs on the early return above as well as on normal exit.
            c.close()

        print "bss.__dbWriteEnzymeRoles: enzymeRoles valid"
        return
   
    #---------------------------------------------------------------------
    def __dbWriteRxStoichList( self, table, id, stoichList ):
    #---------------------------------------------------------------------
        """Insert one (id, stoich, name) row into table per stoichList item.

        table       Oracle table name: ECRxInput or ECRxOutput.  It is
                    spliced into the SQL text because a table name cannot
                    be a bind variable, so it must never come from input.
        id          the RxId used for the join onto ECReaction.
        stoichList  sequence of (stoich, name); stoich is passed through
                    int() before it is bound.

        On the first failure (including a stoich that int() rejects)
        self.inError is set to 1 and the method returns.  On success
        self.inError is left alone so that an error recorded by an earlier
        step is not erased.
        """
        c = self.connection.cursor()

        try:
            for (stoich, name) in stoichList:
                try:
                    c.execute( "INSERT INTO " + table +
                               " VALUES ( :p_Arg_1, :p_Arg_2, :p_Arg_3 )",
                               p_Arg_1 = id,
                               p_Arg_2 = int( stoich ),
                               p_Arg_3 = name )
                except Exception:
                    self.inError = 1
                    print "bss.__dbWriteRxStoichList: Oracle error for:",
                    print table, id, stoichList
                    return
        finally:
            # Runs on the early return above as well as on normal exit.
            c.close()

        print "bss.__dbWriteRxStoichList:", table, "rows valid"
        return
   
    #---------------------------------------------------------------------
    def __dbWriteRxCatalyst( self, RxId, enzRoleKey ):
    #---------------------------------------------------------------------

        if (enzRoleKey == None ):
            # some rxns have no associated enzyme info.
            return

        c = self.connection.cursor()

        if ( not self.enzymeRoles.has_key( enzRoleKey )):
            print "bss.__dbWriteRxCatalyst: enzyme not found with key: ",
            print enzRoleKey
            self.inError=1
            c.close()
            return

        (ec, compName, enzRoleId) = self.enzymeRoles[ enzRoleKey ]

        try:
            c.execute( """INSERT INTO ECCatalyst
                          VALUES ( :p_Arg_1, :p_Arg_2 )""",
                           p_Arg_1 = RxId,
                           p_Arg_2 = enzRoleId )
        except:
            self.inError = 1
            print "bss.__dbWriteRxCatalyst: Oracle error for:",
            print id, function
            c.close()
            return

        print "bss.__dbWriteRxCatalyst: inputs valid"
        self.inError = 0
        c.close()
        return


    #---------------------------------------------------------------------
    def __dbAttachReaction( self, rxId, recoElemId ):
    #---------------------------------------------------------------------

        (recoId, topRecoElemName) = self.recos[ self.recoName] 

        if ( None == recoId ):
            print "bss.__dbAttachReaction: no recoId set"
            self.inError=1
            return
        
        c = self.connection.cursor()
        try:
            sqlcmd= "INSERT INTO ReactionsInProcess VALUES ( %d, %d, %d) " %                    ( rxId, recoElemId, recoId)
            c.execute( sqlcmd )
        except:
            self.inError = 1
            print "bss.__dbAttachReaction failed (rx, recoelem, reco) = ",
            print "(%d, %d, %d)" % ( rxId, recoElemId, recoId)
            c.close
            return

        c.close
        return

    #---------------------------------------------------------------------
    def __dbWriteReactions( self ):
    #---------------------------------------------------------------------
        c = self.connection.cursor()

        for r in self.reactions:
            ( recoElemName, rx, id ) = r

            recoElemId = self.recoElems[ recoElemName ][0]

            if (not rx.isValid):
                print "Invalid Rx: ",
                rx.dump()
                self.inError=1
                return
            
            try:
                c.execute( """INSERT INTO ECReactions
                           VALUES ( RxIdSequence.nextval, :p_Arg_1 ) """,
                           p_Arg_1= rx.dir)
                c.execute( """SELECT RxIdSequence.currval FROM dual""")
                rxId = int( c.fetchall()[0][0] )
                r[2] = rxId

                self.__dbAttachReaction( rxId, recoElemId )
                self.__dbWriteRxStoichList( "ECRxInput", rxId, rx.inputs )
                self.__dbWriteRxStoichList( "ECRxOutput", rxId, rx.outputs )
                self.__dbWriteRxCatalyst( rxId, rx.enzyme )
            except:
                self.inError = 1
                print "bss.__dbWriteReactions: Oracle error for:",
                rx.dump()
                c.close()
                return

        print "bss.__dbWriteReactions: enzymeRoles valid"
        self.inError = 0
        c.close()
        return
   
    #---------------------------------------------------------------------
    def __dbWriteRecoElems( self ):
    #---------------------------------------------------------------------
        """Write all RecoElems to ProcList and their hierarchy to ProcStruct.

        Pass 1 inserts every name into ProcList and stores the id taken
        from ProcIdSequence in slot 0 of the self.recoElems entry.  Pass 2
        inserts one ProcStruct row per (parent, child) pair.  All values,
        names included, are passed as bind variables so that a name
        containing a quote character cannot break or alter the statement.

        On the first failure self.inError is set to 1 and the method
        returns.  On success self.inError is left alone so that an error
        recorded by an earlier step is not erased.
        """
        c = self.connection.cursor()

        try:
            # First load ProcList with all RecoElem names.  This defines
            # the RecoElem Id's via the database sequence, ProcIdSequence

            for reName in self.recoElems.keys():
                reStruct = self.recoElems[ reName ]
                try:
                    c.execute( """INSERT INTO ProcList
                               VALUES ( :p_Arg_1,
                               ProcIdSequence.nextval, NULL ) """,
                               p_Arg_1 = reName )
                    c.execute( """SELECT ProcIdSequence.currval FROM dual""")
                    reStruct[0] = int( c.fetchall()[0][0] )
                except Exception:
                    self.inError = 1
                    print "bss.__dbWriteRecoElems: Oracle error for ProcList:", reStruct
                    return
            print "bss.__dbWriteRecoElems: RecoElem names valid"

            # Now build the hierarchy by loading ProcStruct

            for parentName in self.recoElems.keys():
                (parentId, kidList) = self.recoElems[ parentName ]
                for kidName in kidList:
                    kidId = self.recoElems[ kidName ][0]

                    # now have kid, parent pair.  Put it in the DB
                    try:
                        c.execute( """INSERT INTO ProcStruct
                                   VALUES ( :p_Arg_1, :p_Arg_2,
                                            :p_Arg_3, :p_Arg_4 ) """,
                                   p_Arg_1 = parentName,
                                   p_Arg_2 = parentId,
                                   p_Arg_3 = kidName,
                                   p_Arg_4 = kidId )
                    except Exception:
                        self.inError = 1
                        print "bss.__dbWriteRecoElems: Oracle error for ProcStruct:", parentName, parentId, kidName, kidId
                        return
            print "bss.__dbWriteRecoElems: ProcStruct names valid"
        finally:
            # Runs on every early return above as well as on normal exit.
            c.close()

        return

    #---------------------------------------------------------------------
    def __dbWriteRecos( self ):
    #---------------------------------------------------------------------
#   self.recos[ recoName:string ]        =  [recoId : int, topNodeId:int]

        c = self.connection.cursor()

        # Enter the reconstructions into the Reco Catalog
        for recoName in self.recos.keys():
            try:
                c.execute( """INSERT INTO RecoCatalogEntry 
                           VALUES ( :p_Arg_1, 'No Currator', 
                           RecoIdSequence.nextval ) """,
                           p_Arg_1 = recoName)
                c.execute( """SELECT RecoIdSequence.currval FROM dual""")
                recoId = int( c.fetchall()[0][0] )
                self.recos[recoName][0]=recoId

                # Attach the top node of reconstruction.
                topNodeName = self.recos[recoName][1]
                topNodeId   = self.recoElems[topNodeName][0]
                c.execute( """INSERT INTO ProcessInReco
                            VALUES ( :p_Arg_1, :p_Arg_2 ) """,
                           p_Arg_1 = recoId,
                           p_Arg_2 = topNodeId )
            except:
                self.inError = 1
                print "bss.__dbWriteRecos: Oracle error for Catalog:", recoName
                c.close()
                return
        print "bss.__dbWriteRecos: RecoCatalog and ProcessInReco valid"

        return

# end class bss
#------------------------------------------------------------------------
    



#------------------------------------------------------------------------
class Reaction_py:

#------------------------------------------------------------------------
# ..I called it "enzyme" but its really "function" i.e., EC
#
#  def __init__( rxAsString )
#  def dump()
#  self.isValid    : boolean
#  self.origString :  
#  self.dir        : "forward"  "backward" or "both"
#  self.enzyme     : string
#  self.inputs     : [ (stoich:int, name:string)...]
#  self.outputs    : [ (stoich:int, name:string)...]
#--
#  implementation
#  def parseRx( l ):
#  def getDirection( halves ):
#  def getEnz( lhs_str ):
#  def  rxBreaker( lhs_raw ):
#  def readReactions( rxList ):
#
#------------------------------------------------------------------------

    #--------------------------------------------------------------------
    def parseRx(self, l ):
    #--------------------------------------------------------------------
    #
    #  EnzymeName: A + B -> C      Variants: ->, <-, <->
    #  3 A + B -> C
    #
    #  3*A + B -> C     Forbid
    #  NAD+ + H -> NADH Forbid... no "+" in metab name.
    #  A   B -> C,      Forbid
    #  A + B -C = 0     Forbid
    #  3A + B -> C      Forbid...interpret "3A" as metabolite name?
    #  
    #
    # Notes:
    #   o there is at most one colon, if an enzyme name is given.
    #   o rx  = (enzyme, dir, lhs, rhs)
    #   o lhs = ( (stoich, metab), ... )
    #
    #

#        halves = string.split( string.lstrip( string.rstrip(l) ), '-' )
#        nDash = string.count( l, "-" )
#        if ( 0 == nDash):
#            self.isValid  = 0
#            print "No reaction arrow in :", l
#            return ()
#        elif ( 1 != nDash ):
#            self.isValid  = 0
#            print "Format error. Too many -'s in:", l
#            return ()
#        (self.dir, lhs_raw, rhs_raw) = self.getDirection( halves )
             
        l=string.lstrip( string.rstrip( l ) )

        halves = string.split( l, '<->' )
        if ( len( halves) == 2 ):
            (self.dir, lhs_raw, rhs_raw) = ("both", halves[0], halves[1])
        else:
            halves = string.split( l, '<-' )
            if ( len( halves) == 2 ):
                (self.dir, lhs_raw, rhs_raw) = ("backward", halves[0], halves[1])
            else:
                halves = string.split( l, '->' )
                if ( len( halves) == 2 ):
                    (self.dir, lhs_raw, rhs_raw) = ("forward", halves[0], halves[1])
                else:
                    print "messed up direction"
                    self.isValid=0
                    return


        (self.enzyme, lhs_raw) = self.getEnz( lhs_raw )
        if (not self.isValid):
            return
    
        self.inputs  = self.rxBreaker( lhs_raw )
        self.outputs = self.rxBreaker( rhs_raw )
    
        return

    #---------------------------------------------------------------------
    def __init__ ( self, rxAsString ):
    #---------------------------------------------------------------------
        self.origString = rxAsString
        self.isValid  = 1
        self.dir      = None
        self.enzyme   = None
        self.inputs   = None
        self.outputs  = None


        self.parseRx( rxAsString )
        return
    

    #---------------------------------------------------------------------
    def getDirection( self, halves ):
    #---------------------------------------------------------------------

        back = 0
        fwd  = 0
    
        if ( halves[0][-1] == "<" ):
            back = 1
            lhs = halves[0][:-1]
        else:
            lhs = halves[0]
            
        if ( halves[1][0] == ">" ):
            fwd = 1
            rhs = halves[1][1:]
        else:
            rhs = halves[1]
                
        if ( fwd and back ):
            dir = "both"
        elif ( fwd ):
            dir = "forward"
        elif ( back):
            dir = "backward"
        else:
            self.isValid  = 0
            dir=None
            print "Reaction has no arrow:", self.origString
                    
        return (dir, lhs, rhs)
                
    #--------------------------------------------------------------------
    def getEnz( self, lhs_str ):
    #--------------------------------------------------------------------

        nColon = string.count( lhs_str, ":" )
        if ( 0 == nColon ):
            return ( None, lhs_str)
        elif ( 1 == nColon ):
            s = string.split( lhs_str, ":" )
            return ( s[0], s[1])
        else:
            self.isValid  = 0
            print "Too many colons ", lhs_str
        return
    
    #--------------------------------------------------------------------
    def  rxBreaker( self, lhs_raw ):
    #--------------------------------------------------------------------
        """Break one side of a reaction into (stoich, name) tuples.

        lhs_raw holds terms separated by "+".  A term is either a name
        (stoichiometry 1) or "<integer> <name>".  Terms may be separated
        from their coefficient by any amount of whitespace, and empty
        terms are skipped, so an empty side yields [].

        Returns a list of (stoich:int, name:string).  A term with more than
        two words, or with a coefficient that is not an integer, sets
        self.isValid to 0 and returns [].
        """
        r = []
        for term in lhs_raw.split( "+" ):
            # Whitespace split: tolerates repeated blanks and gives [] for
            # an empty term (a split on " " would give [''] instead).
            facts = term.split()
            nfacts = len( facts )

            if ( 0 == nfacts ):
                continue
            elif ( 1 == nfacts ):
                r.append( ( 1, facts[0] ) )
            elif ( 2 == nfacts ):
                try:
                    stoich = int( facts[0] )
                except ValueError:
                    print "Format error in rxBreaker: ", lhs_raw
                    self.isValid = 0
                    return []
                r.append( ( stoich, facts[1] ) )
            else:
                print "Format error in rxBreaker: ", lhs_raw
                self.isValid = 0
                return []

        return r

    #--------------------------------------------------------------------
    def  dump( self ):
    #--------------------------------------------------------------------
        if ( not self.isValid ):
            print "( Invalid, ",
        else:
            print "( Valid, ",

        print self.dir, ",", self.enzyme, ",", self.inputs, ",",
        print self.outputs, ")"

# end class Reaction_py
    


#------------------------------------------------------------------------
def testBss():
#------------------------------------------------------------------------
    """Build and return a small bss instance filled with sample data.

    Nothing is written to the database; the caller can dump() the result
    or call writeAndClear() on it.
    """
    b=bss()
    b.addMetabolite( "A", "aaa")
    b.addMetabolite( "B", "bbb")
    b.addMetabolite( "C", "ccc")
    b.addMetabolite( "D", "ddd")

    b.addReco( "myReco", "myRecoTop")

    b.addREChild( "kid1", "myRecoTop" )
    b.addREChild( "kid2", "myRecoTop" )

    # addEnzymeRole takes (key, ECnumber, compoundName).  The key has to be
    # the text in front of the colon in the reactions below, because that
    # is what the reaction stores as its enzyme and what is later used to
    # look the role up.
    b.addEnzymeRole( "E.C. 1.2.3.4", "E.C. 1.2.3.4", "A transmogriphase" )
    b.addEnzymeRole( "E.C. 1.2.3.5", "E.C. 1.2.3.5", "C transmogriphase" )

    b.addRx( "kid1", "E.C. 1.2.3.4: A -> B" )
    b.addRx( "kid1", "E.C. 1.2.3.5: C -> D" )

    return b

#------------------------------------------------------------------------
def readReactions( rxList, source=None ):
#------------------------------------------------------------------------
    """Parse one reaction per line and append the results to rxList.

    rxList  list that receives one Reaction_py per line, valid or not.
    source  iterable of lines; defaults to sys.stdin.
    """
    if source is None:
        source = sys.stdin

    for line in source:
        # Remove only the line terminator.  Chopping the last character
        # unconditionally would eat a real character from a final line
        # that has no trailing newline.
        rxList.append( Reaction_py( line.rstrip( "\r\n" ) ) )

    return

#------------------------------------------------------------------------
def main():
#------------------------------------------------------------------------
    print "hello"

    rxList = []
    readReactions( rxList )

    for r in rxList:
        r.dump()
    print "done"


#------------------------------------------------------------------------
# Run the stdin reaction parser only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3