[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.18, Wed Oct 12 03:17:58 2005 UTC revision 1.21, Sat Nov 12 03:42:48 2005 UTC
# Line 600  Line 600 
600    
601      Subsystem      Subsystem
602      Role      Role
603        RoleEC
604      SSCell      SSCell
605      ContainsFeature      ContainsFeature
606      IsGenomeOf      IsGenomeOf
# Line 607  Line 608 
608      OccursInSubsystem      OccursInSubsystem
609      ParticipatesIn      ParticipatesIn
610      HasSSCell      HasSSCell
     Catalyzes  
     Reaction  
611      ConsistsOfRoles      ConsistsOfRoles
612      RoleSubset      RoleSubset
613      HasRoleSubset      HasRoleSubset
614      ConsistsOfGenomes      ConsistsOfGenomes
615      GenomeSubset      GenomeSubset
616      HasGenomeSubset      HasGenomeSubset
617        Catalyzes
618        Diagram
619        RoleOccursIn
620    
621  =over 4  =over 4
622    
# Line 624  Line 626 
626    
627  =back  =back
628    
 B<TO DO>  
   
 Generate RoleName table?  
   
629  =cut  =cut
630  #: Return Type $%;  #: Return Type $%;
631  sub LoadSubsystemData {  sub LoadSubsystemData {
# Line 644  Line 642 
642      my $subsysCount = @subsysIDs;      my $subsysCount = @subsysIDs;
643      my $genomeCount = (keys %{$genomeHash});      my $genomeCount = (keys %{$genomeHash});
644      my $featureCount = $genomeCount * 4000;      my $featureCount = $genomeCount * 4000;
645        # Get the map list.
646        my @maps = $fig->all_maps;
647        my $mapCount = @maps;
648      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
649        my $loadDiagram = $self->_TableLoader('Diagram', $mapCount);
650        my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn', $featureCount * 6);
651      my $loadSubsystem = $self->_TableLoader('Subsystem', $subsysCount);      my $loadSubsystem = $self->_TableLoader('Subsystem', $subsysCount);
652      my $loadRole = $self->_TableLoader('Role', $featureCount * 6);      my $loadRole = $self->_TableLoader('Role', $featureCount * 6);
653        my $loadRoleEC = $self->_TableLoader('RoleEC', $featureCount * 6);
654        my $loadCatalyzes = $self->_TableLoader('Catalyzes', $genomeCount * $featureCount);
655      my $loadSSCell = $self->_TableLoader('SSCell', $featureCount * $genomeCount);      my $loadSSCell = $self->_TableLoader('SSCell', $featureCount * $genomeCount);
656      my $loadContainsFeature = $self->_TableLoader('ContainsFeature', $featureCount * $subsysCount);      my $loadContainsFeature = $self->_TableLoader('ContainsFeature', $featureCount * $subsysCount);
657      my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf', $featureCount * $genomeCount);      my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf', $featureCount * $genomeCount);
# Line 654  Line 659 
659      my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem', $featureCount * 6);      my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem', $featureCount * 6);
660      my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn', $subsysCount * $genomeCount);      my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn', $subsysCount * $genomeCount);
661      my $loadHasSSCell = $self->_TableLoader('HasSSCell', $featureCount * $genomeCount);      my $loadHasSSCell = $self->_TableLoader('HasSSCell', $featureCount * $genomeCount);
     my $loadReaction = $self->_TableLoader('Reaction', $featureCount * $genomeCount);  
     my $loadCatalyzes = $self->_TableLoader('Catalyzes', $featureCount * $genomeCount);  
662      my $loadRoleSubset = $self->_TableLoader('RoleSubset', $subsysCount * 50);      my $loadRoleSubset = $self->_TableLoader('RoleSubset', $subsysCount * 50);
663      my $loadGenomeSubset = $self->_TableLoader('GenomeSubset', $subsysCount * 50);      my $loadGenomeSubset = $self->_TableLoader('GenomeSubset', $subsysCount * 50);
664      my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles', $featureCount * $genomeCount);      my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles', $featureCount * $genomeCount);
665      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $featureCount * $genomeCount);      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $featureCount * $genomeCount);
666      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $subsysCount * 50);      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $subsysCount * 50);
667      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $subsysCount * 50);      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $subsysCount * 50);
668        # Create load objects for each of the tables we're loading.
669      Trace("Beginning subsystem data load.") if T(2);      Trace("Beginning subsystem data load.") if T(2);
670      # The reaction hash will contain a list of reactions for each role. When we're done,      # This hash will contain the role for each EC. When we're done, this
671      # a complicated sort and merge will be used to generate the Reaction and Catalyzes      # information will be used to generate the Catalyzes table.
672      # tables.      my %ecToRoles = ();
     my %reactionsToRoles = ();  
673      # Loop through the subsystems. Our first task will be to create the      # Loop through the subsystems. Our first task will be to create the
674      # roles. We do this by looping through the subsystems and creating a      # roles. We do this by looping through the subsystems and creating a
675      # role hash. The hash tracks each role ID so that we don't create      # role hash. The hash tracks each role ID so that we don't create
# Line 679  Line 682 
682          $loadSubsystem->Add("subsystemIn");          $loadSubsystem->Add("subsystemIn");
683          # Get the subsystem object.          # Get the subsystem object.
684          my $sub = $fig->get_subsystem($subsysID);          my $sub = $fig->get_subsystem($subsysID);
         # Get its reaction hash.  
         my $reactionHash = $sub->get_reactions();  
685          # Create the subsystem record.          # Create the subsystem record.
686          my $curator = $sub->get_curator();          my $curator = $sub->get_curator();
687          my $notes = $sub->get_notes();          my $notes = $sub->get_notes();
# Line 697  Line 698 
698                  # Add the role.                  # Add the role.
699                  $loadRole->Put($roleID, $abbr);                  $loadRole->Put($roleID, $abbr);
700                  $roleData{$roleID} = 1;                  $roleData{$roleID} = 1;
701                  # Add the role's reactions.                  # Check for an EC number.
702                  my $reactions = $reactionHash->{$roleID};                  if ($roleID =~ /\(EC ([^.]+\.[^.]+\.[^.]+\.[^)]+)\)\s*$/) {
703                  for my $reactionID (@{$reactions}) {                      my $ec = $1;
704                      if (! exists $reactionsToRoles{$reactionID}) {                      $loadRoleEC->Put($roleID, $ec);
705                          # Here the reaction is brand-new, so we create its reaction                      $ecToRoles{$ec} = $roleID;
                         # record.  
                         $loadReaction->Put($reactionID, $fig->reversible($reactionID));  
                         # We also create a blank list for it in the reaction hash.  
                         $reactionsToRoles{$reactionID} = [];  
                     }  
                     # Add the role to the reaction's role list.  
                     push @{$reactionsToRoles{$reactionID}}, $roleID;  
706                  }                  }
707              }              }
708          }          }
# Line 808  Line 802 
802              }              }
803          }          }
804      }      }
805      # Before we leave, we must create the Catalyzes table. The data is all stored in      # Now we loop through the diagrams. We need to create the diagram records
806      # "reactionToRoles" hash.      # and link each diagram to its roles. Note that only roles which occur
807      for my $reactionID (keys %reactionsToRoles) {      # in subsystems (and therefore appear in the %ecToRoles hash) are
808          # Get this reaction's list of roles. We sort it so we can merge out duplicates.      # included.
809          my @roles = sort @{$reactionsToRoles{$reactionID}};      for my $map (@maps) {
         my $lastRole = "";  
         # Loop through the roles, creating catalyzation records.  
         for my $thisRole (@roles) {  
             if ($thisRole ne $lastRole) {  
                 $loadCatalyzes->Put($thisRole, $reactionID);  
             }  
         }  
     }  
     # Finish the load.  
     my $retVal = $self->_FinishAll();  
     return $retVal;  
 }  
   
 =head3 LoadDiagramData  
   
 C<< my $stats = $spl->LoadDiagramData(); >>  
   
 Load the diagram data from FIG into Sprout.  
   
 Diagrams are used to organize functional roles. The diagram shows the  
 connections between chemicals that interact with a subsystem.  
   
 The following relations are loaded by this method.  
   
     Diagram  
     RoleOccursIn  
   
 =over 4  
   
 =item RETURNS  
   
 Returns a statistics object for the loads.  
   
 =back  
   
 =cut  
 #: Return Type $%;  
 sub LoadDiagramData {  
     # Get this object instance.  
     my ($self) = @_;  
     # Get the FIG object.  
     my $fig = $self->{fig};  
     # Get the map list.  
     my @maps = $fig->all_maps;  
     my $mapCount = @maps;  
     my $genomeCount = (keys %{$self->{genomes}});  
     my $featureCount = $genomeCount * 4000;  
     # Create load objects for each of the tables we're loading.  
     my $loadDiagram = $self->_TableLoader('Diagram', $mapCount);  
     my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn', $featureCount * 6);  
     Trace("Beginning diagram data load.") if T(2);  
     # Loop through the diagrams.  
     for my $map ($fig->all_maps) {  
810          Trace("Loading diagram $map.") if T(3);          Trace("Loading diagram $map.") if T(3);
811          # Get the diagram's descriptive name.          # Get the diagram's descriptive name.
812          my $name = $fig->map_name($map);          my $name = $fig->map_name($map);
# Line 874  Line 815 
815          # A hash is used to prevent duplicates.          # A hash is used to prevent duplicates.
816          my %roleHash = ();          my %roleHash = ();
817          for my $role ($fig->map_to_ecs($map)) {          for my $role ($fig->map_to_ecs($map)) {
818              if (! $roleHash{$role}) {              if (exists $ecToRoles{$role} && ! $roleHash{$role}) {
819                  $loadRoleOccursIn->Put($role, $map);                  $loadRoleOccursIn->Put($ecToRoles{$role}, $map);
820                  $roleHash{$role} = 1;                  $roleHash{$role} = 1;
821              }              }
822          }          }
823      }      }
824        # Before we leave, we must create the Catalyzes table. We start with the reactions,
825        # then use the "ecToRoles" hash to convert EC numbers to role IDs.
826        my @reactions = $fig->all_reactions();
827        for my $reactionID (@reactions) {
828            # Get this reaction's list of roles. The results will be EC numbers.
829            my @roles = $fig->catalyzed_by($reactionID);
830            # Loop through the roles, creating catalyzation records.
831            for my $thisRole (@roles) {
832                if (exists $ecToRoles{$thisRole}) {
833                    $loadCatalyzes->Put($ecToRoles{$thisRole}, $reactionID);
834                }
835            }
836        }
837      # Finish the load.      # Finish the load.
838      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
839      return $retVal;      return $retVal;
# Line 1254  Line 1208 
1208    
1209  The following relations are loaded by this method.  The following relations are loaded by this method.
1210    
1211        Reaction
1212      ReactionURL      ReactionURL
1213      Compound      Compound
1214      CompoundName      CompoundName
# Line 1281  Line 1236 
1236      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1237      my $genomeCount = (keys %{$genomeHash});      my $genomeCount = (keys %{$genomeHash});
1238      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1239        my $loadReaction = $self->_TableLoader('Reaction', $genomeCount * 4000);
1240      my $loadReactionURL = $self->_TableLoader('ReactionURL', $genomeCount * 4000);      my $loadReactionURL = $self->_TableLoader('ReactionURL', $genomeCount * 4000);
1241      my $loadCompound = $self->_TableLoader('Compound', $genomeCount * 4000);      my $loadCompound = $self->_TableLoader('Compound', $genomeCount * 4000);
1242      my $loadCompoundName = $self->_TableLoader('CompoundName', $genomeCount * 8000);      my $loadCompoundName = $self->_TableLoader('CompoundName', $genomeCount * 8000);
1243      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS', $genomeCount * 4000);      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS', $genomeCount * 4000);
1244      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf', $genomeCount * 12000);      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf', $genomeCount * 12000);
1245      Trace("Beginning reaction/compound data load.") if T(2);      Trace("Beginning reaction/compound data load.") if T(2);
1246      # Create a hash to remember the compounds we've generated in the compound table.      # First we create the compounds.
1247      my %compoundHash = ();      my @compounds = $fig->all_compounds();
1248      # Loop through the reactions.      for my $cid (@compounds) {
1249            # Check for names.
1250            my @names = $fig->names_of_compound($cid);
1251            # Each name will be given a priority number, starting with 1.
1252            my $prio = 1;
1253            for my $name (@names) {
1254                $loadCompoundName->Put($cid, $name, $prio++);
1255            }
1256            # Create the main compound record. Note that the first name
1257            # becomes the label.
1258            my $label = (@names > 0 ? $names[0] : $cid);
1259            $loadCompound->Put($cid, $label);
1260            # Check for a CAS ID.
1261            my $cas = $fig->cas($cid);
1262            if ($cas) {
1263                $loadCompoundCAS->Put($cid, $cas);
1264            }
1265        }
1266        # All the compounds are set up, so we need to loop through the reactions next. First,
1267        # we initialize the discriminator index. This is a single integer used to ensure
1268        # duplicate elements in a reaction are not accidentally collapsed.
1269        my $discrim = 0;
1270      my @reactions = $fig->all_reactions();      my @reactions = $fig->all_reactions();
1271      for my $reactionID (@reactions) {      for my $reactionID (@reactions) {
1272            # Create the reaction record.
1273            $loadReaction->Put($reactionID, $fig->reversible($reactionID));
1274          # Compute the reaction's URL.          # Compute the reaction's URL.
1275          my $url = HTML::reaction_link($reactionID);          my $url = HTML::reaction_link($reactionID);
1276          # Put it in the ReactionURL table.          # Put it in the ReactionURL table.
# Line 1300  Line 1279 
1279          # substrates first and then products.          # substrates first and then products.
1280          for my $product (0, 1) {          for my $product (0, 1) {
1281              # Get the compounds of the current type for the current reaction. FIG will              # Get the compounds of the current type for the current reaction. FIG will
1282              # give us 3-tuples: [ID, Stoichometry, main-flag]. At this time we do not              # give us 3-tuples: [ID, stoichiometry, main-flag]. At this time we do not
1283              # have location data in SEED, so it defaults to the empty string.              # have location data in SEED, so it defaults to the empty string.
1284              my @compounds = $fig->reaction2comp($reactionID, $product);              my @compounds = $fig->reaction2comp($reactionID, $product);
1285              for my $compData (@compounds) {              for my $compData (@compounds) {
1286                  # Extract the compound data from the current tuple.                  # Extract the compound data from the current tuple.
1287                  my ($cid, $stoich, $main) = @{$compData};                  my ($cid, $stoich, $main) = @{$compData};
1288                  # Link the compound to the reaction.                  # Link the compound to the reaction.
1289                  $loadIsAComponentOf->Put($cid, $reactionID, "", $main, $product, $stoich);                  $loadIsAComponentOf->Put($cid, $reactionID, $discrim++, "", $main,
1290                  # If this is a new compound, we need to create its table entries.                                           $product, $stoich);
                 if (! exists $compoundHash{$cid}) {  
                     $compoundHash{$cid} = 1;  
                     # Create the main compound record and denote we've done it.  
                     $loadCompound->Put($cid);  
                     # Check for a CAS ID.  
                     my $cas = $fig->cas($cid);  
                     if ($cas) {  
                         $loadCompoundCAS->Put($cid, $cas);  
                     }  
                     # Check for names.  
                     my @names = $fig->names_of_compound($cid);  
                     # Each name will be given a priority number, starting with 1.  
                     my $prio = 0;  
                     for my $name (@names) {  
                         $loadCompoundName->Put($cid, $name, $prio++);  
                     }  
                 }  
1291              }              }
1292          }          }
1293      }      }
# Line 1457  Line 1419 
1419      # Loop through the list, finishing the loads. Note that if the finish fails, we die      # Loop through the list, finishing the loads. Note that if the finish fails, we die
1420      # ignominiously. At some future point, we want to make the loads restartable.      # ignominiously. At some future point, we want to make the loads restartable.
1421      while (my $loader = pop @{$loadList}) {      while (my $loader = pop @{$loadList}) {
1422            # Trace the fact that we're cleaning up.
1423            my $relName = $loader->RelName;
1424            Trace("Finishing load for $relName.") if T(2);
1425          my $stats = $loader->Finish();          my $stats = $loader->Finish();
1426            if ($self->{options}->{dbLoad}) {
1427                # Here we want to use the load file just created to load the database.
1428                Trace("Loading relation $relName.") if T(2);
1429                my $newStats = $self->{sprout}->LoadUpdate(1, [$relName]);
1430                # Accumulate the statistics from the DB load.
1431                $stats->Accumulate($newStats);
1432            }
1433          $retVal->Accumulate($stats);          $retVal->Accumulate($stats);
         my $relName = $loader->RelName;  
1434          Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);          Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);
1435      }      }
1436      # Return the load statistics.      # Return the load statistics.

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.21

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3