[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.19, Thu Oct 20 09:34:09 2005 UTC | revision 1.22, Mon Dec 12 21:48:22 2005 UTC
# Line 608  Line 608 
608      OccursInSubsystem      OccursInSubsystem
609      ParticipatesIn      ParticipatesIn
610      HasSSCell      HasSSCell
     Catalyzes  
     Reaction  
611      ConsistsOfRoles      ConsistsOfRoles
612      RoleSubset      RoleSubset
613      HasRoleSubset      HasRoleSubset
614      ConsistsOfGenomes      ConsistsOfGenomes
615      GenomeSubset      GenomeSubset
616      HasGenomeSubset      HasGenomeSubset
617        Catalyzes
618        Diagram
619        RoleOccursIn
620    
621  =over 4  =over 4
622    
# Line 641  Line 642 
642      my $subsysCount = @subsysIDs;      my $subsysCount = @subsysIDs;
643      my $genomeCount = (keys %{$genomeHash});      my $genomeCount = (keys %{$genomeHash});
644      my $featureCount = $genomeCount * 4000;      my $featureCount = $genomeCount * 4000;
645        # Get the map list.
646        my @maps = $fig->all_maps;
647        my $mapCount = @maps;
648      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
649        my $loadDiagram = $self->_TableLoader('Diagram', $mapCount);
650        my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn', $featureCount * 6);
651      my $loadSubsystem = $self->_TableLoader('Subsystem', $subsysCount);      my $loadSubsystem = $self->_TableLoader('Subsystem', $subsysCount);
652      my $loadRole = $self->_TableLoader('Role', $featureCount * 6);      my $loadRole = $self->_TableLoader('Role', $featureCount * 6);
653      my $loadRoleEC = $self->_TableLoader('RoleEC', $featureCount * 6);      my $loadRoleEC = $self->_TableLoader('RoleEC', $featureCount * 6);
654        my $loadCatalyzes = $self->_TableLoader('Catalyzes', $genomeCount * $featureCount);
655      my $loadSSCell = $self->_TableLoader('SSCell', $featureCount * $genomeCount);      my $loadSSCell = $self->_TableLoader('SSCell', $featureCount * $genomeCount);
656      my $loadContainsFeature = $self->_TableLoader('ContainsFeature', $featureCount * $subsysCount);      my $loadContainsFeature = $self->_TableLoader('ContainsFeature', $featureCount * $subsysCount);
657      my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf', $featureCount * $genomeCount);      my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf', $featureCount * $genomeCount);
# Line 652  Line 659 
659      my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem', $featureCount * 6);      my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem', $featureCount * 6);
660      my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn', $subsysCount * $genomeCount);      my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn', $subsysCount * $genomeCount);
661      my $loadHasSSCell = $self->_TableLoader('HasSSCell', $featureCount * $genomeCount);      my $loadHasSSCell = $self->_TableLoader('HasSSCell', $featureCount * $genomeCount);
     my $loadReaction = $self->_TableLoader('Reaction', $featureCount * $genomeCount);  
     my $loadCatalyzes = $self->_TableLoader('Catalyzes', $featureCount * $genomeCount);  
662      my $loadRoleSubset = $self->_TableLoader('RoleSubset', $subsysCount * 50);      my $loadRoleSubset = $self->_TableLoader('RoleSubset', $subsysCount * 50);
663      my $loadGenomeSubset = $self->_TableLoader('GenomeSubset', $subsysCount * 50);      my $loadGenomeSubset = $self->_TableLoader('GenomeSubset', $subsysCount * 50);
664      my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles', $featureCount * $genomeCount);      my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles', $featureCount * $genomeCount);
665      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $featureCount * $genomeCount);      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $featureCount * $genomeCount);
666      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $subsysCount * 50);      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $subsysCount * 50);
667      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $subsysCount * 50);      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $subsysCount * 50);
668        # Create load objects for each of the tables we're loading.
669      Trace("Beginning subsystem data load.") if T(2);      Trace("Beginning subsystem data load.") if T(2);
670      # The reaction hash will contain a list of reactions for each role. When we're done,      # This hash will contain the role for each EC. When we're done, this
671      # a complicated sort and merge will be used to generate the Reaction and Catalyzes      # information will be used to generate the Catalyzes table.
672      # tables.      my %ecToRoles = ();
     my %reactionsToRoles = ();  
673      # Loop through the subsystems. Our first task will be to create the      # Loop through the subsystems. Our first task will be to create the
674      # roles. We do this by looping through the subsystems and creating a      # roles. We do this by looping through the subsystems and creating a
675      # role hash. The hash tracks each role ID so that we don't create      # role hash. The hash tracks each role ID so that we don't create
# Line 677  Line 682 
682          $loadSubsystem->Add("subsystemIn");          $loadSubsystem->Add("subsystemIn");
683          # Get the subsystem object.          # Get the subsystem object.
684          my $sub = $fig->get_subsystem($subsysID);          my $sub = $fig->get_subsystem($subsysID);
         # Get its reaction hash.  
         my $reactionHash = $sub->get_reactions();  
685          # Create the subsystem record.          # Create the subsystem record.
686          my $curator = $sub->get_curator();          my $curator = $sub->get_curator();
687          my $notes = $sub->get_notes();          my $notes = $sub->get_notes();
# Line 697  Line 700 
700                  $roleData{$roleID} = 1;                  $roleData{$roleID} = 1;
701                  # Check for an EC number.                  # Check for an EC number.
702                  if ($roleID =~ /\(EC ([^.]+\.[^.]+\.[^.]+\.[^)]+)\)\s*$/) {                  if ($roleID =~ /\(EC ([^.]+\.[^.]+\.[^.]+\.[^)]+)\)\s*$/) {
703                      $loadRoleEC->Put($roleID, $1);                      my $ec = $1;
704                  }                      $loadRoleEC->Put($roleID, $ec);
705                  # Add the role's reactions.                      $ecToRoles{$ec} = $roleID;
                 my $reactions = $reactionHash->{$roleID};  
                 for my $reactionID (@{$reactions}) {  
                     if (! exists $reactionsToRoles{$reactionID}) {  
                         # Here the reaction is brand-new, so we create its reaction  
                         # record.  
                         $loadReaction->Put($reactionID, $fig->reversible($reactionID));  
                         # We also create a blank list for it in the reaction hash.  
                         $reactionsToRoles{$reactionID} = [];  
                     }  
                     # Add the role to the reaction's role list.  
                     push @{$reactionsToRoles{$reactionID}}, $roleID;  
706                  }                  }
707              }              }
708          }          }
# Line 810  Line 802 
802              }              }
803          }          }
804      }      }
805      # Before we leave, we must create the Catalyzes table. The data is all stored in      # Now we loop through the diagrams. We need to create the diagram records
806      # "reactionToRoles" hash.      # and link each diagram to its roles. Note that only roles which occur
807      for my $reactionID (keys %reactionsToRoles) {      # in subsystems (and therefore appear in the %ecToRoles hash) are
808          # Get this reaction's list of roles. We sort it so we can merge out duplicates.      # included.
809          my @roles = sort @{$reactionsToRoles{$reactionID}};      for my $map (@maps) {
         my $lastRole = "";  
         # Loop through the roles, creating catalyzation records.  
         for my $thisRole (@roles) {  
             if ($thisRole ne $lastRole) {  
                 $loadCatalyzes->Put($thisRole, $reactionID);  
             }  
         }  
     }  
     # Finish the load.  
     my $retVal = $self->_FinishAll();  
     return $retVal;  
 }  
   
 =head3 LoadDiagramData  
   
 C<< my $stats = $spl->LoadDiagramData(); >>  
   
 Load the diagram data from FIG into Sprout.  
   
 Diagrams are used to organize functional roles. The diagram shows the  
 connections between chemicals that interact with a subsystem.  
   
 The following relations are loaded by this method.  
   
     Diagram  
     RoleOccursIn  
   
 =over 4  
   
 =item RETURNS  
   
 Returns a statistics object for the loads.  
   
 =back  
   
 =cut  
 #: Return Type $%;  
 sub LoadDiagramData {  
     # Get this object instance.  
     my ($self) = @_;  
     # Get the FIG object.  
     my $fig = $self->{fig};  
     # Get the map list.  
     my @maps = $fig->all_maps;  
     my $mapCount = @maps;  
     my $genomeCount = (keys %{$self->{genomes}});  
     my $featureCount = $genomeCount * 4000;  
     # Create load objects for each of the tables we're loading.  
     my $loadDiagram = $self->_TableLoader('Diagram', $mapCount);  
     my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn', $featureCount * 6);  
     Trace("Beginning diagram data load.") if T(2);  
     # Loop through the diagrams.  
     for my $map ($fig->all_maps) {  
810          Trace("Loading diagram $map.") if T(3);          Trace("Loading diagram $map.") if T(3);
811          # Get the diagram's descriptive name.          # Get the diagram's descriptive name.
812          my $name = $fig->map_name($map);          my $name = $fig->map_name($map);
# Line 876  Line 815 
815          # A hash is used to prevent duplicates.          # A hash is used to prevent duplicates.
816          my %roleHash = ();          my %roleHash = ();
817          for my $role ($fig->map_to_ecs($map)) {          for my $role ($fig->map_to_ecs($map)) {
818              if (! $roleHash{$role}) {              if (exists $ecToRoles{$role} && ! $roleHash{$role}) {
819                  $loadRoleOccursIn->Put($role, $map);                  $loadRoleOccursIn->Put($ecToRoles{$role}, $map);
820                  $roleHash{$role} = 1;                  $roleHash{$role} = 1;
821              }              }
822          }          }
823      }      }
824        # Before we leave, we must create the Catalyzes table. We start with the reactions,
825        # then use the "ecToRoles" table to convert EC numbers to role IDs.
826        my @reactions = $fig->all_reactions();
827        for my $reactionID (@reactions) {
828            # Get this reaction's list of roles. The results will be EC numbers.
829            my @roles = $fig->catalyzed_by($reactionID);
830            # Loop through the roles, creating catalyzation records.
831            for my $thisRole (@roles) {
832                if (exists $ecToRoles{$thisRole}) {
833                    $loadCatalyzes->Put($ecToRoles{$thisRole}, $reactionID);
834                }
835            }
836        }
837      # Finish the load.      # Finish the load.
838      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
839      return $retVal;      return $retVal;
# Line 1035  Line 987 
987              # Create a hash of timestamps. We use this to prevent duplicate time stamps              # Create a hash of timestamps. We use this to prevent duplicate time stamps
988              # from showing up for a single PEG's annotations.              # from showing up for a single PEG's annotations.
989              my %seenTimestamps = ();              my %seenTimestamps = ();
990              # Check for a functional assignment.              # Loop through the annotations.
             my $func = $fig->function_of($peg);  
             if ($func) {  
                 # If this is NOT a hypothetical assignment, we create an  
                 # assignment annotation for it.  
                 if (! FIG::hypo($peg)) {  
                     # Note that we double the slashes so that what goes into the database is  
                     # a new-line escape sequence rather than an actual new-line.  
                     $loadAnnotation->Put("$peg:$time", $time, "FIG\\nSet function to\\n$func");  
                     $loadIsTargetOfAnnotation->Put($peg, "$peg:$time");  
                     $loadMadeAnnotation->Put("FIG", "$peg:$time");  
                     # Denote we've seen this timestamp.  
                     $seenTimestamps{$time} = 1;  
                 }  
             }  
             # Now loop through the real annotations.  
991              for my $tuple ($fig->feature_annotations($peg, "raw")) {              for my $tuple ($fig->feature_annotations($peg, "raw")) {
992                  my ($fid, $timestamp, $user, $text) = @{$tuple};                  my ($fid, $timestamp, $user, $text) = @{$tuple};
993                  # Here we fix up the annotation text. "\r" is removed,                  # Here we fix up the annotation text. "\r" is removed,
# Line 1256  Line 1193 
1193    
1194  The following relations are loaded by this method.  The following relations are loaded by this method.
1195    
1196        Reaction
1197      ReactionURL      ReactionURL
1198      Compound      Compound
1199      CompoundName      CompoundName
# Line 1283  Line 1221 
1221      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1222      my $genomeCount = (keys %{$genomeHash});      my $genomeCount = (keys %{$genomeHash});
1223      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1224        my $loadReaction = $self->_TableLoader('Reaction', $genomeCount * 4000);
1225      my $loadReactionURL = $self->_TableLoader('ReactionURL', $genomeCount * 4000);      my $loadReactionURL = $self->_TableLoader('ReactionURL', $genomeCount * 4000);
1226      my $loadCompound = $self->_TableLoader('Compound', $genomeCount * 4000);      my $loadCompound = $self->_TableLoader('Compound', $genomeCount * 4000);
1227      my $loadCompoundName = $self->_TableLoader('CompoundName', $genomeCount * 8000);      my $loadCompoundName = $self->_TableLoader('CompoundName', $genomeCount * 8000);
1228      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS', $genomeCount * 4000);      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS', $genomeCount * 4000);
1229      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf', $genomeCount * 12000);      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf', $genomeCount * 12000);
1230      Trace("Beginning reaction/compound data load.") if T(2);      Trace("Beginning reaction/compound data load.") if T(2);
1231      # Create a hash to remember the compounds we've generated in the compound table.      # First we create the compounds.
1232      my %compoundHash = ();      my @compounds = $fig->all_compounds();
1233      # Loop through the reactions.      for my $cid (@compounds) {
1234            # Check for names.
1235            my @names = $fig->names_of_compound($cid);
1236            # Each name will be given a priority number, starting with 1.
1237            my $prio = 1;
1238            for my $name (@names) {
1239                $loadCompoundName->Put($cid, $name, $prio++);
1240            }
1241            # Create the main compound record. Note that the first name
1242            # becomes the label.
1243            my $label = (@names > 0 ? $names[0] : $cid);
1244            $loadCompound->Put($cid, $label);
1245            # Check for a CAS ID.
1246            my $cas = $fig->cas($cid);
1247            if ($cas) {
1248                $loadCompoundCAS->Put($cid, $cas);
1249            }
1250        }
1251        # All the compounds are set up, so we need to loop through the reactions next. First,
1252        # we initialize the discriminator index. This is a single integer used to ensure
1253        # duplicate elements in a reaction are not accidentally collapsed.
1254        my $discrim = 0;
1255      my @reactions = $fig->all_reactions();      my @reactions = $fig->all_reactions();
1256      for my $reactionID (@reactions) {      for my $reactionID (@reactions) {
1257            # Create the reaction record.
1258            $loadReaction->Put($reactionID, $fig->reversible($reactionID));
1259          # Compute the reaction's URL.          # Compute the reaction's URL.
1260          my $url = HTML::reaction_link($reactionID);          my $url = HTML::reaction_link($reactionID);
1261          # Put it in the ReactionURL table.          # Put it in the ReactionURL table.
# Line 1302  Line 1264 
1264          # substrates first and then products.          # substrates first and then products.
1265          for my $product (0, 1) {          for my $product (0, 1) {
1266              # Get the compounds of the current type for the current reaction. FIG will              # Get the compounds of the current type for the current reaction. FIG will
1267              # give us 3-tuples: [ID, Stoichometry, main-flag]. At this time we do not              # give us 3-tuples: [ID, stoichiometry, main-flag]. At this time we do not
1268              # have location data in SEED, so it defaults to the empty string.              # have location data in SEED, so it defaults to the empty string.
1269              my @compounds = $fig->reaction2comp($reactionID, $product);              my @compounds = $fig->reaction2comp($reactionID, $product);
1270              for my $compData (@compounds) {              for my $compData (@compounds) {
1271                  # Extract the compound data from the current tuple.                  # Extract the compound data from the current tuple.
1272                  my ($cid, $stoich, $main) = @{$compData};                  my ($cid, $stoich, $main) = @{$compData};
1273                  # Link the compound to the reaction.                  # Link the compound to the reaction.
1274                  $loadIsAComponentOf->Put($cid, $reactionID, "", $main, $product, $stoich);                  $loadIsAComponentOf->Put($cid, $reactionID, $discrim++, "", $main,
1275                  # If this is a new compound, we need to create its table entries.                                           $product, $stoich);
                 if (! exists $compoundHash{$cid}) {  
                     $compoundHash{$cid} = 1;  
                     # Create the main compound record and denote we've done it.  
                     $loadCompound->Put($cid);  
                     # Check for a CAS ID.  
                     my $cas = $fig->cas($cid);  
                     if ($cas) {  
                         $loadCompoundCAS->Put($cid, $cas);  
                     }  
                     # Check for names.  
                     my @names = $fig->names_of_compound($cid);  
                     # Each name will be given a priority number, starting with 1.  
                     my $prio = 0;  
                     for my $name (@names) {  
                         $loadCompoundName->Put($cid, $name, $prio++);  
                     }  
                 }  
1276              }              }
1277          }          }
1278      }      }

Legend:
Removed from v.1.19  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3