[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.21, Sat Nov 12 03:42:48 2005 UTC revision 1.24, Tue Jan 17 01:10:54 2006 UTC
# Line 171  Line 171 
171      return $retVal;      return $retVal;
172  }  }
173    
174    =head3 LoadOnly
175    
176    C<< my $flag = $spl->LoadOnly; >>
177    
178    Return TRUE if we are in load-only mode, else FALSE.
179    
180    =cut
181    
182    sub LoadOnly {
183        my ($self) = @_;
184        return $self->{options}->{loadOnly};
185    }
186    
187  =head3 LoadGenomeData  =head3 LoadGenomeData
188    
189  C<< my $stats = $spl->LoadGenomeData(); >>  C<< my $stats = $spl->LoadGenomeData(); >>
# Line 216  Line 229 
229      # Get the genome count.      # Get the genome count.
230      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
231      my $genomeCount = (keys %{$genomeHash});      my $genomeCount = (keys %{$genomeHash});
     Trace("Beginning genome data load.") if T(2);  
232      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
233      my $loadGenome = $self->_TableLoader('Genome', $genomeCount);      my $loadGenome = $self->_TableLoader('Genome');
234      my $loadHasContig = $self->_TableLoader('HasContig', $genomeCount * 300);      my $loadHasContig = $self->_TableLoader('HasContig');
235      my $loadContig = $self->_TableLoader('Contig', $genomeCount * 300);      my $loadContig = $self->_TableLoader('Contig');
236      my $loadIsMadeUpOf = $self->_TableLoader('IsMadeUpOf', $genomeCount * 60000);      my $loadIsMadeUpOf = $self->_TableLoader('IsMadeUpOf');
237      my $loadSequence = $self->_TableLoader('Sequence', $genomeCount * 60000);      my $loadSequence = $self->_TableLoader('Sequence');
238        if ($self->{options}->{loadOnly}) {
239            Trace("Loading from existing files.") if T(2);
240        } else {
241            Trace("Generating genome data.") if T(2);
242      # Now we loop through the genomes, generating the data for each one.      # Now we loop through the genomes, generating the data for each one.
243      for my $genomeID (sort keys %{$genomeHash}) {      for my $genomeID (sort keys %{$genomeHash}) {
244          Trace("Loading data for genome $genomeID.") if T(3);              Trace("Generating data for genome $genomeID.") if T(3);
245          $loadGenome->Add("genomeIn");          $loadGenome->Add("genomeIn");
246          # The access code comes in via the genome hash.          # The access code comes in via the genome hash.
247          my $accessCode = $genomeHash->{$genomeID};          my $accessCode = $genomeHash->{$genomeID};
# Line 268  Line 284 
284              }              }
285          }          }
286      }      }
287        }
288      # Finish the loads.      # Finish the loads.
289      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
290      # Return the result.      # Return the result.
# Line 311  Line 328 
328      my $genomeCount = (keys %{$genomeFilter});      my $genomeCount = (keys %{$genomeFilter});
329      my $featureCount = $genomeCount * 4000;      my $featureCount = $genomeCount * 4000;
330      # Start the loads.      # Start the loads.
331      my $loadCoupling = $self->_TableLoader('Coupling', $featureCount * $genomeCount);      my $loadCoupling = $self->_TableLoader('Coupling');
332      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $featureCount * 8000);      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy');
333      my $loadPCH = $self->_TableLoader('PCH', $featureCount * 2000);      my $loadPCH = $self->_TableLoader('PCH');
334      my $loadParticipatesInCoupling = $self->_TableLoader('ParticipatesInCoupling', $featureCount * 2000);      my $loadParticipatesInCoupling = $self->_TableLoader('ParticipatesInCoupling');
335      my $loadUsesAsEvidence = $self->_TableLoader('UsesAsEvidence', $featureCount * 8000);      my $loadUsesAsEvidence = $self->_TableLoader('UsesAsEvidence');
336      Trace("Beginning coupling data load.") if T(2);      if ($self->{options}->{loadOnly}) {
337            Trace("Loading from existing files.") if T(2);
338        } else {
339            Trace("Generating coupling data.") if T(2);
340      # Loop through the genomes found.      # Loop through the genomes found.
341      for my $genome (sort keys %{$genomeFilter}) {      for my $genome (sort keys %{$genomeFilter}) {
342          Trace("Generating coupling data for $genome.") if T(3);          Trace("Generating coupling data for $genome.") if T(3);
# Line 389  Line 409 
409              }              }
410          }          }
411      }      }
412        }
413      # All done. Finish the load.      # All done. Finish the load.
414      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
415      return $retVal;      return $retVal;
# Line 426  Line 447 
447      my ($self) = @_;      my ($self) = @_;
448      # Get the FIG object.      # Get the FIG object.
449      my $fig = $self->{fig};      my $fig = $self->{fig};
     # Find out if this is a limited run.  
     my $limited = $self->{options}->{limitedFeatures};  
450      # Get the table of genome IDs.      # Get the table of genome IDs.
451      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
     my $genomeCount = (keys %{$genomeHash});  
     my $featureCount = $genomeCount * 4000;  
452      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
453      my $loadFeature = $self->_TableLoader('Feature', $featureCount);      my $loadFeature = $self->_TableLoader('Feature');
454      my $loadIsLocatedIn = $self->_TableLoader('IsLocatedIn', $featureCount);      my $loadIsLocatedIn = $self->_TableLoader('IsLocatedIn');
455      my $loadFeatureAlias = $self->_TableLoader('FeatureAlias', $featureCount * 6);      my $loadFeatureAlias = $self->_TableLoader('FeatureAlias');
456      my ($loadFeatureLink, $loadFeatureTranslation, $loadFeatureUpstream);      my $loadFeatureLink = $self->_TableLoader('FeatureLink');
457      if (! $limited) {      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');
458          $loadFeatureLink = $self->_TableLoader('FeatureLink', $featureCount * 10);      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');
         $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation', $featureCount);  
         $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream', $featureCount);  
     }  
459      # Get the maximum sequence size. We need this later for splitting up the      # Get the maximum sequence size. We need this later for splitting up the
460      # locations.      # locations.
461      my $chunkSize = $self->{sprout}->MaxSegment();      my $chunkSize = $self->{sprout}->MaxSegment();
462      Trace("Beginning feature data load.") if T(2);      if ($self->{options}->{loadOnly}) {
463            Trace("Loading from existing files.") if T(2);
464        } else {
465            Trace("Generating feature data.") if T(2);
466      # Now we loop through the genomes, generating the data for each one.      # Now we loop through the genomes, generating the data for each one.
467      for my $genomeID (sort keys %{$genomeHash}) {      for my $genomeID (sort keys %{$genomeHash}) {
468          Trace("Loading features for genome $genomeID.") if T(3);          Trace("Loading features for genome $genomeID.") if T(3);
# Line 463  Line 480 
480              for my $alias ($fig->feature_aliases($featureID)) {              for my $alias ($fig->feature_aliases($featureID)) {
481                  $loadFeatureAlias->Put($featureID, $alias);                  $loadFeatureAlias->Put($featureID, $alias);
482              }              }
             # The next stuff is for a full load only.  
             if (! $limited) {  
483                  # Get the links.                  # Get the links.
484                  my @links = $fig->fid_links($featureID);                  my @links = $fig->fid_links($featureID);
485                  for my $link (@links) {                  for my $link (@links) {
# Line 483  Line 498 
498                          $loadFeatureUpstream->Put($featureID, $upstream);                          $loadFeatureUpstream->Put($featureID, $upstream);
499                      }                      }
500                  }                  }
             }  
501              # This part is the roughest. We need to relate the features to contig              # This part is the roughest. We need to relate the features to contig
502              # locations, and the locations must be split so that none of them exceed              # locations, and the locations must be split so that none of them exceed
503              # the maximum segment size. This simplifies the genes_in_region processing              # the maximum segment size. This simplifies the genes_in_region processing
# Line 512  Line 526 
526              }              }
527          }          }
528      }      }
529        }
530      # Finish the loads.      # Finish the loads.
531      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
532      return $retVal;      return $retVal;
# Line 548  Line 563 
563      my $fig = $self->{fig};      my $fig = $self->{fig};
564      # Get the table of genome IDs.      # Get the table of genome IDs.
565      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
     my $genomeCount = (keys %{$genomeHash});  
     my $featureCount = $genomeCount * 4000;  
566      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
567      my $loadIsBidirectionalBestHitOf = $self->_TableLoader('IsBidirectionalBestHitOf',      my $loadIsBidirectionalBestHitOf = $self->_TableLoader('IsBidirectionalBestHitOf');
568                                                             $featureCount * $genomeCount);      if ($self->{options}->{loadOnly}) {
569      Trace("Beginning BBH load.") if T(2);          Trace("Loading from existing files.") if T(2);
570        } else {
571            Trace("Generating BBH data.") if T(2);
572      # Now we loop through the genomes, generating the data for each one.      # Now we loop through the genomes, generating the data for each one.
573      for my $genomeID (sort keys %{$genomeHash}) {      for my $genomeID (sort keys %{$genomeHash}) {
574          $loadIsBidirectionalBestHitOf->Add("genomeIn");          $loadIsBidirectionalBestHitOf->Add("genomeIn");
# Line 579  Line 594 
594              }              }
595          }          }
596      }      }
597        }
598      # Finish the loads.      # Finish the loads.
599      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
600      return $retVal;      return $retVal;
# Line 639  Line 655 
655      # Get the subsystem hash. This lists the subsystems we'll process.      # Get the subsystem hash. This lists the subsystems we'll process.
656      my $subsysHash = $self->{subsystems};      my $subsysHash = $self->{subsystems};
657      my @subsysIDs = sort keys %{$subsysHash};      my @subsysIDs = sort keys %{$subsysHash};
     my $subsysCount = @subsysIDs;  
     my $genomeCount = (keys %{$genomeHash});  
     my $featureCount = $genomeCount * 4000;  
658      # Get the map list.      # Get the map list.
659      my @maps = $fig->all_maps;      my @maps = $fig->all_maps;
     my $mapCount = @maps;  
     # Create load objects for each of the tables we're loading.  
     my $loadDiagram = $self->_TableLoader('Diagram', $mapCount);  
     my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn', $featureCount * 6);  
     my $loadSubsystem = $self->_TableLoader('Subsystem', $subsysCount);  
     my $loadRole = $self->_TableLoader('Role', $featureCount * 6);  
     my $loadRoleEC = $self->_TableLoader('RoleEC', $featureCount * 6);  
     my $loadCatalyzes = $self->_TableLoader('Catalyzes', $genomeCount * $featureCount);  
     my $loadSSCell = $self->_TableLoader('SSCell', $featureCount * $genomeCount);  
     my $loadContainsFeature = $self->_TableLoader('ContainsFeature', $featureCount * $subsysCount);  
     my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf', $featureCount * $genomeCount);  
     my $loadIsRoleOf = $self->_TableLoader('IsRoleOf', $featureCount * $genomeCount);  
     my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem', $featureCount * 6);  
     my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn', $subsysCount * $genomeCount);  
     my $loadHasSSCell = $self->_TableLoader('HasSSCell', $featureCount * $genomeCount);  
     my $loadRoleSubset = $self->_TableLoader('RoleSubset', $subsysCount * 50);  
     my $loadGenomeSubset = $self->_TableLoader('GenomeSubset', $subsysCount * 50);  
     my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles', $featureCount * $genomeCount);  
     my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $featureCount * $genomeCount);  
     my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $subsysCount * 50);  
     my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $subsysCount * 50);  
660      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
661      Trace("Beginning subsystem data load.") if T(2);      my $loadDiagram = $self->_TableLoader('Diagram');
662        my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn');
663        my $loadSubsystem = $self->_TableLoader('Subsystem');
664        my $loadRole = $self->_TableLoader('Role');
665        my $loadRoleEC = $self->_TableLoader('RoleEC');
666        my $loadCatalyzes = $self->_TableLoader('Catalyzes');
667        my $loadSSCell = $self->_TableLoader('SSCell');
668        my $loadContainsFeature = $self->_TableLoader('ContainsFeature');
669        my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf');
670        my $loadIsRoleOf = $self->_TableLoader('IsRoleOf');
671        my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem');
672        my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn');
673        my $loadHasSSCell = $self->_TableLoader('HasSSCell');
674        my $loadRoleSubset = $self->_TableLoader('RoleSubset');
675        my $loadGenomeSubset = $self->_TableLoader('GenomeSubset');
676        my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles');
677        my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes');
678        my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset');
679        my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset');
680        if ($self->{options}->{loadOnly}) {
681            Trace("Loading from existing files.") if T(2);
682        } else {
683            Trace("Generating subsystem data.") if T(2);
684      # This hash will contain the role for each EC. When we're done, this      # This hash will contain the role for each EC. When we're done, this
685      # information will be used to generate the Catalyzes table.      # information will be used to generate the Catalyzes table.
686      my %ecToRoles = ();      my %ecToRoles = ();
# Line 834  Line 848 
848              }              }
849          }          }
850      }      }
851        }
852      # Finish the load.      # Finish the load.
853      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
854      return $retVal;      return $retVal;
# Line 875  Line 890 
890      my $fig = $self->{fig};      my $fig = $self->{fig};
891      # Get the genome hash.      # Get the genome hash.
892      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
     my $genomeCount = (keys %{$genomeHash});  
893      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
894      my $loadProperty = $self->_TableLoader('Property', $genomeCount * 1500);      my $loadProperty = $self->_TableLoader('Property');
895      my $loadHasProperty = $self->_TableLoader('HasProperty', $genomeCount * 1500);      my $loadHasProperty = $self->_TableLoader('HasProperty');
896      Trace("Beginning property data load.") if T(2);      if ($self->{options}->{loadOnly}) {
897            Trace("Loading from existing files.") if T(2);
898        } else {
899            Trace("Generating property data.") if T(2);
900      # Create a hash for storing property IDs.      # Create a hash for storing property IDs.
901      my %propertyKeys = ();      my %propertyKeys = ();
902      my $nextID = 1;      my $nextID = 1;
903      # Loop through the genomes.      # Loop through the genomes.
904      for my $genomeID (keys %{$genomeHash}) {      for my $genomeID (keys %{$genomeHash}) {
905          $loadProperty->Add("genomeIn");          $loadProperty->Add("genomeIn");
906                Trace("Generating properties for $genomeID.") if T(3);
907          # Get the genome's features. The feature ID is the first field in the          # Get the genome's features. The feature ID is the first field in the
908          # tuples returned by "all_features_detailed". We use "all_features_detailed"          # tuples returned by "all_features_detailed". We use "all_features_detailed"
909          # rather than "all_features" because we want all features regardless of type.          # rather than "all_features" because we want all features regardless of type.
910          my @features = map { $_->[0] } @{$fig->all_features_detailed($genomeID)};          my @features = map { $_->[0] } @{$fig->all_features_detailed($genomeID)};
911                my $featureCount = 0;
912                my $propertyCount = 0;
913          # Loop through the features, creating HasProperty records.          # Loop through the features, creating HasProperty records.
914          for my $fid (@features) {          for my $fid (@features) {
             $loadProperty->Add("featureIn");  
915              # Get all attributes for this feature. We do this one feature at a time              # Get all attributes for this feature. We do this one feature at a time
916              # to ensure we do not get any genome attributes.              # to ensure we do not get any genome attributes.
917              my @attributeList = $fig->get_attributes($fid, '', '', '');              my @attributeList = $fig->get_attributes($fid, '', '', '');
918                    if (scalar @attributeList) {
919                        $featureCount++;
920                    }
921              # Loop through the attributes.              # Loop through the attributes.
922              for my $tuple (@attributeList) {              for my $tuple (@attributeList) {
923                        $propertyCount++;
924                  # Get this attribute value's data. Note that we throw away the FID,                  # Get this attribute value's data. Note that we throw away the FID,
925                  # since it will always be the same as the value of "$fid".                  # since it will always be the same as the value of "$fid".
926                  my (undef, $key, $value, $url) = @{$tuple};                  my (undef, $key, $value, $url) = @{$tuple};
# Line 919  Line 942 
942                  $loadHasProperty->Put($fid, $propertyID, $url);                  $loadHasProperty->Put($fid, $propertyID, $url);
943              }              }
944          }          }
945                # Update the statistics.
946                Trace("$propertyCount attributes processed for $featureCount features.") if T(3);
947                $loadHasProperty->Add("featuresIn", $featureCount);
948                $loadHasProperty->Add("propertiesIn", $propertyCount);
949            }
950      }      }
951      # Finish the load.      # Finish the load.
952      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
# Line 959  Line 987 
987      my $fig = $self->{fig};      my $fig = $self->{fig};
988      # Get the genome hash.      # Get the genome hash.
989      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
     my $genomeCount = (keys %{$genomeHash});  
990      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
991      my $loadAnnotation = $self->_TableLoader('Annotation', $genomeCount * 4000);      my $loadAnnotation = $self->_TableLoader('Annotation');
992      my $loadIsTargetOfAnnotation = $self->_TableLoader('IsTargetOfAnnotation', $genomeCount * 4000);      my $loadIsTargetOfAnnotation = $self->_TableLoader('IsTargetOfAnnotation');
993      my $loadSproutUser = $self->_TableLoader('SproutUser', 100);      my $loadSproutUser = $self->_TableLoader('SproutUser');
994      my $loadUserAccess = $self->_TableLoader('UserAccess', 1000);      my $loadUserAccess = $self->_TableLoader('UserAccess');
995      my $loadMadeAnnotation = $self->_TableLoader('MadeAnnotation', $genomeCount * 4000);      my $loadMadeAnnotation = $self->_TableLoader('MadeAnnotation');
996      Trace("Beginning annotation data load.") if T(2);      if ($self->{options}->{loadOnly}) {
997            Trace("Loading from existing files.") if T(2);
998        } else {
999            Trace("Generating annotation data.") if T(2);
1000      # Create a hash of user names. We'll use this to prevent us from generating duplicate      # Create a hash of user names. We'll use this to prevent us from generating duplicate
1001      # user records.      # user records.
1002      my %users = ( FIG => 1, master => 1 );      my %users = ( FIG => 1, master => 1 );
# Line 987  Line 1017 
1017              # Create a hash of timestamps. We use this to prevent duplicate time stamps              # Create a hash of timestamps. We use this to prevent duplicate time stamps
1018              # from showing up for a single PEG's annotations.              # from showing up for a single PEG's annotations.
1019              my %seenTimestamps = ();              my %seenTimestamps = ();
1020              # Check for a functional assignment.                  # Loop through the annotations.
             my $func = $fig->function_of($peg);  
             if ($func) {  
                 # If this is NOT a hypothetical assignment, we create an  
                 # assignment annotation for it.  
                 if (! FIG::hypo($peg)) {  
                     # Note that we double the slashes so that what goes into the database is  
                     # a new-line escape sequence rather than an actual new-line.  
                     $loadAnnotation->Put("$peg:$time", $time, "FIG\\nSet function to\\n$func");  
                     $loadIsTargetOfAnnotation->Put($peg, "$peg:$time");  
                     $loadMadeAnnotation->Put("FIG", "$peg:$time");  
                     # Denote we've seen this timestamp.  
                     $seenTimestamps{$time} = 1;  
                 }  
             }  
             # Now loop through the real annotations.  
1021              for my $tuple ($fig->feature_annotations($peg, "raw")) {              for my $tuple ($fig->feature_annotations($peg, "raw")) {
1022                  my ($fid, $timestamp, $user, $text) = @{$tuple};                  my ($fid, $timestamp, $user, $text) = @{$tuple};
1023                  # Here we fix up the annotation text. "\r" is removed,                  # Here we fix up the annotation text. "\r" is removed,
# Line 1041  Line 1056 
1056              }              }
1057          }          }
1058      }      }
1059        }
1060      # Finish the load.      # Finish the load.
1061      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
1062      return $retVal;      return $retVal;
# Line 1081  Line 1097 
1097      my $fig = $self->{fig};      my $fig = $self->{fig};
1098      # Get the genome hash.      # Get the genome hash.
1099      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
     my $genomeCount = (keys %{$genomeHash});  
1100      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1101      my $loadComesFrom = $self->_TableLoader('ComesFrom', $genomeCount * 4);      my $loadComesFrom = $self->_TableLoader('ComesFrom');
1102      my $loadSource = $self->_TableLoader('Source', $genomeCount * 4);      my $loadSource = $self->_TableLoader('Source');
1103      my $loadSourceURL = $self->_TableLoader('SourceURL', $genomeCount * 8);      my $loadSourceURL = $self->_TableLoader('SourceURL');
1104      Trace("Beginning source data load.") if T(2);      if ($self->{options}->{loadOnly}) {
1105            Trace("Loading from existing files.") if T(2);
1106        } else {
1107            Trace("Generating annotation data.") if T(2);
1108      # Create hashes to collect the Source information.      # Create hashes to collect the Source information.
1109      my %sourceURL = ();      my %sourceURL = ();
1110      my %sourceDesc = ();      my %sourceDesc = ();
# Line 1116  Line 1134 
1134      for my $sourceID (keys %sourceDesc) {      for my $sourceID (keys %sourceDesc) {
1135          $loadSource->Put($sourceID, $sourceDesc{$sourceID});          $loadSource->Put($sourceID, $sourceDesc{$sourceID});
1136      }      }
1137        }
1138      # Finish the load.      # Finish the load.
1139      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
1140      return $retVal;      return $retVal;
# Line 1155  Line 1174 
1174      my $fig = $self->{fig};      my $fig = $self->{fig};
1175      # Get the genome hash.      # Get the genome hash.
1176      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
     my $genomeCount = (keys %{$genomeHash});  
1177      # Convert the genome hash. We'll get the genus and species for each genome and make      # Convert the genome hash. We'll get the genus and species for each genome and make
1178      # it the key.      # it the key.
1179      my %speciesHash = map { $fig->genus_species($_) => $_ } (keys %{$genomeHash});      my %speciesHash = map { $fig->genus_species($_) => $_ } (keys %{$genomeHash});
1180      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1181      my $loadExternalAliasFunc = $self->_TableLoader('ExternalAliasFunc', $genomeCount * 4000);      my $loadExternalAliasFunc = $self->_TableLoader('ExternalAliasFunc');
1182      my $loadExternalAliasOrg = $self->_TableLoader('ExternalAliasOrg', $genomeCount * 4000);      my $loadExternalAliasOrg = $self->_TableLoader('ExternalAliasOrg');
1183      Trace("Beginning external data load.") if T(2);      if ($self->{options}->{loadOnly}) {
1184            Trace("Loading from existing files.") if T(2);
1185        } else {
1186            Trace("Generating external data.") if T(2);
1187      # We loop through the files one at a time. First, the organism file.      # We loop through the files one at a time. First, the organism file.
1188      Open(\*ORGS, "<$FIG_Config::global/ext_org.table");      Open(\*ORGS, "<$FIG_Config::global/ext_org.table");
1189      my $orgLine;      my $orgLine;
# Line 1192  Line 1213 
1213              $loadExternalAliasFunc->Put(@funcFields[0,1]);              $loadExternalAliasFunc->Put(@funcFields[0,1]);
1214          }          }
1215      }      }
1216        }
1217      # Finish the load.      # Finish the load.
1218      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
1219      return $retVal;      return $retVal;
# Line 1232  Line 1254 
1254      my ($self) = @_;      my ($self) = @_;
1255      # Get the FIG object.      # Get the FIG object.
1256      my $fig = $self->{fig};      my $fig = $self->{fig};
     # Get the genome hash.  
     my $genomeHash = $self->{genomes};  
     my $genomeCount = (keys %{$genomeHash});  
1257      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1258      my $loadReaction = $self->_TableLoader('Reaction', $genomeCount * 4000);      my $loadReaction = $self->_TableLoader('Reaction');
1259      my $loadReactionURL = $self->_TableLoader('ReactionURL', $genomeCount * 4000);      my $loadReactionURL = $self->_TableLoader('ReactionURL');
1260      my $loadCompound = $self->_TableLoader('Compound', $genomeCount * 4000);      my $loadCompound = $self->_TableLoader('Compound');
1261      my $loadCompoundName = $self->_TableLoader('CompoundName', $genomeCount * 8000);      my $loadCompoundName = $self->_TableLoader('CompoundName');
1262      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS', $genomeCount * 4000);      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS');
1263      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf', $genomeCount * 12000);      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf');
1264      Trace("Beginning reaction/compound data load.") if T(2);      if ($self->{options}->{loadOnly}) {
1265            Trace("Loading from existing files.") if T(2);
1266        } else {
1267            Trace("Generating annotation data.") if T(2);
1268      # First we create the compounds.      # First we create the compounds.
1269      my @compounds = $fig->all_compounds();      my @compounds = $fig->all_compounds();
1270      for my $cid (@compounds) {      for my $cid (@compounds) {
# Line 1291  Line 1313 
1313              }              }
1314          }          }
1315      }      }
1316        }
1317      # Finish the load.      # Finish the load.
1318      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
1319      return $retVal;      return $retVal;
# Line 1326  Line 1349 
1349      my $fig = $self->{fig};      my $fig = $self->{fig};
1350      # Get the genome hash.      # Get the genome hash.
1351      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
     my $genomeCount = (keys %{$genomeHash});  
1352      # Create a load object for the table we're loading.      # Create a load object for the table we're loading.
1353      my $loadGenomeGroups = $self->_TableLoader('GenomeGroups', $genomeCount * 4);      my $loadGenomeGroups = $self->_TableLoader('GenomeGroups');
1354      Trace("Beginning group data load.") if T(2);      if ($self->{options}->{loadOnly}) {
1355            Trace("Loading from existing files.") if T(2);
1356        } else {
1357            Trace("Generating group data.") if T(2);
1358      # Loop through the genomes.      # Loop through the genomes.
1359      my $line;      my $line;
1360      for my $genomeID (keys %{$genomeHash}) {      for my $genomeID (keys %{$genomeHash}) {
# Line 1345  Line 1370 
1370          }          }
1371          close TMP;          close TMP;
1372      }      }
1373        }
1374      # Finish the load.      # Finish the load.
1375      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
1376      return $retVal;      return $retVal;
# Line 1366  Line 1392 
1392    
1393  Name of the table (relation) being loaded.  Name of the table (relation) being loaded.
1394    
 =item rowCount (optional)  
   
 Estimated maximum number of rows in the table.  
   
1395  =item RETURN  =item RETURN
1396    
1397  Returns an ERDBLoad object for loading the specified table.  Returns an ERDBLoad object for loading the specified table.
# Line 1380  Line 1402 
1402    
1403  sub _TableLoader {  sub _TableLoader {
1404      # Get the parameters.      # Get the parameters.
1405      my ($self, $tableName, $rowCount) = @_;      my ($self, $tableName, $loadOnly) = @_;
1406      # Create the load object.      # Create the load object.
1407      my $retVal = ERDBLoad->new($self->{erdb}, $tableName, $self->{loadDirectory}, $rowCount);      my $retVal = ERDBLoad->new($self->{erdb}, $tableName, $self->{loadDirectory}, $self->LoadOnly);
1408      # Cache it in the loader list.      # Cache it in the loader list.
1409      push @{$self->{loaders}}, $retVal;      push @{$self->{loaders}}, $retVal;
1410      # Return it to the caller.      # Return it to the caller.
# Line 1421  Line 1443 
1443      while (my $loader = pop @{$loadList}) {      while (my $loader = pop @{$loadList}) {
1444          # Trace the fact that we're cleaning up.          # Trace the fact that we're cleaning up.
1445          my $relName = $loader->RelName;          my $relName = $loader->RelName;
1446          Trace("Finishing load for $relName.") if T(2);          Trace("Finishing $relName.") if T(2);
1447          my $stats = $loader->Finish();          my $stats = $loader->Finish();
1448          if ($self->{options}->{dbLoad}) {          if ($self->{options}->{dbLoad}) {
1449              # Here we want to use the load file just created to load the database.              # Here we want to use the load file just created to load the database.

Legend:
Removed from v.1.21  
changed lines
  Added in v.1.24

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3