[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Tue Aug 16 20:35:03 2005 UTC revision 1.9, Wed Sep 14 11:21:24 2005 UTC
# Line 40  Line 40 
40  a variable called C<$fig>. This makes it fairly straightforward to determine which  a variable called C<$fig>. This makes it fairly straightforward to determine which
41  FIG methods are required to load the Sprout database.  FIG methods are required to load the Sprout database.
42    
43    This object creates the load files; however, the tables are not created until it
44    is time to actually do the load from the files into the target database.
45    
46  =cut  =cut
47    
48  #: Constructor SproutLoad->new();  #: Constructor SproutLoad->new();
# Line 48  Line 51 
51    
52  =head3 new  =head3 new
53    
54  C<< my $spl = SproutLoad->new($sprout, $fig, $genomeFile, $subsysFile); >>  C<< my $spl = SproutLoad->new($sprout, $fig, $genomeFile, $subsysFile, $options); >>
55    
56  Construct a new Sprout Loader object, specifying the two participating databases and  Construct a new Sprout Loader object, specifying the two participating databases and
57  the name of the files containing the list of genomes and subsystems to use.  the name of the files containing the list of genomes and subsystems to use.
# Line 79  Line 82 
82  to a list of subsystem names. If nothing is specified, all known subsystems will be  to a list of subsystem names. If nothing is specified, all known subsystems will be
83  considered trusted. Only subsystem data related to the trusted subsystems is loaded.  considered trusted. Only subsystem data related to the trusted subsystems is loaded.
84    
85    =item options
86    
87    Reference to a hash of command-line options.
88    
89  =back  =back
90    
91  =cut  =cut
92    
93  sub new {  sub new {
94      # Get the parameters.      # Get the parameters.
95      my ($class, $sprout, $fig, $genomeFile, $subsysFile) = @_;      my ($class, $sprout, $fig, $genomeFile, $subsysFile, $options) = @_;
96      # Load the list of genomes into a hash.      # Load the list of genomes into a hash.
97      my %genomes;      my %genomes;
98      if (! defined($genomeFile) || $genomeFile eq '') {      if (! defined($genomeFile) || $genomeFile eq '') {
# Line 155  Line 162 
162                    sprout => $sprout,                    sprout => $sprout,
163                    loadDirectory => $directory,                    loadDirectory => $directory,
164                    erdb => $sprout->{_erdb},                    erdb => $sprout->{_erdb},
165                    loaders => []                    loaders => [],
166                      options => $options
167                   };                   };
168      # Bless and return it.      # Bless and return it.
169      bless $retVal, $class;      bless $retVal, $class;
# Line 217  Line 225 
225      # Now we loop through the genomes, generating the data for each one.      # Now we loop through the genomes, generating the data for each one.
226      for my $genomeID (sort keys %{$genomeHash}) {      for my $genomeID (sort keys %{$genomeHash}) {
227          Trace("Loading data for genome $genomeID.") if T(3);          Trace("Loading data for genome $genomeID.") if T(3);
228            $loadGenome->Add("genomeIn");
229          # The access code comes in via the genome hash.          # The access code comes in via the genome hash.
230          my $accessCode = $genomeHash->{$genomeID};          my $accessCode = $genomeHash->{$genomeID};
231          # Get the genus, species, and strain from the scientific name. Note that we append          # Get the genus, species, and strain from the scientific name. Note that we append
# Line 232  Line 241 
241          my @contigs = $fig->all_contigs($genomeID);          my @contigs = $fig->all_contigs($genomeID);
242          for my $contigID (@contigs) {          for my $contigID (@contigs) {
243              Trace("Processing contig $contigID for $genomeID.") if T(4);              Trace("Processing contig $contigID for $genomeID.") if T(4);
244                $loadContig->Add("contigIn");
245                $loadSequence->Add("contigIn");
246              # Create the contig ID.              # Create the contig ID.
247              my $sproutContigID = "$genomeID:$contigID";              my $sproutContigID = "$genomeID:$contigID";
248              # Create the contig record and relate it to the genome.              # Create the contig record and relate it to the genome.
# Line 243  Line 254 
254              # Now we get the sequence a chunk at a time.              # Now we get the sequence a chunk at a time.
255              my $contigLen = $fig->contig_ln($genomeID, $contigID);              my $contigLen = $fig->contig_ln($genomeID, $contigID);
256              for (my $i = 1; $i <= $contigLen; $i += $chunkSize) {              for (my $i = 1; $i <= $contigLen; $i += $chunkSize) {
257                    $loadSequence->Add("chunkIn");
258                  # Compute the endpoint of this chunk.                  # Compute the endpoint of this chunk.
259                  my $end = FIG::min($i + $chunkSize - 1, $contigLen);                  my $end = FIG::min($i + $chunkSize - 1, $contigLen);
260                  # Get the actual DNA.                  # Get the actual DNA.
# Line 307  Line 319 
319      # Loop through the genomes found.      # Loop through the genomes found.
320      for my $genome (sort keys %{$genomeFilter}) {      for my $genome (sort keys %{$genomeFilter}) {
321          Trace("Generating coupling data for $genome.") if T(3);          Trace("Generating coupling data for $genome.") if T(3);
322            $loadCoupling->Add("genomeIn");
323          # Create a hash table for holding coupled pairs. We use this to prevent          # Create a hash table for holding coupled pairs. We use this to prevent
324          # duplicates. For example, if A is coupled to B, we don't want to also          # duplicates. For example, if A is coupled to B, we don't want to also
325          # assert that B is coupled to A, because we already know it. Fortunately,          # assert that B is coupled to A, because we already know it. Fortunately,
# Line 317  Line 330 
330          my @pegs = $fig->pegs_of($genome);          my @pegs = $fig->pegs_of($genome);
331          # Loop through the PEGs.          # Loop through the PEGs.
332          for my $peg1 (@pegs) {          for my $peg1 (@pegs) {
333                $loadCoupling->Add("pegIn");
334              Trace("Processing PEG $peg1 for $genome.") if T(4);              Trace("Processing PEG $peg1 for $genome.") if T(4);
335              # Get a list of the coupled PEGs.              # Get a list of the coupled PEGs.
336              my @couplings = $fig->coupled_to($peg1);              my @couplings = $fig->coupled_to($peg1);
# Line 327  Line 341 
341                  # Compute the coupling ID.                  # Compute the coupling ID.
342                  my $coupleID = Sprout::CouplingID($peg1, $peg2);                  my $coupleID = Sprout::CouplingID($peg1, $peg2);
343                  if (! exists $dupHash{$coupleID}) {                  if (! exists $dupHash{$coupleID}) {
344                        $loadCoupling->Add("couplingIn");
345                      # Here we have a new coupling to store in the load files.                      # Here we have a new coupling to store in the load files.
346                      Trace("Storing coupling ($coupleID) with score $score.") if T(4);                      Trace("Storing coupling ($coupleID) with score $score.") if T(4);
347                      # Ensure we don't do this again.                      # Ensure we don't do this again.
# Line 342  Line 357 
357                      my %evidenceMap = ();                      my %evidenceMap = ();
358                      # Process each evidence item.                      # Process each evidence item.
359                      for my $evidenceData (@evidence) {                      for my $evidenceData (@evidence) {
360                            $loadPCH->Add("evidenceIn");
361                          my ($peg3, $peg4, $usage) = @{$evidenceData};                          my ($peg3, $peg4, $usage) = @{$evidenceData};
362                          # Only proceed if the evidence is from a Sprout                          # Only proceed if the evidence is from a Sprout
363                          # genome.                          # genome.
364                          if ($genomeFilter->{$fig->genome_of($peg3)}) {                          if ($genomeFilter->{$fig->genome_of($peg3)}) {
365                                $loadUsesAsEvidence->Add("evidenceChosen");
366                              my $evidenceKey = "$coupleID $peg3 $peg4";                              my $evidenceKey = "$coupleID $peg3 $peg4";
367                              # We store this evidence in the hash if the usage                              # We store this evidence in the hash if the usage
368                              # is nonzero or no prior evidence has been found. This                              # is nonzero or no prior evidence has been found. This
# Line 408  Line 425 
425      my ($self) = @_;      my ($self) = @_;
426      # Get the FIG object.      # Get the FIG object.
427      my $fig = $self->{fig};      my $fig = $self->{fig};
428        # Find out if this is a limited run.
429        my $limited = $self->{options}->{limitedFeatures};
430      # Get the table of genome IDs.      # Get the table of genome IDs.
431      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
432      my $genomeCount = (keys %{$genomeHash});      my $genomeCount = (keys %{$genomeHash});
433      my $featureCount = $genomeCount * 4000;      my $featureCount = $genomeCount * 4000;
434      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
435      my $loadFeature = $self->_TableLoader('Feature', $featureCount);      my $loadFeature = $self->_TableLoader('Feature', $featureCount);
     my $loadFeatureAlias = $self->_TableLoader('FeatureAlias', $featureCount * 6);  
     my $loadFeatureLink = $self->_TableLoader('FeatureLink', $featureCount * 10);  
     my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation', $featureCount);  
     my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream', $featureCount);  
436      my $loadIsLocatedIn = $self->_TableLoader('IsLocatedIn', $featureCount);      my $loadIsLocatedIn = $self->_TableLoader('IsLocatedIn', $featureCount);
437        my ($loadFeatureAlias, $loadFeatureLink, $loadFeatureTranslation, $loadFeatureUpstream);
438        if (! $limited) {
439            $loadFeatureAlias = $self->_TableLoader('FeatureAlias', $featureCount * 6);
440            $loadFeatureLink = $self->_TableLoader('FeatureLink', $featureCount * 10);
441            $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation', $featureCount);
442            $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream', $featureCount);
443        }
444      # Get the maximum sequence size. We need this later for splitting up the      # Get the maximum sequence size. We need this later for splitting up the
445      # locations.      # locations.
446      my $chunkSize = $self->{sprout}->MaxSegment();      my $chunkSize = $self->{sprout}->MaxSegment();
# Line 426  Line 448 
448      # Now we loop through the genomes, generating the data for each one.      # Now we loop through the genomes, generating the data for each one.
449      for my $genomeID (sort keys %{$genomeHash}) {      for my $genomeID (sort keys %{$genomeHash}) {
450          Trace("Loading features for genome $genomeID.") if T(3);          Trace("Loading features for genome $genomeID.") if T(3);
451            $loadFeature->Add("genomeIn");
452          # Get the feature list for this genome.          # Get the feature list for this genome.
453          my $features = $fig->all_features_detailed($genomeID);          my $features = $fig->all_features_detailed($genomeID);
454          # Loop through the features.          # Loop through the features.
455          for my $featureData (@{$features}) {          for my $featureData (@{$features}) {
456                $loadFeature->Add("featureIn");
457              # Split the tuple.              # Split the tuple.
458              my ($featureID, $locations, $aliases, $type) = @{$featureData};              my ($featureID, $locations, $aliases, $type) = @{$featureData};
459              # Create the feature record.              # Create the feature record.
460              $loadFeature->Put("$genomeID:$featureID", 1, $type);              $loadFeature->Put($featureID, 1, $type);
461                # The next stuff is for a full load only.
462                if (! $limited) {
463              # Create the aliases.              # Create the aliases.
464              for my $alias (split /\s*,\s*/, $aliases) {              for my $alias (split /\s*,\s*/, $aliases) {
465                  $loadFeatureAlias->Put($featureID, $alias);                  $loadFeatureAlias->Put($featureID, $alias);
# Line 445  Line 471 
471              }              }
472              # If this is a peg, generate the translation and the upstream.              # If this is a peg, generate the translation and the upstream.
473              if ($type eq 'peg') {              if ($type eq 'peg') {
474                        $loadFeatureTranslation->Add("pegIn");
475                  my $translation = $fig->get_translation($featureID);                  my $translation = $fig->get_translation($featureID);
476                  if ($translation) {                  if ($translation) {
477                      $loadFeatureTranslation->Put($featureID, $translation);                      $loadFeatureTranslation->Put($featureID, $translation);
# Line 455  Line 482 
482                      $loadFeatureUpstream->Put($featureID, $upstream);                      $loadFeatureUpstream->Put($featureID, $upstream);
483                  }                  }
484              }              }
485                }
486              # This part is the roughest. We need to relate the features to contig              # This part is the roughest. We need to relate the features to contig
487              # locations, and the locations must be split so that none of them exceed              # locations, and the locations must be split so that none of them exceed
488              # the maximum segment size. This simplifies the genes_in_region processing              # the maximum segment size. This simplifies the genes_in_region processing
489              # for Sprout.              # for Sprout.
490              my @locationList = split /\s*,\s*/, $locations;              my @locationList = map { "$genomeID:$_" } split /\s*,\s*/, $locations;
491                # Create the location position indicator.
492                my $i = 1;
493              # Loop through the locations.              # Loop through the locations.
494              for my $location (@locationList) {              for my $location (@locationList) {
495                  # Parse the location.                  # Parse the location.
# Line 467  Line 497 
497                  # Split it into a list of chunks.                  # Split it into a list of chunks.
498                  my @locOList = ();                  my @locOList = ();
499                  while (my $peeling = $locObject->Peel($chunkSize)) {                  while (my $peeling = $locObject->Peel($chunkSize)) {
500                        $loadIsLocatedIn->Add("peeling");
501                      push @locOList, $peeling;                      push @locOList, $peeling;
502                  }                  }
503                  push @locOList, $locObject;                  push @locOList, $locObject;
504                  # Loop through the chunks, creating IsLocatedIn records. The variable                  # Loop through the chunks, creating IsLocatedIn records. The variable
505                  # "$i" will be used to keep the location index.                  # "$i" will be used to keep the location index.
                 my $i = 1;  
506                  for my $locChunk (@locOList) {                  for my $locChunk (@locOList) {
507                      $loadIsLocatedIn->Put($featureID, $locChunk->Contig, $locChunk->Left,                      $loadIsLocatedIn->Put($featureID, $locChunk->Contig, $locChunk->Left,
508                                            $locChunk->Dir, $locChunk->Length, $i);                                            $locChunk->Dir, $locChunk->Length, $i);
# Line 525  Line 555 
555      Trace("Beginning BBH load.") if T(2);      Trace("Beginning BBH load.") if T(2);
556      # Now we loop through the genomes, generating the data for each one.      # Now we loop through the genomes, generating the data for each one.
557      for my $genomeID (sort keys %{$genomeHash}) {      for my $genomeID (sort keys %{$genomeHash}) {
558            $loadIsBidirectionalBestHitOf->Add("genomeIn");
559          Trace("Processing features for genome $genomeID.") if T(3);          Trace("Processing features for genome $genomeID.") if T(3);
560          # Get the feature list for this genome.          # Get the feature list for this genome.
561          my $features = $fig->all_features_detailed($genomeID);          my $features = $fig->all_features_detailed($genomeID);
# Line 622  Line 653 
653      my %roleData = ();      my %roleData = ();
654      for my $subsysID (@subsysIDs) {      for my $subsysID (@subsysIDs) {
655          Trace("Creating subsystem $subsysID.") if T(3);          Trace("Creating subsystem $subsysID.") if T(3);
656            $loadSubsystem->Add("subsystemIn");
657          # Create the subsystem record.          # Create the subsystem record.
658          $loadSubsystem->Put($subsysID);          $loadSubsystem->Put($subsysID);
659          # Get the subsystem's roles.          # Get the subsystem's roles.
660          my @roles = $fig->subsys_to_roles($subsysID);          my @roles = $fig->subsystem_to_roles($subsysID);
661          # Connect the roles to the subsystem. If a role is new, we create          # Connect the roles to the subsystem. If a role is new, we create
662          # a role record for it.          # a role record for it.
663          for my $roleID (@roles) {          for my $roleID (@roles) {
664                $loadOccursInSubsystem->Add("roleIn");
665              $loadOccursInSubsystem->Put($roleID, $subsysID);              $loadOccursInSubsystem->Put($roleID, $subsysID);
666              if (! exists $roleData{$roleID}) {              if (! exists $roleData{$roleID}) {
667                  $loadRole->Put($roleID);                  $loadRole->Put($roleID);
# Line 650  Line 683 
683                  for (my $i = 0; $i <= $#roles; $i++) {                  for (my $i = 0; $i <= $#roles; $i++) {
684                      my $role = $roles[$i];                      my $role = $roles[$i];
685                      # Get the features in the spreadsheet cell for this genome and role.                      # Get the features in the spreadsheet cell for this genome and role.
686                      my @pegs = $fig->pegs_in_subsystem_coll($subsysID, $genomeID, $i);                      my @pegs = $fig->pegs_in_subsystem_cell($subsysID, $genomeID, $i);
687                      # Only proceed if features exist.                      # Only proceed if features exist.
688                      if (@pegs > 0) {                      if (@pegs > 0) {
689                          # Create the spreadsheet cell.                          # Create the spreadsheet cell.
# Line 778  Line 811 
811      my $nextID = 1;      my $nextID = 1;
812      # Loop through the genomes.      # Loop through the genomes.
813      for my $genomeID (keys %{$genomeHash}) {      for my $genomeID (keys %{$genomeHash}) {
814            $loadProperty->Add("genomeIn");
815          # Get the genome's features. The feature ID is the first field in the          # Get the genome's features. The feature ID is the first field in the
816          # tuples returned by "all_features_detailed". We use "all_features_detailed"          # tuples returned by "all_features_detailed". We use "all_features_detailed"
817          # rather than "all_features" because we want all features regardless of type.          # rather than "all_features" because we want all features regardless of type.
818          my @features = map { $_->[0] } @{$fig->all_features_detailed($genomeID)};          my @features = map { $_->[0] } @{$fig->all_features_detailed($genomeID)};
819          # Loop through the features, creating HasProperty records.          # Loop through the features, creating HasProperty records.
820          for my $fid (@features) {          for my $fid (@features) {
821                $loadProperty->Add("featureIn");
822              # Get all attributes for this feature. We do this one feature at a time              # Get all attributes for this feature. We do this one feature at a time
823              # to insure we do not get any genome attributes.              # to insure we do not get any genome attributes.
824              my @attributeList = $fig->get_attributes($fid, '', '', '');              my @attributeList = $fig->get_attributes($fid, '', '', '');
# Line 869  Line 904 
904      # Get the current time.      # Get the current time.
905      my $time = time();      my $time = time();
906      # Loop through the genomes.      # Loop through the genomes.
907      for my $genomeID (%{$genomeHash}) {      for my $genomeID (sort keys %{$genomeHash}) {
908          Trace("Processing $genomeID.") if T(3);          Trace("Processing $genomeID.") if T(3);
909          # Get the genome's PEGs.          # Get the genome's PEGs.
910          my @pegs = $fig->pegs_of($genomeID);          my @pegs = $fig->pegs_of($genomeID);
# Line 894  Line 929 
929                  }                  }
930                  # Now loop through the real annotations.                  # Now loop through the real annotations.
931                  for my $tuple ($fig->feature_annotations($peg, "raw")) {                  for my $tuple ($fig->feature_annotations($peg, "raw")) {
932                      my ($fid, $timestamp, $user, $text) = $tuple;                      my ($fid, $timestamp, $user, $text) = @{$tuple};
933                      # Here we fix up the annotation text. "\r" is removed,                      # Here we fix up the annotation text. "\r" is removed,
934                      # and "\t" and "\n" are escaped. Note we use the "s"                      # and "\t" and "\n" are escaped. Note we use the "s"
935                      # modifier so that new-lines inside the text do not                      # modifier so that new-lines inside the text do not
# Line 935  Line 970 
970      return $retVal;      return $retVal;
971  }  }
972    
=head3 LoadSourceData

C<< my $stats = $spl->LoadSourceData(); >>

Load the source data from FIG into Sprout.

Source data links genomes to information about the organizations that
mapped them.

The following relations are loaded by this method.

    ComesFrom
    Source
    SourceURL

There is no direct support for source attribution in FIG, so we access the SEED
files directly. For each genome, the first line of the C<PROJECT> file in the
genome's organism directory is read and split on tabs into a source ID, a
description, and a URL. Genomes whose C<PROJECT> file cannot be opened, is
empty, or has no source ID are skipped. Each source's description and URL are
written at most once, no matter how many genomes come from that source.

=over 4

=item RETURNS

Returns a statistics object for the loads.

=back

=cut
#: Return Type $%;
sub LoadSourceData {
    # Get this object instance.
    my ($self) = @_;
    # Get the genome hash.
    my $genomeHash = $self->{genomes};
    my $genomeCount = scalar keys %{$genomeHash};
    # Create load objects for each of the tables we're loading.
    my $loadComesFrom = $self->_TableLoader('ComesFrom', $genomeCount * 4);
    my $loadSource = $self->_TableLoader('Source', $genomeCount * 4);
    my $loadSourceURL = $self->_TableLoader('SourceURL', $genomeCount * 8);
    Trace("Beginning source data load.") if T(2);
    # These hashes remember which sources have already had a URL or a
    # description written. Both are keyed by source ID.
    my %sourceURL = ();
    my %sourceDesc = ();
    # Loop through the genomes.
    for my $genomeID (sort keys %{$genomeHash}) {
        Trace("Processing $genomeID.") if T(3);
        # Open the project file. A genome without one simply has no source
        # data, so a failed open is not an error.
        my $projectFile = "$FIG_Config::organisms/$genomeID/PROJECT";
        open(my $projectHandle, '<', $projectFile) or next;
        # Only the first line of the file is used.
        my $line = <$projectHandle>;
        close $projectHandle;
        next if ! defined $line;
        chomp $line;
        my ($sourceID, $desc, $url) = split /\t/, $line;
        # A blank first line yields no source ID; there is nothing to relate
        # the genome to in that case.
        next if ! defined $sourceID || $sourceID eq '';
        $loadComesFrom->Put($genomeID, $sourceID);
        # The duplicate check must use the same key (the source ID) that is
        # recorded below, or every genome sharing a source would emit the
        # URL again.
        if ($url && ! exists $sourceURL{$sourceID}) {
            $loadSourceURL->Put($sourceID, $url);
            $sourceURL{$sourceID} = 1;
        }
        if ($desc && ! exists $sourceDesc{$sourceID}) {
            $loadSource->Put($sourceID, $desc);
            $sourceDesc{$sourceID} = 1;
        }
    }
    # Finish the load.
    my $retVal = $self->_FinishAll();
    return $retVal;
}
1042    
=head3 LoadExternalData

C<< my $stats = $spl->LoadExternalData(); >>

Load the external data from FIG into Sprout.

External data contains information about external feature IDs.

The following relations are loaded by this method.

    ExternalAliasFunc
    ExternalAliasOrg

The support for external IDs in FIG is hidden beneath layers of other data, so
we access the SEED files directly to create these tables. This is also one of
the few load methods that does not proceed genome by genome: the genome list
is used only to estimate the size of the load.

The organism table (C<ext_org.table>) and the function table
(C<ext_func.table>) are read from the FIG global directory. Both are
tab-delimited, and blank lines are ignored. In the function table, a third
field beginning with C<EC > is appended to the function description.

=over 4

=item RETURNS

Returns a statistics object for the loads.

=back

=cut
#: Return Type $%;
sub LoadExternalData {
    # Get this object instance.
    my ($self) = @_;
    # Get the genome hash. It is only needed for the size estimates below.
    my $genomeHash = $self->{genomes};
    my $genomeCount = scalar keys %{$genomeHash};
    # Create load objects for each of the tables we're loading.
    my $loadExternalAliasFunc = $self->_TableLoader('ExternalAliasFunc', $genomeCount * 4000);
    my $loadExternalAliasOrg = $self->_TableLoader('ExternalAliasOrg', $genomeCount * 4000);
    Trace("Beginning external data load.") if T(2);
    # We loop through the files one at a time. First, the organism file.
    # NOTE(review): Open is defined outside this method; presumably it
    # reports a failed open itself -- confirm.
    Open(\*ORGS, "<$FIG_Config::global/ext_org.table");
    while (defined(my $orgLine = <ORGS>)) {
        # Clean the input line.
        chomp $orgLine;
        # Only proceed if the line is non-blank, as for the function file.
        next unless $orgLine;
        # Parse the organism name.
        my ($protID, $name) = split /\s*\t\s*/, $orgLine;
        $loadExternalAliasOrg->Put($protID, $name);
    }
    close ORGS;
    # Now the function file.
    Open(\*FUNCS, "<$FIG_Config::global/ext_func.table");
    while (defined(my $funcLine = <FUNCS>)) {
        # Clean the line ending.
        chomp $funcLine;
        # Only proceed if the line is non-blank.
        next unless $funcLine;
        # Split it into fields.
        my @funcFields = split /\s*\t\s*/, $funcLine;
        # If there's an EC number, append it to the description. The pattern
        # captures from "EC " through the last non-blank character.
        if ($#funcFields >= 2 && $funcFields[2] =~ /^(EC .*\S)/) {
            $funcFields[1] .= " $1";
        }
        # Output the function line: external ID and description only.
        $loadExternalAliasFunc->Put(@funcFields[0,1]);
    }
    # Release the function file; it is a package-level handle and would
    # otherwise stay open after this method returns.
    close FUNCS;
    # Finish the load.
    my $retVal = $self->_FinishAll();
    return $retVal;
}
1118    
=head3 LoadGroupData

C<< my $stats = $spl->LoadGroupData(); >>

Load the genome Groups into Sprout.

The following relations are loaded by this method.

    GenomeGroups

There is no direct support for genome groups in FIG, so we access the SEED
files directly. For each genome, the first line of the C<NMPDR> file in the
genome's organism directory is taken as the group name. Genomes whose C<NMPDR>
file cannot be opened or is empty are skipped.

=over 4

=item RETURNS

Returns a statistics object for the loads.

=back

=cut
#: Return Type $%;
sub LoadGroupData {
    # Get this object instance.
    my ($self) = @_;
    # Get the genome hash.
    my $genomeHash = $self->{genomes};
    my $genomeCount = scalar keys %{$genomeHash};
    # Create a load object for the table we're loading.
    my $loadGenomeGroups = $self->_TableLoader('GenomeGroups', $genomeCount * 4);
    Trace("Beginning group data load.") if T(2);
    # Loop through the genomes in sorted order, like the other load methods,
    # so the output is the same from run to run.
    for my $genomeID (sort keys %{$genomeHash}) {
        Trace("Processing $genomeID.") if T(3);
        # Open the NMPDR group file for this genome. A genome without one
        # belongs to no group, so a failed open is not an error.
        my $groupFile = "$FIG_Config::organisms/$genomeID/NMPDR";
        open(my $groupHandle, '<', $groupFile) or next;
        # Only the first line of the file is used.
        my $line = <$groupHandle>;
        close $groupHandle;
        next if ! defined $line;
        # Clean the line ending.
        chomp $line;
        # Add the group to the table. Note that there can only be one group
        # per genome.
        $loadGenomeGroups->Put($genomeID, $line);
    }
    # Finish the load.
    my $retVal = $self->_FinishAll();
    return $retVal;
}
1172    
1173  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1174    
1175  =head3 TableLoader  =head3 TableLoader

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3