[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.84, Thu May 17 23:44:51 2007 UTC revision 1.90, Thu Dec 6 14:53:50 2007 UTC
# Line 7  Line 7 
7      use PageBuilder;      use PageBuilder;
8      use ERDBLoad;      use ERDBLoad;
9      use FIG;      use FIG;
10        use FIGRules;
11      use Sprout;      use Sprout;
12      use Stats;      use Stats;
13      use BasicLocation;      use BasicLocation;
14      use HTML;      use HTML;
15        use AliasAnalysis;
16    
17  =head1 Sprout Load Methods  =head1 Sprout Load Methods
18    
# Line 50  Line 52 
52    
53  =head3 new  =head3 new
54    
55  C<< my $spl = SproutLoad->new($sprout, $fig, $genomeFile, $subsysFile, $options); >>      my $spl = SproutLoad->new($sprout, $fig, $genomeFile, $subsysFile, $options);
56    
57  Construct a new Sprout Loader object, specifying the two participating databases and  Construct a new Sprout Loader object, specifying the two participating databases and
58  the name of the files containing the list of genomes and subsystems to use.  the name of the files containing the list of genomes and subsystems to use.
# Line 101  Line 103 
103              # Here we want all the complete genomes and an access code of 1.              # Here we want all the complete genomes and an access code of 1.
104              my @genomeList = $fig->genomes(1);              my @genomeList = $fig->genomes(1);
105              %genomes = map { $_ => 1 } @genomeList;              %genomes = map { $_ => 1 } @genomeList;
106                Trace(scalar(keys %genomes) . " genomes found.") if T(3);
107          } else {          } else {
108              my $type = ref $genomeFile;              my $type = ref $genomeFile;
109              Trace("Genome file parameter type is \"$type\".") if T(3);              Trace("Genome file parameter type is \"$type\".") if T(3);
# Line 167  Line 170 
170          for my $subsystem (keys %subsystems) {          for my $subsystem (keys %subsystems) {
171              my $name = $subsystem;              my $name = $subsystem;
172              $name =~ s/_/ /g;              $name =~ s/_/ /g;
173              my $classes = $fig->subsystem_classification($subsystem);  #            my $classes = $fig->subsystem_classification($subsystem);
174              $name .= " " . join(" ", @{$classes});  #            $name .= " " . join(" ", @{$classes});
175              $subsystems{$subsystem} = $name;              $subsystems{$subsystem} = $name;
176          }          }
177      }      }
178        # Get the list of NMPDR-oriented attribute keys.
179        my @propKeys = $fig->get_group_keys("NMPDR");
180      # Get the data directory from the Sprout object.      # Get the data directory from the Sprout object.
181      my ($directory) = $sprout->LoadInfo();      my ($directory) = $sprout->LoadInfo();
182      # Create the Sprout load object.      # Create the Sprout load object.
# Line 183  Line 188 
188                    loadDirectory => $directory,                    loadDirectory => $directory,
189                    erdb => $sprout,                    erdb => $sprout,
190                    loaders => [],                    loaders => [],
191                    options => $options                    options => $options,
192                      propKeys => \@propKeys,
193                   };                   };
194      # Bless and return it.      # Bless and return it.
195      bless $retVal, $class;      bless $retVal, $class;
# Line 192  Line 198 
198    
199  =head3 LoadOnly  =head3 LoadOnly
200    
201  C<< my $flag = $spl->LoadOnly; >>      my $flag = $spl->LoadOnly;
202    
203  Return TRUE if we are in load-only mode, else FALSE.  Return TRUE if we are in load-only mode, else FALSE.
204    
# Line 203  Line 209 
209      return $self->{options}->{loadOnly};      return $self->{options}->{loadOnly};
210  }  }
211    
 =head3 PrimaryOnly  
   
 C<< my $flag = $spl->PrimaryOnly; >>  
   
 Return TRUE if only the main entity is to be loaded, else FALSE.  
   
 =cut  
   
 sub PrimaryOnly {  
     my ($self) = @_;  
     return $self->{options}->{primaryOnly};  
 }  
212    
213  =head3 LoadGenomeData  =head3 LoadGenomeData
214    
215  C<< my $stats = $spl->LoadGenomeData(); >>      my $stats = $spl->LoadGenomeData();
216    
217  Load the Genome, Contig, and Sequence data from FIG into Sprout.  Load the Genome, Contig, and Sequence data from FIG into Sprout.
218    
# Line 255  Line 249 
249      my $genomeCount = (keys %{$genomeHash});      my $genomeCount = (keys %{$genomeHash});
250      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
251      my $loadGenome = $self->_TableLoader('Genome');      my $loadGenome = $self->_TableLoader('Genome');
252      my $loadHasContig = $self->_TableLoader('HasContig', $self->PrimaryOnly);      my $loadHasContig = $self->_TableLoader('HasContig');
253      my $loadContig = $self->_TableLoader('Contig', $self->PrimaryOnly);      my $loadContig = $self->_TableLoader('Contig');
254      my $loadIsMadeUpOf = $self->_TableLoader('IsMadeUpOf', $self->PrimaryOnly);      my $loadIsMadeUpOf = $self->_TableLoader('IsMadeUpOf');
255      my $loadSequence = $self->_TableLoader('Sequence', $self->PrimaryOnly);      my $loadSequence = $self->_TableLoader('Sequence');
256      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
257          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
258      } else {      } else {
# Line 332  Line 326 
326      return $retVal;      return $retVal;
327  }  }
328    
 =head3 LoadCouplingData  
   
 C<< my $stats = $spl->LoadCouplingData(); >>  
   
 Load the coupling and evidence data from FIG into Sprout.  
   
 The coupling data specifies which genome features are functionally coupled. The  
 evidence data explains why the coupling is functional.  
   
 The following relations are loaded by this method.  
   
     Coupling  
     IsEvidencedBy  
     PCH  
     ParticipatesInCoupling  
     UsesAsEvidence  
   
 =over 4  
   
 =item RETURNS  
   
 Returns a statistics object for the loads.  
   
 =back  
   
 =cut  
 #: Return Type $%;  
 sub LoadCouplingData {  
     # Get this object instance.  
     my ($self) = @_;  
     # Get the FIG object.  
     my $fig = $self->{fig};  
     # Get the genome hash.  
     my $genomeFilter = $self->{genomes};  
     # Set up an ID counter for the PCHs.  
     my $pchID = 0;  
     # Start the loads.  
     my $loadCoupling = $self->_TableLoader('Coupling');  
     my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);  
     my $loadPCH = $self->_TableLoader('PCH', $self->PrimaryOnly);  
     my $loadParticipatesInCoupling = $self->_TableLoader('ParticipatesInCoupling', $self->PrimaryOnly);  
     my $loadUsesAsEvidence = $self->_TableLoader('UsesAsEvidence', $self->PrimaryOnly);  
     if ($self->{options}->{loadOnly}) {  
         Trace("Loading from existing files.") if T(2);  
     } else {  
         Trace("Generating coupling data.") if T(2);  
         # Loop through the genomes found.  
         for my $genome (sort keys %{$genomeFilter}) {  
             Trace("Generating coupling data for $genome.") if T(3);  
             $loadCoupling->Add("genomeIn");  
             # Create a hash table for holding coupled pairs. We use this to prevent  
             # duplicates. For example, if A is coupled to B, we don't want to also  
             # assert that B is coupled to A, because we already know it. Fortunately,  
             # all couplings occur within a genome, so we can keep the hash table  
             # size reasonably small.  
             my %dupHash = ();  
             # Get all of the genome's PEGs.  
             my @pegs = $fig->pegs_of($genome);  
             # Loop through the PEGs.  
             for my $peg1 (@pegs) {  
                 $loadCoupling->Add("pegIn");  
                 Trace("Processing PEG $peg1 for $genome.") if T(4);  
                 # Get a list of the coupled PEGs.  
                 my @couplings = $fig->coupled_to($peg1);  
                 # For each coupled PEG, we need to verify that a coupling already  
                 # exists. If not, we have to create one.  
                 for my $coupleData (@couplings) {  
                     my ($peg2, $score) = @{$coupleData};  
                     # Compute the coupling ID.  
                     my $coupleID = $self->{erdb}->CouplingID($peg1, $peg2);  
                     if (! exists $dupHash{$coupleID}) {  
                         $loadCoupling->Add("couplingIn");  
                         # Here we have a new coupling to store in the load files.  
                         Trace("Storing coupling ($coupleID) with score $score.") if T(4);  
                         # Ensure we don't do this again.  
                         $dupHash{$coupleID} = $score;  
                         # Write the coupling record.  
                         $loadCoupling->Put($coupleID, $score);  
                         # Connect it to the coupled PEGs.  
                         $loadParticipatesInCoupling->Put($peg1, $coupleID, 1);  
                         $loadParticipatesInCoupling->Put($peg2, $coupleID, 2);  
                         # Get the evidence for this coupling.  
                         my @evidence = $fig->coupling_evidence($peg1, $peg2);  
                         # Organize the evidence into a hash table.  
                         my %evidenceMap = ();  
                         # Process each evidence item.  
                         for my $evidenceData (@evidence) {  
                             $loadPCH->Add("evidenceIn");  
                             my ($peg3, $peg4, $usage) = @{$evidenceData};  
                             # Only proceed if the evidence is from a Sprout  
                             # genome.  
                             if ($genomeFilter->{$fig->genome_of($peg3)}) {  
                                 $loadUsesAsEvidence->Add("evidenceChosen");  
                                 my $evidenceKey = "$coupleID $peg3 $peg4";  
                                 # We store this evidence in the hash if the usage  
                                 # is nonzero or no prior evidence has been found. This  
                                 # insures that if there is duplicate evidence, we  
                                 # at least keep the meaningful ones. Only evidence in  
                                 # the hash makes it to the output.  
                                 if ($usage || ! exists $evidenceMap{$evidenceKey}) {  
                                     $evidenceMap{$evidenceKey} = $evidenceData;  
                                 }  
                             }  
                         }  
                         for my $evidenceID (keys %evidenceMap) {  
                             # Get the ID for this evidence.  
                             $pchID++;  
                             # Create the evidence record.  
                             my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};  
                             $loadPCH->Put($pchID, $usage);  
                             # Connect it to the coupling.  
                             $loadIsEvidencedBy->Put($coupleID, $pchID);  
                             # Connect it to the features.  
                             $loadUsesAsEvidence->Put($pchID, $peg3, 1);  
                             $loadUsesAsEvidence->Put($pchID, $peg4, 2);  
                         }  
                     }  
                 }  
             }  
         }  
     }  
     # All done. Finish the load.  
     my $retVal = $self->_FinishAll();  
     return $retVal;  
 }  
   
329  =head3 LoadFeatureData  =head3 LoadFeatureData
330    
331  C<< my $stats = $spl->LoadFeatureData(); >>      my $stats = $spl->LoadFeatureData();
332    
333  Load the feature data from FIG into Sprout.  Load the feature data from FIG into Sprout.
334    
# Line 470  Line 338 
338    
339      Feature      Feature
340      FeatureAlias      FeatureAlias
341        IsAliasOf
342      FeatureLink      FeatureLink
343      FeatureTranslation      FeatureTranslation
344      FeatureUpstream      FeatureUpstream
# Line 479  Line 348 
348      FeatureEssential      FeatureEssential
349      FeatureVirulent      FeatureVirulent
350      FeatureIEDB      FeatureIEDB
351        CDD
352        IsPresentOnProteinOf
353    
354  =over 4  =over 4
355    
# Line 500  Line 371 
371      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
372      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
373      my $loadFeature = $self->_TableLoader('Feature');      my $loadFeature = $self->_TableLoader('Feature');
374      my $loadIsLocatedIn = $self->_TableLoader('IsLocatedIn', $self->PrimaryOnly);      my $loadIsLocatedIn = $self->_TableLoader('IsLocatedIn');
375      my $loadFeatureAlias = $self->_TableLoader('FeatureAlias');      my $loadFeatureAlias = $self->_TableLoader('FeatureAlias');
376        my $loadIsAliasOf = $self->_TableLoader('IsAliasOf');
377      my $loadFeatureLink = $self->_TableLoader('FeatureLink');      my $loadFeatureLink = $self->_TableLoader('FeatureLink');
378      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');
379      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');
380      my $loadHasFeature = $self->_TableLoader('HasFeature', $self->PrimaryOnly);      my $loadHasFeature = $self->_TableLoader('HasFeature');
381      my $loadHasRoleInSubsystem = $self->_TableLoader('HasRoleInSubsystem', $self->PrimaryOnly);      my $loadHasRoleInSubsystem = $self->_TableLoader('HasRoleInSubsystem');
382      my $loadFeatureEssential = $self->_TableLoader('FeatureEssential');      my $loadFeatureEssential = $self->_TableLoader('FeatureEssential');
383      my $loadFeatureVirulent = $self->_TableLoader('FeatureVirulent');      my $loadFeatureVirulent = $self->_TableLoader('FeatureVirulent');
384      my $loadFeatureIEDB = $self->_TableLoader('FeatureIEDB');      my $loadFeatureIEDB = $self->_TableLoader('FeatureIEDB');
385        my $loadCDD = $self->_TableLoader('CDD');
386        my $loadIsPresentOnProteinOf = $self->_TableLoader('IsPresentOnProteinOf');
387      # Get the subsystem hash.      # Get the subsystem hash.
388      my $subHash = $self->{subsystems};      my $subHash = $self->{subsystems};
389        # Get the property keys.
390        my $propKeys = $self->{propKeys};
391        # Create hashes to hold CDD and alias values.
392        my %CDD = ();
393        my %alias = ();
394      # Get the maximum sequence size. We need this later for splitting up the      # Get the maximum sequence size. We need this later for splitting up the
395      # locations.      # locations.
396      my $chunkSize = $self->{sprout}->MaxSegment();      my $chunkSize = $self->{sprout}->MaxSegment();
# Line 520  Line 399 
399      } else {      } else {
400          Trace("Generating feature data.") if T(2);          Trace("Generating feature data.") if T(2);
401          # Now we loop through the genomes, generating the data for each one.          # Now we loop through the genomes, generating the data for each one.
402          for my $genomeID (sort keys %{$genomeHash}) {          my @allGenomes = sort keys %{$genomeHash};
403            Trace(scalar(@allGenomes) . " genomes found in list.") if T(3);
404            for my $genomeID (@allGenomes) {
405              Trace("Loading features for genome $genomeID.") if T(3);              Trace("Loading features for genome $genomeID.") if T(3);
406              $loadFeature->Add("genomeIn");              $loadFeature->Add("genomeIn");
407              # Get the feature list for this genome.              # Get the feature list for this genome.
# Line 531  Line 412 
412              my @fids = map { $_->[0] } @featureTuples;              my @fids = map { $_->[0] } @featureTuples;
413              Trace("$count features found for genome $genomeID.") if T(3);              Trace("$count features found for genome $genomeID.") if T(3);
414              # Get the attributes for this genome and put them in a hash by feature ID.              # Get the attributes for this genome and put them in a hash by feature ID.
415              my $attributes = GetGenomeAttributes($fig, $genomeID, \@fids);              my $attributes = GetGenomeAttributes($fig, $genomeID, \@fids, $propKeys);
416                Trace("Looping through features for $genomeID.") if T(3);
417              # Set up for our duplicate-feature check.              # Set up for our duplicate-feature check.
418              my $oldFeatureID = "";              my $oldFeatureID = "";
419              # Loop through the features.              # Loop through the features.
# Line 556  Line 438 
438                                      $fig->taxonomy_of($genomeID));                                      $fig->taxonomy_of($genomeID));
439                      # Create the aliases.                      # Create the aliases.
440                      for my $alias ($fig->feature_aliases($featureID)) {                      for my $alias ($fig->feature_aliases($featureID)) {
441                          $loadFeatureAlias->Put($featureID, $alias);                          #Connect this alias to this feature.
442                            $loadIsAliasOf->Put($alias, $featureID);
443                          push @keywords, $alias;                          push @keywords, $alias;
444                            # If this is a locus tag, also add its natural form as a keyword.
445                            my $naturalName = AliasAnalysis::Type(LocusTag => $alias);
446                            if ($naturalName) {
447                                push @keywords, $naturalName;
448                            }
449                            # If this is the first time for the specified alias, create its
450                            # alias record.
451                            if (! exists $alias{$alias}) {
452                                $loadFeatureAlias->Put($alias);
453                                $alias{$alias} = 1;
454                            }
455                      }                      }
456                      Trace("Assignment for $featureID is: $assignment") if T(4);                      Trace("Assignment for $featureID is: $assignment") if T(4);
457                      # Break the assignment into words and shove it onto the                      # Break the assignment into words and shove it onto the
# Line 585  Line 479 
479                      }                      }
480                      # Now we need to find the subsystems this feature participates in.                      # Now we need to find the subsystems this feature participates in.
481                      # We also add the subsystems to the keyword list. Before we do that,                      # We also add the subsystems to the keyword list. Before we do that,
482                      # we must convert underscores to spaces and tack on the classifications.                      # we must convert underscores to spaces.
483                      my @subsystems = $fig->peg_to_subsystems($featureID);                      my @subsystems = $fig->peg_to_subsystems($featureID);
484                      for my $subsystem (@subsystems) {                      for my $subsystem (@subsystems) {
485                          # Only proceed if we like this subsystem.                          # Only proceed if we like this subsystem.
# Line 634  Line 528 
528                          push @keywords, 'iedb';                          push @keywords, 'iedb';
529                          $loadFeature->Add('iedb');                          $loadFeature->Add('iedb');
530                      }                      }
531                        # Now we have some other attributes we need to process. Currently,
532                        # this is CDD and CELLO, but we expect the number to increase.
533                        my %attributeHash = ();
534                        for my $attrRow (@{$attributes->{$featureID}}) {
535                            my (undef, $key, @values) = @{$attrRow};
536                            $key =~ /^([^:]+)::(.+)/;
537                            if (exists $attributeHash{$1}) {
538                                $attributeHash{$1}->{$2} = \@values;
539                            } else {
540                                $attributeHash{$1} = {$2 => \@values};
541                            }
542                        }
543                        my $celloValue = "unknown";
544                        # Pull in the CELLO attribute. There will never be more than one.
545                        # If we have one, it's a feature attribute AND a keyword.
546                        my @celloData = keys %{$attributeHash{CELLO}};
547                        if (@celloData) {
548                            $celloValue = $celloData[0];
549                            push @keywords, $celloValue;
550                        }
551                        # Now we handle CDD. This is a bit more complicated, because
552                        # there are multiple CDDs per protein.
553                        if (exists $attributeHash{CDD}) {
554                            # Get the hash of CDD IDs to scores for this feature. We
555                            # already know it exists because of the above IF.
556                            my $cddHash = $attributeHash{CDD};
557                            my @cddData = sort keys %{$cddHash};
558                            for my $cdd (@cddData) {
559                                # Extract the score for this CDD and decode it.
560                                my ($codeScore) = split(/\s*,\s*/, $cddHash->{$cdd}->[1]);
561                                my $realScore = FIGRules::DecodeScore($codeScore);
562                                # We can't afford to crash because of a bad attribute
563                                # value, hence the IF below.
564                                if (! defined($realScore)) {
565                                    # Bad score, so count it.
566                                    $loadFeature->Add('badCDDscore');
567                                } else {
568                                    # Create the connection.
569                                    $loadIsPresentOnProteinOf->Put($cdd, $featureID, $realScore);
570                                    # If this CDD does not yet exist, create its record.
571                                    if (! exists $CDD{$cdd}) {
572                                        $CDD{$cdd} = 1;
573                                        $loadCDD->Put($cdd);
574                                    }
575                                }
576                            }
577                        }
578                      # Now we need to bust up hyphenated words in the keyword                      # Now we need to bust up hyphenated words in the keyword
579                      # list. We keep them separate and put them at the end so                      # list. We keep them separate and put them at the end so
580                      # the original word order is available.                      # the original word order is available.
# Line 654  Line 595 
595                      # Clean the keyword list.                      # Clean the keyword list.
596                      my $cleanWords = $sprout->CleanKeywords($keywordString);                      my $cleanWords = $sprout->CleanKeywords($keywordString);
597                      Trace("Keyword string for $featureID: $cleanWords") if T(4);                      Trace("Keyword string for $featureID: $cleanWords") if T(4);
598                      # Create the feature record.                      # Now we need to process the feature's locations. First, we split them up.
599                      $loadFeature->Put($featureID, 1, $user, $quality, $type, $assignment, $cleanWords);                      my @locationList = split /\s*,\s*/, $locations;
600                        # Next, we convert them to Sprout location objects.
601                        my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
602                        # Assemble them into a sprout location string for later.
603                        my $locationString = join(", ", map { $_->String } @locObjectList);
604                      # This part is the roughest. We need to relate the features to contig                      # This part is the roughest. We need to relate the features to contig
605                      # locations, and the locations must be split so that none of them exceed                      # locations, and the locations must be split so that none of them exceed
606                      # the maximum segment size. This simplifies the genes_in_region processing                      # the maximum segment size. This simplifies the genes_in_region processing
607                      # for Sprout.                      # for Sprout. To start, we create the location position indicator.
                     my @locationList = split /\s*,\s*/, $locations;  
                     # Create the location position indicator.  
608                      my $i = 1;                      my $i = 1;
609                      # Loop through the locations.                      # Loop through the locations.
610                      for my $location (@locationList) {                      for my $locObject (@locObjectList) {
611                          # Parse the location.                          # Split this location into a list of chunks.
                         my $locObject = BasicLocation->new("$genomeID:$location");  
                         # Split it into a list of chunks.  
612                          my @locOList = ();                          my @locOList = ();
613                          while (my $peeling = $locObject->Peel($chunkSize)) {                          while (my $peeling = $locObject->Peel($chunkSize)) {
614                              $loadIsLocatedIn->Add("peeling");                              $loadIsLocatedIn->Add("peeling");
# Line 682  Line 623 
623                              $i++;                              $i++;
624                          }                          }
625                      }                      }
626                        # Finally, reassemble the location objects into a list of Sprout location strings.
627                        # Create the feature record.
628                        $loadFeature->Put($featureID, 1, $user, $quality, $celloValue, $type, $assignment, $cleanWords, $locationString);
629                  }                  }
630              }              }
631                Trace("Genome $genomeID processed.") if T(3);
632          }          }
633      }      }
634      # Finish the loads.      # Finish the loads.
# Line 693  Line 638 
638    
639  =head3 LoadSubsystemData  =head3 LoadSubsystemData
640    
641  C<< my $stats = $spl->LoadSubsystemData(); >>      my $stats = $spl->LoadSubsystemData();
642    
643  Load the subsystem data from FIG into Sprout.  Load the subsystem data from FIG into Sprout.
644    
# Line 709  Line 654 
654      SubsystemClass      SubsystemClass
655      Role      Role
656      RoleEC      RoleEC
657        IsIdentifiedByEC
658      SSCell      SSCell
659      ContainsFeature      ContainsFeature
660      IsGenomeOf      IsGenomeOf
# Line 750  Line 696 
696      # Get the map list.      # Get the map list.
697      my @maps = $fig->all_maps;      my @maps = $fig->all_maps;
698      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
699      my $loadDiagram = $self->_TableLoader('Diagram', $self->PrimaryOnly);      my $loadDiagram = $self->_TableLoader('Diagram');
700      my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn', $self->PrimaryOnly);      my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn');
701      my $loadSubsystem = $self->_TableLoader('Subsystem');      my $loadSubsystem = $self->_TableLoader('Subsystem');
702      my $loadRole = $self->_TableLoader('Role', $self->PrimaryOnly);      my $loadRole = $self->_TableLoader('Role');
703      my $loadRoleEC = $self->_TableLoader('RoleEC', $self->PrimaryOnly);      my $loadRoleEC = $self->_TableLoader('RoleEC');
704      my $loadCatalyzes = $self->_TableLoader('Catalyzes', $self->PrimaryOnly);      my $loadIsIdentifiedByEC = $self->_TableLoader('IsIdentifiedByEC');
705      my $loadSSCell = $self->_TableLoader('SSCell', $self->PrimaryOnly);      my $loadCatalyzes = $self->_TableLoader('Catalyzes');
706      my $loadContainsFeature = $self->_TableLoader('ContainsFeature', $self->PrimaryOnly);      my $loadSSCell = $self->_TableLoader('SSCell');
707      my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf', $self->PrimaryOnly);      my $loadContainsFeature = $self->_TableLoader('ContainsFeature');
708      my $loadIsRoleOf = $self->_TableLoader('IsRoleOf', $self->PrimaryOnly);      my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf');
709      my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem', $self->PrimaryOnly);      my $loadIsRoleOf = $self->_TableLoader('IsRoleOf');
710      my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn', $self->PrimaryOnly);      my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem');
711      my $loadHasSSCell = $self->_TableLoader('HasSSCell', $self->PrimaryOnly);      my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn');
712      my $loadRoleSubset = $self->_TableLoader('RoleSubset', $self->PrimaryOnly);      my $loadHasSSCell = $self->_TableLoader('HasSSCell');
713      my $loadGenomeSubset = $self->_TableLoader('GenomeSubset', $self->PrimaryOnly);      my $loadRoleSubset = $self->_TableLoader('RoleSubset');
714      my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles', $self->PrimaryOnly);      my $loadGenomeSubset = $self->_TableLoader('GenomeSubset');
715      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $self->PrimaryOnly);      my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles');
716      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $self->PrimaryOnly);      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes');
717      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $self->PrimaryOnly);      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset');
718      my $loadSubsystemClass = $self->_TableLoader('SubsystemClass', $self->PrimaryOnly);      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset');
719        my $loadSubsystemClass = $self->_TableLoader('SubsystemClass');
720      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
721          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
722      } else {      } else {
723          Trace("Generating subsystem data.") if T(2);          Trace("Generating subsystem data.") if T(2);
724          # This hash will contain the role for each EC. When we're done, this          # This hash will contain the roles for each EC. When we're done, this
725          # information will be used to generate the Catalyzes table.          # information will be used to generate the Catalyzes table.
726          my %ecToRoles = ();          my %ecToRoles = ();
727          # Loop through the subsystems. Our first task will be to create the          # Loop through the subsystems. Our first task will be to create the
# Line 802  Line 749 
749                  $loadSubsystemClass->Put($subsysID, $classString);                  $loadSubsystemClass->Put($subsysID, $classString);
750                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.
751                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
752                        # Get the role's abbreviation.
753                        my $abbr = $sub->get_role_abbr($col);
754                      # Connect to this role.                      # Connect to this role.
755                      $loadOccursInSubsystem->Add("roleIn");                      $loadOccursInSubsystem->Add("roleIn");
756                      $loadOccursInSubsystem->Put($roleID, $subsysID, $col);                      $loadOccursInSubsystem->Put($roleID, $subsysID, $abbr, $col);
757                      # If it's a new role, add it to the role table.                      # If it's a new role, add it to the role table.
758                      if (! exists $roleData{$roleID}) {                      if (! exists $roleData{$roleID}) {
759                          # Get the role's abbreviation.                          # Get the role's abbreviation.
                         my $abbr = $sub->get_role_abbr($col);  
760                          # Add the role.                          # Add the role.
761                          $loadRole->Put($roleID, $abbr);                          $loadRole->Put($roleID);
762                          $roleData{$roleID} = 1;                          $roleData{$roleID} = 1;
763                          # Check for an EC number.                          # Check for an EC number.
764                          if ($roleID =~ /\(EC ([^.]+\.[^.]+\.[^.]+\.[^)]+)\)\s*$/) {                          if ($roleID =~ /\(EC (\d+\.\d+\.\d+\.\d+)\s*\)\s*$/) {
765                              my $ec = $1;                              my $ec = $1;
766                              $loadRoleEC->Put($roleID, $ec);                              $loadIsIdentifiedByEC->Put($roleID, $ec);
767                              $ecToRoles{$ec} = $roleID;                              # Check to see if this is our first encounter with this EC.
768                                if (exists $ecToRoles{$ec}) {
769                                    # No, so just add this role to the EC list.
770                                    push @{$ecToRoles{$ec}}, $roleID;
771                                } else {
772                                    # Output this EC.
773                                    $loadRoleEC->Put($ec);
774                                    # Create its role list.
775                                    $ecToRoles{$ec} = [$roleID];
776                                }
777                          }                          }
778                      }                      }
779                  }                  }
# Line 929  Line 886 
886              # Now we need to link all the map's roles to it.              # Now we need to link all the map's roles to it.
887              # A hash is used to prevent duplicates.              # A hash is used to prevent duplicates.
888              my %roleHash = ();              my %roleHash = ();
889              for my $role ($fig->map_to_ecs($map)) {              for my $ec ($fig->map_to_ecs($map)) {
890                  if (exists $ecToRoles{$role} && ! $roleHash{$role}) {                  if (exists $ecToRoles{$ec}) {
891                      $loadRoleOccursIn->Put($ecToRoles{$role}, $map);                      for my $role (@{$ecToRoles{$ec}}) {
892                            if (! $roleHash{$role}) {
893                                $loadRoleOccursIn->Put($role, $map);
894                      $roleHash{$role} = 1;                      $roleHash{$role} = 1;
895                  }                  }
896              }              }
897          }          }
898                }
899            }
900          # Before we leave, we must create the Catalyzes table. We start with the reactions,          # Before we leave, we must create the Catalyzes table. We start with the reactions,
901          # then use the "ecToRoles" table to convert EC numbers to role IDs.          # then use the "ecToRoles" table to convert EC numbers to role IDs.
902          my @reactions = $fig->all_reactions();          my @reactions = $fig->all_reactions();
903          for my $reactionID (@reactions) {          for my $reactionID (@reactions) {
904              # Get this reaction's list of roles. The results will be EC numbers.              # Get this reaction's list of roles. The results will be EC numbers.
905              my @roles = $fig->catalyzed_by($reactionID);              my @ecs = $fig->catalyzed_by($reactionID);
906              # Loop through the roles, creating catalyzation records.              # Loop through the roles, creating catalyzation records.
907              for my $thisRole (@roles) {              for my $thisEC (@ecs) {
908                  if (exists $ecToRoles{$thisRole}) {                  if (exists $ecToRoles{$thisEC}) {
909                      $loadCatalyzes->Put($ecToRoles{$thisRole}, $reactionID);                      for my $thisRole (@{$ecToRoles{$thisEC}}) {
910                            $loadCatalyzes->Put($thisRole, $reactionID);
911                        }
912                  }                  }
913              }              }
914          }          }
# Line 957  Line 920 
920    
921  =head3 LoadPropertyData  =head3 LoadPropertyData
922    
923  C<< my $stats = $spl->LoadPropertyData(); >>      my $stats = $spl->LoadPropertyData();
924    
925  Load the attribute data from FIG into Sprout.  Load the attribute data from FIG into Sprout.
926    
# Line 993  Line 956 
956      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
957      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
958      my $loadProperty = $self->_TableLoader('Property');      my $loadProperty = $self->_TableLoader('Property');
959      my $loadHasProperty = $self->_TableLoader('HasProperty', $self->PrimaryOnly);      my $loadHasProperty = $self->_TableLoader('HasProperty');
960      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
961          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
962      } else {      } else {
# Line 1002  Line 965 
965          my %propertyKeys = ();          my %propertyKeys = ();
966          my $nextID = 1;          my $nextID = 1;
967          # Get the attributes we intend to store in the property table.          # Get the attributes we intend to store in the property table.
968          my @propKeys = $fig->get_group_keys("NMPDR");          my $propKeys = $self->{propKeys};
969          # Loop through the genomes.          # Loop through the genomes.
970          for my $genomeID (sort keys %{$genomeHash}) {          for my $genomeID (sort keys %{$genomeHash}) {
971              $loadProperty->Add("genomeIn");              $loadProperty->Add("genomeIn");
# Line 1010  Line 973 
973              # Initialize a counter.              # Initialize a counter.
974              my $propertyCount = 0;              my $propertyCount = 0;
975              # Get the properties for this genome's features.              # Get the properties for this genome's features.
976              my @attributes = $fig->get_attributes("fig|$genomeID%", \@propKeys);              my @attributes = $fig->get_attributes("fig|$genomeID%", $propKeys);
977              Trace("Property list built for $genomeID.") if T(3);              Trace("Property list built for $genomeID.") if T(3);
978              # Loop through the results, creating HasProperty records.              # Loop through the results, creating HasProperty records.
979              for my $attributeData (@attributes) {              for my $attributeData (@attributes) {
# Line 1045  Line 1008 
1008    
1009  =head3 LoadAnnotationData  =head3 LoadAnnotationData
1010    
1011  C<< my $stats = $spl->LoadAnnotationData(); >>      my $stats = $spl->LoadAnnotationData();
1012    
1013  Load the annotation data from FIG into Sprout.  Load the annotation data from FIG into Sprout.
1014    
# Line 1079  Line 1042 
1042      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1043      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1044      my $loadAnnotation = $self->_TableLoader('Annotation');      my $loadAnnotation = $self->_TableLoader('Annotation');
1045      my $loadIsTargetOfAnnotation = $self->_TableLoader('IsTargetOfAnnotation', $self->PrimaryOnly);      my $loadIsTargetOfAnnotation = $self->_TableLoader('IsTargetOfAnnotation');
1046      my $loadSproutUser = $self->_TableLoader('SproutUser', $self->PrimaryOnly);      my $loadSproutUser = $self->_TableLoader('SproutUser');
1047      my $loadUserAccess = $self->_TableLoader('UserAccess', $self->PrimaryOnly);      my $loadUserAccess = $self->_TableLoader('UserAccess');
1048      my $loadMadeAnnotation = $self->_TableLoader('MadeAnnotation', $self->PrimaryOnly);      my $loadMadeAnnotation = $self->_TableLoader('MadeAnnotation');
1049      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1050          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1051      } else {      } else {
# Line 1152  Line 1115 
1115    
1116  =head3 LoadSourceData  =head3 LoadSourceData
1117    
1118  C<< my $stats = $spl->LoadSourceData(); >>      my $stats = $spl->LoadSourceData();
1119    
1120  Load the source data from FIG into Sprout.  Load the source data from FIG into Sprout.
1121    
# Line 1186  Line 1149 
1149      # Get the genome hash.      # Get the genome hash.
1150      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1151      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1152      my $loadComesFrom = $self->_TableLoader('ComesFrom', $self->PrimaryOnly);      my $loadComesFrom = $self->_TableLoader('ComesFrom');
1153      my $loadSource = $self->_TableLoader('Source');      my $loadSource = $self->_TableLoader('Source');
1154      my $loadSourceURL = $self->_TableLoader('SourceURL');      my $loadSourceURL = $self->_TableLoader('SourceURL');
1155      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
# Line 1230  Line 1193 
1193    
1194  =head3 LoadExternalData  =head3 LoadExternalData
1195    
1196  C<< my $stats = $spl->LoadExternalData(); >>      my $stats = $spl->LoadExternalData();
1197    
1198  Load the external data from FIG into Sprout.  Load the external data from FIG into Sprout.
1199    
# Line 1310  Line 1273 
1273    
1274  =head3 LoadReactionData  =head3 LoadReactionData
1275    
1276  C<< my $stats = $spl->LoadReactionData(); >>      my $stats = $spl->LoadReactionData();
1277    
1278  Load the reaction data from FIG into Sprout.  Load the reaction data from FIG into Sprout.
1279    
# Line 1323  Line 1286 
1286      Compound      Compound
1287      CompoundName      CompoundName
1288      CompoundCAS      CompoundCAS
1289        IsIdentifiedByCAS
1290        HasCompoundName
1291      IsAComponentOf      IsAComponentOf
1292    
1293  This method proceeds reaction by reaction rather than genome by genome.  This method proceeds reaction by reaction rather than genome by genome.
# Line 1344  Line 1309 
1309      my $fig = $self->{fig};      my $fig = $self->{fig};
1310      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1311      my $loadReaction = $self->_TableLoader('Reaction');      my $loadReaction = $self->_TableLoader('Reaction');
1312      my $loadReactionURL = $self->_TableLoader('ReactionURL', $self->PrimaryOnly);      my $loadReactionURL = $self->_TableLoader('ReactionURL');
1313      my $loadCompound = $self->_TableLoader('Compound', $self->PrimaryOnly);      my $loadCompound = $self->_TableLoader('Compound');
1314      my $loadCompoundName = $self->_TableLoader('CompoundName', $self->PrimaryOnly);      my $loadCompoundName = $self->_TableLoader('CompoundName');
1315      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS', $self->PrimaryOnly);      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS');
1316      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf', $self->PrimaryOnly);      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf');
1317        my $loadIsIdentifiedByCAS = $self->_TableLoader('IsIdentifiedByCAS');
1318        my $loadHasCompoundName = $self->_TableLoader('HasCompoundName');
1319      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1320          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1321      } else {      } else {
1322          Trace("Generating annotation data.") if T(2);          Trace("Generating reaction data.") if T(2);
1323            # We need some hashes to prevent duplicates.
1324            my %compoundNames = ();
1325            my %compoundCASes = ();
1326          # First we create the compounds.          # First we create the compounds.
1327          my @compounds = $fig->all_compounds();          my @compounds = $fig->all_compounds();
1328          for my $cid (@compounds) {          for my $cid (@compounds) {
# Line 1361  Line 1331 
1331              # Each name will be given a priority number, starting with 1.              # Each name will be given a priority number, starting with 1.
1332              my $prio = 1;              my $prio = 1;
1333              for my $name (@names) {              for my $name (@names) {
1334                  $loadCompoundName->Put($cid, $name, $prio++);                  if (! exists $compoundNames{$name}) {
1335                        $loadCompoundName->Put($name);
1336                        $compoundNames{$name} = 1;
1337                    }
1338                    $loadHasCompoundName->Put($cid, $name, $prio++);
1339              }              }
1340              # Create the main compound record. Note that the first name              # Create the main compound record. Note that the first name
1341              # becomes the label.              # becomes the label.
# Line 1370  Line 1344 
1344              # Check for a CAS ID.              # Check for a CAS ID.
1345              my $cas = $fig->cas($cid);              my $cas = $fig->cas($cid);
1346              if ($cas) {              if ($cas) {
1347                  $loadCompoundCAS->Put($cid, $cas);                  $loadIsIdentifiedByCAS->Put($cid, $cas);
1348                    if (! exists $compoundCASes{$cas}) {
1349                        $loadCompoundCAS->Put($cas);
1350                        $compoundCASes{$cas} = 1;
1351                    }
1352              }              }
1353          }          }
1354          # All the compounds are set up, so we need to loop through the reactions next. First,          # All the compounds are set up, so we need to loop through the reactions next. First,
# Line 1407  Line 1385 
1385      return $retVal;      return $retVal;
1386  }  }
1387    
 =head3 LoadGroupData  
   
 C<< my $stats = $spl->LoadGroupData(); >>  
   
 Load the genome Groups into Sprout.  
   
 The following relations are loaded by this method.  
   
     GenomeGroups  
   
 Currently, we do not use groups. We used to use them for NMPDR groups,  
 but there is no direct support for genome groups in FIG, so we access the SEED  
 files directly.  
   
 =over 4  
   
 =item RETURNS  
   
 Returns a statistics object for the loads.  
   
 =back  
   
 =cut  
 #: Return Type $%;  
 sub LoadGroupData {  
     # Get this object instance.  
     my ($self) = @_;  
     # Get the FIG object.  
     my $fig = $self->{fig};  
     # Get the genome hash.  
     my $genomeHash = $self->{genomes};  
     # Create a load object for the table we're loading.  
     my $loadGenomeGroups = $self->_TableLoader('GenomeGroups');  
     if ($self->{options}->{loadOnly}) {  
         Trace("Loading from existing files.") if T(2);  
     } else {  
         Trace("Generating group data.") if T(2);  
         # Currently there are no groups.  
     }  
     # Finish the load.  
     my $retVal = $self->_FinishAll();  
     return $retVal;  
 }  
   
1388  =head3 LoadSynonymData  =head3 LoadSynonymData
1389    
1390  C<< my $stats = $spl->LoadSynonymData(); >>      my $stats = $spl->LoadSynonymData();
1391    
1392  Load the synonym groups into Sprout.  Load the synonym groups into Sprout.
1393    
# Line 1492  Line 1426 
1426          Trace("Generating synonym group data.") if T(2);          Trace("Generating synonym group data.") if T(2);
1427          # Get the database handle.          # Get the database handle.
1428          my $dbh = $fig->db_handle();          my $dbh = $fig->db_handle();
1429          # Ask for the synonyms.          # Ask for the synonyms. Note that "maps_to" is a group name, and "syn_id" is a PEG ID or alias.
1430          my $sth = $dbh->prepare_command("SELECT maps_to, syn_id FROM peg_synonyms ORDER BY maps_to");          my $sth = $dbh->prepare_command("SELECT maps_to, syn_id FROM peg_synonyms ORDER BY maps_to");
1431          my $result = $sth->execute();          my $result = $sth->execute();
1432          if (! defined($result)) {          if (! defined($result)) {
1433              Confess("Database error in Synonym load: " . $sth->errstr());              Confess("Database error in Synonym load: " . $sth->errstr());
1434          } else {          } else {
1435                Trace("Processing synonym results.") if T(2);
1436              # Remember the current synonym.              # Remember the current synonym.
1437              my $current_syn = "";              my $current_syn = "";
1438              # Count the features.              # Count the features.
1439              my $featureCount = 0;              my $featureCount = 0;
1440                my $entryCount = 0;
1441              # Loop through the synonym/peg pairs.              # Loop through the synonym/peg pairs.
1442              while (my @row = $sth->fetchrow()) {              while (my @row = $sth->fetchrow()) {
1443                  # Get the synonym ID and feature ID.                  # Get the synonym group ID and feature ID.
1444                  my ($syn_id, $peg) = @row;                  my ($syn_id, $peg) = @row;
1445                    # Count this row.
1446                    $entryCount++;
1447                    if ($entryCount % 1000 == 0) {
1448                        Trace("$entryCount rows processed.") if T(3);
1449                    }
1450                  # Insure it's for one of our genomes.                  # Insure it's for one of our genomes.
1451                  my $genomeID = FIG::genome_of($peg);                  my $genomeID = FIG::genome_of($peg);
1452                  if (exists $genomeHash->{$genomeID}) {                  if (exists $genomeHash->{$genomeID}) {
# Line 1524  Line 1465 
1465                      }                      }
1466                  }                  }
1467              }              }
1468                Trace("$entryCount rows produced $featureCount features.") if T(2);
1469          }          }
1470      }      }
1471      # Finish the load.      # Finish the load.
# Line 1533  Line 1475 
1475    
1476  =head3 LoadFamilyData  =head3 LoadFamilyData
1477    
1478  C<< my $stats = $spl->LoadFamilyData(); >>      my $stats = $spl->LoadFamilyData();
1479    
1480  Load the protein families into Sprout.  Load the protein families into Sprout.
1481    
# Line 1601  Line 1543 
1543    
1544  =head3 LoadDrugData  =head3 LoadDrugData
1545    
1546  C<< my $stats = $spl->LoadDrugData(); >>      my $stats = $spl->LoadDrugData();
1547    
1548  Load the drug target data into Sprout.  Load the drug target data into Sprout.
1549    
# Line 1735  Line 1677 
1677                          # Decode the score.                          # Decode the score.
1678                          my $realScore = FIGRules::DecodeScore($score);                          my $realScore = FIGRules::DecodeScore($score);
1679                          # Connect the PDB to the feature.                          # Connect the PDB to the feature.
1680                          $loadIsProteinForFeature->Put($pdbData->[0], $pdbID, $start, $realScore, $end);                          $loadIsProteinForFeature->Put($pdbID, $pdbData->[0], $start, $realScore, $end);
1681                      }                      }
1682                  }                  }
1683              }              }
# Line 1800  Line 1742 
1742    
1743  =head3 SpecialAttribute  =head3 SpecialAttribute
1744    
1745  C<< my $count = SproutLoad::SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $loader); >>      my $count = SproutLoad::SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $loader);
1746    
1747  Look for special attributes of a given type. A special attribute is found by comparing one of  Look for special attributes of a given type. A special attribute is found by comparing one of
1748  the columns of the incoming attribute list to a search pattern. If a match is found, then  the columns of the incoming attribute list to a search pattern. If a match is found, then
# Line 1890  Line 1832 
1832    
1833  Name of the table (relation) being loaded.  Name of the table (relation) being loaded.
1834    
 =item ignore  
   
 TRUE if the table should be ignored entirely, else FALSE.  
   
1835  =item RETURN  =item RETURN
1836    
1837  Returns an ERDBLoad object for loading the specified table.  Returns an ERDBLoad object for loading the specified table.
# Line 1904  Line 1842 
1842    
1843  sub _TableLoader {  sub _TableLoader {
1844      # Get the parameters.      # Get the parameters.
1845      my ($self, $tableName, $ignore) = @_;      my ($self, $tableName) = @_;
1846      # Create the load object.      # Create the load object.
1847      my $retVal = ERDBLoad->new($self->{erdb}, $tableName, $self->{loadDirectory}, $self->LoadOnly,      my $retVal = ERDBLoad->new($self->{erdb}, $tableName, $self->{loadDirectory}, $self->LoadOnly);
                                $ignore);  
1848      # Cache it in the loader list.      # Cache it in the loader list.
1849      push @{$self->{loaders}}, $retVal;      push @{$self->{loaders}}, $retVal;
1850      # Return it to the caller.      # Return it to the caller.
# Line 1981  Line 1918 
1918    
1919  =head3 GetGenomeAttributes  =head3 GetGenomeAttributes
1920    
1921  C<< my $aHashRef = GetGenomeAttributes($fig, $genomeID, \@fids); >>      my $aHashRef = GetGenomeAttributes($fig, $genomeID, \@fids, \@propKeys);
1922    
1923  Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related  Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related
1924  attributes for all the features of a genome in a single call, then organizes them into  attributes for all the features of a genome in a single call, then organizes them into
# Line 2001  Line 1938 
1938    
1939  Reference to a list of the feature IDs whose attributes are to be kept.  Reference to a list of the feature IDs whose attributes are to be kept.
1940    
1941    =item propKeys
1942    
1943    Reference to a list of the keys to retrieve.
1944    
1945  =item RETURN  =item RETURN
1946    
1947  Returns a reference to a hash. The key of the hash is the feature ID. The value is the  Returns a reference to a hash. The key of the hash is the feature ID. The value is the
# Line 2013  Line 1954 
1954    
1955  sub GetGenomeAttributes {  sub GetGenomeAttributes {
1956      # Get the parameters.      # Get the parameters.
1957      my ($fig, $genomeID, $fids) = @_;      my ($fig, $genomeID, $fids, $propKeys) = @_;
1958      # Declare the return variable.      # Declare the return variable.
1959      my $retVal = {};      my $retVal = {};
     # Get a list of the attributes we care about.  
     my @propKeys = $fig->get_group_keys("NMPDR");  
     # Get the attributes.  
     my @aList = $fig->get_attributes("fig|$genomeID%", \@propKeys);  
1960      # Initialize the hash. This not only enables us to easily determine which FIDs to      # Initialize the hash. This not only enables us to easily determine which FIDs to
1961      # keep, it insures that the caller sees a list reference for every known fid,      # keep, it insures that the caller sees a list reference for every known fid,
1962      # simplifying the logic.      # simplifying the logic.
1963      for my $fid (@{$fids}) {      for my $fid (@{$fids}) {
1964          $retVal->{$fid} = [];          $retVal->{$fid} = [];
1965      }      }
1966      # Populate the hash.      # Get the attributes. If ev_code_cron is running, we may get a timeout error, so
1967        # an eval is used.
1968        my @aList = ();
1969        eval {
1970            @aList = $fig->get_attributes("fig|$genomeID%", $propKeys);
1971            Trace(scalar(@aList) . " attributes returned for genome $genomeID.") if T(3);
1972        };
1973        # Check for a problem.
1974        if ($@) {
1975            Trace("Retrying attributes for $genomeID due to error: $@") if T(1);
1976            # Our fallback plan is to process the attributes in blocks of 100. This is much slower,
1977            # but allows us to continue processing.
1978            my $nFids = scalar @{$fids};
1979            for (my $i = 0; $i < $nFids; $i += 100) {
1980                # Determine the index of the last feature ID we'll be specifying on this pass.
1981                # Normally it's $i + 99, but if we're close to the end it may be less.
1982                my $end = ($i + 100 > $nFids ? $nFids - 1 : $i + 99);
1983                # Get a slice of the fid list.
1984                my @slice = @{$fids}[$i .. $end];
1985                # Get the relevant attributes.
1986                Trace("Retrieving attributes for fids $i to $end.") if T(3);
1987                my @aShort = $fig->get_attributes(\@slice, $propKeys);
1988                Trace(scalar(@aShort) . " attributes returned for fids $i to $end.") if T(3);
1989                push @aList, @aShort;
1990            }
1991        }
1992        # Now we should have all the interesting attributes in @aList. Populate the hash with
1993        # them.
1994      for my $aListEntry (@aList) {      for my $aListEntry (@aList) {
1995          my $fid = $aListEntry->[0];          my $fid = $aListEntry->[0];
1996          if (exists $retVal->{$fid}) {          if (exists $retVal->{$fid}) {
# Line 2037  Line 2001 
2001      return $retVal;      return $retVal;
2002  }  }
2003    
2004    
2005  1;  1;

Legend:
Removed from v.1.84  
changed lines
  Added in v.1.90

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3