[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.62, Sun Jul 30 05:44:57 2006 UTC | revision 1.89, Mon Nov 5 23:43:57 2007 UTC
# Line 7  Line 7 
7      use PageBuilder;      use PageBuilder;
8      use ERDBLoad;      use ERDBLoad;
9      use FIG;      use FIG;
10        use FIGRules;
11      use Sprout;      use Sprout;
12      use Stats;      use Stats;
13      use BasicLocation;      use BasicLocation;
14      use HTML;      use HTML;
15        use AliasAnalysis;
16    
17  =head1 Sprout Load Methods  =head1 Sprout Load Methods
18    
# Line 80  Line 82 
82  Either the name of the file containing the list of trusted subsystems or a reference  Either the name of the file containing the list of trusted subsystems or a reference
83  to a list of subsystem names. If nothing is specified, all NMPDR subsystems will be  to a list of subsystem names. If nothing is specified, all NMPDR subsystems will be
84  considered trusted. (A subsystem is considered NMPDR if it has a file named C<NMPDR>  considered trusted. (A subsystem is considered NMPDR if it has a file named C<NMPDR>
85  in its data directory.) Only subsystem data related to the trusted subsystems is loaded.  in its data directory.) Only subsystem data related to the NMPDR subsystems is loaded.
86    
87  =item options  =item options
88    
# Line 101  Line 103 
103              # Here we want all the complete genomes and an access code of 1.              # Here we want all the complete genomes and an access code of 1.
104              my @genomeList = $fig->genomes(1);              my @genomeList = $fig->genomes(1);
105              %genomes = map { $_ => 1 } @genomeList;              %genomes = map { $_ => 1 } @genomeList;
106                Trace(scalar(keys %genomes) . " genomes found.") if T(3);
107          } else {          } else {
108              my $type = ref $genomeFile;              my $type = ref $genomeFile;
109              Trace("Genome file parameter type is \"$type\".") if T(3);              Trace("Genome file parameter type is \"$type\".") if T(3);
# Line 120  Line 123 
123                      # an omitted access code can be defaulted to 1.                      # an omitted access code can be defaulted to 1.
124                      for my $genomeLine (@genomeList) {                      for my $genomeLine (@genomeList) {
125                          my ($genomeID, $accessCode) = split("\t", $genomeLine);                          my ($genomeID, $accessCode) = split("\t", $genomeLine);
126                          if (undef $accessCode) {                          if (! defined($accessCode)) {
127                              $accessCode = 1;                              $accessCode = 1;
128                          }                          }
129                          $genomes{$genomeID} = $accessCode;                          $genomes{$genomeID} = $accessCode;
# Line 138  Line 141 
141          if (! defined $subsysFile || $subsysFile eq '') {          if (! defined $subsysFile || $subsysFile eq '') {
142              # Here we want all the usable subsystems. First we get the whole list.              # Here we want all the usable subsystems. First we get the whole list.
143              my @subs = $fig->all_subsystems();              my @subs = $fig->all_subsystems();
144              # Loop through, checking for usability.              # Loop through, checking for the NMPDR file.
145              for my $sub (@subs) {              for my $sub (@subs) {
146                  if ($fig->usable_subsystem($sub)) {                  if ($fig->nmpdr_subsystem($sub)) {
147                      $subsystems{$sub} = 1;                      $subsystems{$sub} = 1;
148                  }                  }
149              }              }
# Line 163  Line 166 
166                  Confess("Invalid subsystem parameter in SproutLoad constructor.");                  Confess("Invalid subsystem parameter in SproutLoad constructor.");
167              }              }
168          }          }
169            # Go through the subsys hash again, creating the keyword list for each subsystem.
170            for my $subsystem (keys %subsystems) {
171                my $name = $subsystem;
172                $name =~ s/_/ /g;
173    #            my $classes = $fig->subsystem_classification($subsystem);
174    #            $name .= " " . join(" ", @{$classes});
175                $subsystems{$subsystem} = $name;
176      }      }
177        }
178        # Get the list of NMPDR-oriented attribute keys.
179        my @propKeys = $fig->get_group_keys("NMPDR");
180      # Get the data directory from the Sprout object.      # Get the data directory from the Sprout object.
181      my ($directory) = $sprout->LoadInfo();      my ($directory) = $sprout->LoadInfo();
182      # Create the Sprout load object.      # Create the Sprout load object.
# Line 175  Line 188 
188                    loadDirectory => $directory,                    loadDirectory => $directory,
189                    erdb => $sprout,                    erdb => $sprout,
190                    loaders => [],                    loaders => [],
191                    options => $options                    options => $options,
192                      propKeys => \@propKeys,
193                   };                   };
194      # Bless and return it.      # Bless and return it.
195      bless $retVal, $class;      bless $retVal, $class;
# Line 195  Line 209 
209      return $self->{options}->{loadOnly};      return $self->{options}->{loadOnly};
210  }  }
211    
 =head3 PrimaryOnly  
   
 C<< my $flag = $spl->PrimaryOnly; >>  
   
 Return TRUE if only the main entity is to be loaded, else FALSE.  
   
 =cut  
   
 sub PrimaryOnly {  
     my ($self) = @_;  
     return $self->{options}->{primaryOnly};  
 }  
212    
213  =head3 LoadGenomeData  =head3 LoadGenomeData
214    
# Line 247  Line 249 
249      my $genomeCount = (keys %{$genomeHash});      my $genomeCount = (keys %{$genomeHash});
250      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
251      my $loadGenome = $self->_TableLoader('Genome');      my $loadGenome = $self->_TableLoader('Genome');
252      my $loadHasContig = $self->_TableLoader('HasContig', $self->PrimaryOnly);      my $loadHasContig = $self->_TableLoader('HasContig');
253      my $loadContig = $self->_TableLoader('Contig', $self->PrimaryOnly);      my $loadContig = $self->_TableLoader('Contig');
254      my $loadIsMadeUpOf = $self->_TableLoader('IsMadeUpOf', $self->PrimaryOnly);      my $loadIsMadeUpOf = $self->_TableLoader('IsMadeUpOf');
255      my $loadSequence = $self->_TableLoader('Sequence', $self->PrimaryOnly);      my $loadSequence = $self->_TableLoader('Sequence');
256      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
257          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
258      } else {      } else {
# Line 266  Line 268 
268              my $extra = join " ", @extraData;              my $extra = join " ", @extraData;
269              # Get the full taxonomy.              # Get the full taxonomy.
270              my $taxonomy = $fig->taxonomy_of($genomeID);              my $taxonomy = $fig->taxonomy_of($genomeID);
271                # Get the version. If no version is specified, we default to the genome ID by itself.
272                my $version = $fig->genome_version($genomeID);
273                if (! defined($version)) {
274                    $version = $genomeID;
275                }
276                # Get the DNA size.
277                my $dnaSize = $fig->genome_szdna($genomeID);
278                # Open the NMPDR group file for this genome.
279                my $group;
280                if (open(TMP, "<$FIG_Config::organisms/$genomeID/NMPDR") &&
281                    defined($group = <TMP>)) {
282                    # Clean the line ending.
283                    chomp $group;
284                } else {
285                    # No group, so use the default.
286                    $group = $FIG_Config::otherGroup;
287                }
288                close TMP;
289              # Output the genome record.              # Output the genome record.
290              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID), $genus,              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID),
291                               $species, $extra, $taxonomy);                               $dnaSize, $genus, $group, $species, $extra, $version, $taxonomy);
292              # Now we loop through each of the genome's contigs.              # Now we loop through each of the genome's contigs.
293              my @contigs = $fig->all_contigs($genomeID);              my @contigs = $fig->all_contigs($genomeID);
294              for my $contigID (@contigs) {              for my $contigID (@contigs) {
# Line 306  Line 326 
326      return $retVal;      return $retVal;
327  }  }
328    
 =head3 LoadCouplingData  
   
 C<< my $stats = $spl->LoadCouplingData(); >>  
   
 Load the coupling and evidence data from FIG into Sprout.  
   
 The coupling data specifies which genome features are functionally coupled. The  
 evidence data explains why the coupling is functional.  
   
 The following relations are loaded by this method.  
   
     Coupling  
     IsEvidencedBy  
     PCH  
     ParticipatesInCoupling  
     UsesAsEvidence  
   
 =over 4  
   
 =item RETURNS  
   
 Returns a statistics object for the loads.  
   
 =back  
   
 =cut  
 #: Return Type $%;  
 sub LoadCouplingData {  
     # Get this object instance.  
     my ($self) = @_;  
     # Get the FIG object.  
     my $fig = $self->{fig};  
     # Get the genome hash.  
     my $genomeFilter = $self->{genomes};  
     # Set up an ID counter for the PCHs.  
     my $pchID = 0;  
     # Start the loads.  
     my $loadCoupling = $self->_TableLoader('Coupling');  
     my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);  
     my $loadPCH = $self->_TableLoader('PCH', $self->PrimaryOnly);  
     my $loadParticipatesInCoupling = $self->_TableLoader('ParticipatesInCoupling', $self->PrimaryOnly);  
     my $loadUsesAsEvidence = $self->_TableLoader('UsesAsEvidence', $self->PrimaryOnly);  
     if ($self->{options}->{loadOnly}) {  
         Trace("Loading from existing files.") if T(2);  
     } else {  
         Trace("Generating coupling data.") if T(2);  
         # Loop through the genomes found.  
         for my $genome (sort keys %{$genomeFilter}) {  
             Trace("Generating coupling data for $genome.") if T(3);  
             $loadCoupling->Add("genomeIn");  
             # Create a hash table for holding coupled pairs. We use this to prevent  
             # duplicates. For example, if A is coupled to B, we don't want to also  
             # assert that B is coupled to A, because we already know it. Fortunately,  
             # all couplings occur within a genome, so we can keep the hash table  
             # size reasonably small.  
             my %dupHash = ();  
             # Get all of the genome's PEGs.  
             my @pegs = $fig->pegs_of($genome);  
             # Loop through the PEGs.  
             for my $peg1 (@pegs) {  
                 $loadCoupling->Add("pegIn");  
                 Trace("Processing PEG $peg1 for $genome.") if T(4);  
                 # Get a list of the coupled PEGs.  
                 my @couplings = $fig->coupled_to($peg1);  
                 # For each coupled PEG, we need to verify that a coupling already  
                 # exists. If not, we have to create one.  
                 for my $coupleData (@couplings) {  
                     my ($peg2, $score) = @{$coupleData};  
                     # Compute the coupling ID.  
                     my $coupleID = $self->{erdb}->CouplingID($peg1, $peg2);  
                     if (! exists $dupHash{$coupleID}) {  
                         $loadCoupling->Add("couplingIn");  
                         # Here we have a new coupling to store in the load files.  
                         Trace("Storing coupling ($coupleID) with score $score.") if T(4);  
                         # Ensure we don't do this again.  
                         $dupHash{$coupleID} = $score;  
                         # Write the coupling record.  
                         $loadCoupling->Put($coupleID, $score);  
                         # Connect it to the coupled PEGs.  
                         $loadParticipatesInCoupling->Put($peg1, $coupleID, 1);  
                         $loadParticipatesInCoupling->Put($peg2, $coupleID, 2);  
                         # Get the evidence for this coupling.  
                         my @evidence = $fig->coupling_evidence($peg1, $peg2);  
                         # Organize the evidence into a hash table.  
                         my %evidenceMap = ();  
                         # Process each evidence item.  
                         for my $evidenceData (@evidence) {  
                             $loadPCH->Add("evidenceIn");  
                             my ($peg3, $peg4, $usage) = @{$evidenceData};  
                             # Only proceed if the evidence is from a Sprout  
                             # genome.  
                             if ($genomeFilter->{$fig->genome_of($peg3)}) {  
                                 $loadUsesAsEvidence->Add("evidenceChosen");  
                                 my $evidenceKey = "$coupleID $peg3 $peg4";  
                                 # We store this evidence in the hash if the usage  
                                 # is nonzero or no prior evidence has been found. This  
                                 # insures that if there is duplicate evidence, we  
                                 # at least keep the meaningful ones. Only evidence in  
                                 # the hash makes it to the output.  
                                 if ($usage || ! exists $evidenceMap{$evidenceKey}) {  
                                     $evidenceMap{$evidenceKey} = $evidenceData;  
                                 }  
                             }  
                         }  
                         for my $evidenceID (keys %evidenceMap) {  
                             # Get the ID for this evidence.  
                             $pchID++;  
                             # Create the evidence record.  
                             my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};  
                             $loadPCH->Put($pchID, $usage);  
                             # Connect it to the coupling.  
                             $loadIsEvidencedBy->Put($coupleID, $pchID);  
                             # Connect it to the features.  
                             $loadUsesAsEvidence->Put($pchID, $peg3, 1);  
                             $loadUsesAsEvidence->Put($pchID, $peg4, 2);  
                         }  
                     }  
                 }  
             }  
         }  
     }  
     # All done. Finish the load.  
     my $retVal = $self->_FinishAll();  
     return $retVal;  
 }  
   
329  =head3 LoadFeatureData  =head3 LoadFeatureData
330    
331  C<< my $stats = $spl->LoadFeatureData(); >>  C<< my $stats = $spl->LoadFeatureData(); >>
# Line 444  Line 338 
338    
339      Feature      Feature
340      FeatureAlias      FeatureAlias
341        IsAliasOf
342      FeatureLink      FeatureLink
343      FeatureTranslation      FeatureTranslation
344      FeatureUpstream      FeatureUpstream
345      IsLocatedIn      IsLocatedIn
346      HasFeature      HasFeature
347        HasRoleInSubsystem
348        FeatureEssential
349        FeatureVirulent
350        FeatureIEDB
351        CDD
352        IsPresentOnProteinOf
353    
354  =over 4  =over 4
355    
# Line 463  Line 364 
364  sub LoadFeatureData {  sub LoadFeatureData {
365      # Get this object instance.      # Get this object instance.
366      my ($self) = @_;      my ($self) = @_;
367      # Get the FIG object.      # Get the FIG and Sprout objects.
368      my $fig = $self->{fig};      my $fig = $self->{fig};
369        my $sprout = $self->{sprout};
370      # Get the table of genome IDs.      # Get the table of genome IDs.
371      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
372      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
373      my $loadFeature = $self->_TableLoader('Feature');      my $loadFeature = $self->_TableLoader('Feature');
374      my $loadIsLocatedIn = $self->_TableLoader('IsLocatedIn', $self->PrimaryOnly);      my $loadIsLocatedIn = $self->_TableLoader('IsLocatedIn');
375      my $loadFeatureAlias = $self->_TableLoader('FeatureAlias');      my $loadFeatureAlias = $self->_TableLoader('FeatureAlias');
376        my $loadIsAliasOf = $self->_TableLoader('IsAliasOf');
377      my $loadFeatureLink = $self->_TableLoader('FeatureLink');      my $loadFeatureLink = $self->_TableLoader('FeatureLink');
378      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');
379      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');
380      my $loadHasFeature = $self->_TableLoader('HasFeature');      my $loadHasFeature = $self->_TableLoader('HasFeature');
381        my $loadHasRoleInSubsystem = $self->_TableLoader('HasRoleInSubsystem');
382        my $loadFeatureEssential = $self->_TableLoader('FeatureEssential');
383        my $loadFeatureVirulent = $self->_TableLoader('FeatureVirulent');
384        my $loadFeatureIEDB = $self->_TableLoader('FeatureIEDB');
385        my $loadCDD = $self->_TableLoader('CDD');
386        my $loadIsPresentOnProteinOf = $self->_TableLoader('IsPresentOnProteinOf');
387        # Get the subsystem hash.
388        my $subHash = $self->{subsystems};
389        # Get the property keys.
390        my $propKeys = $self->{propKeys};
391        # Create hashes to hold CDD and alias values.
392        my %CDD = ();
393        my %alias = ();
394      # Get the maximum sequence size. We need this later for splitting up the      # Get the maximum sequence size. We need this later for splitting up the
395      # locations.      # locations.
396      my $chunkSize = $self->{sprout}->MaxSegment();      my $chunkSize = $self->{sprout}->MaxSegment();
# Line 483  Line 399 
399      } else {      } else {
400          Trace("Generating feature data.") if T(2);          Trace("Generating feature data.") if T(2);
401          # Now we loop through the genomes, generating the data for each one.          # Now we loop through the genomes, generating the data for each one.
402          for my $genomeID (sort keys %{$genomeHash}) {          my @allGenomes = sort keys %{$genomeHash};
403            Trace(scalar(@allGenomes) . " genomes found in list.") if T(3);
404            for my $genomeID (@allGenomes) {
405              Trace("Loading features for genome $genomeID.") if T(3);              Trace("Loading features for genome $genomeID.") if T(3);
406              $loadFeature->Add("genomeIn");              $loadFeature->Add("genomeIn");
407              # Get the feature list for this genome.              # Get the feature list for this genome.
408              my $features = $fig->all_features_detailed($genomeID);              my $features = $fig->all_features_detailed_fast($genomeID);
409              # Sort and count the list.              # Sort and count the list.
410              my @featureTuples = sort { $a->[0] cmp $b->[0] } @{$features};              my @featureTuples = sort { $a->[0] cmp $b->[0] } @{$features};
411              my $count = scalar @featureTuples;              my $count = scalar @featureTuples;
412                my @fids = map { $_->[0] } @featureTuples;
413              Trace("$count features found for genome $genomeID.") if T(3);              Trace("$count features found for genome $genomeID.") if T(3);
414                # Get the attributes for this genome and put them in a hash by feature ID.
415                my $attributes = GetGenomeAttributes($fig, $genomeID, \@fids, $propKeys);
416                Trace("Looping through features for $genomeID.") if T(3);
417              # Set up for our duplicate-feature check.              # Set up for our duplicate-feature check.
418              my $oldFeatureID = "";              my $oldFeatureID = "";
419              # Loop through the features.              # Loop through the features.
420              for my $featureTuple (@featureTuples) {              for my $featureTuple (@featureTuples) {
421                  # Split the tuple.                  # Split the tuple.
422                  my ($featureID, $locations, undef, $type) = @{$featureTuple};                  my ($featureID, $locations, undef, $type, $minloc, $maxloc, $assignment, $user, $quality) = @{$featureTuple};
423                  # Check for duplicates.                  # Check for duplicates.
424                  if ($featureID eq $oldFeatureID) {                  if ($featureID eq $oldFeatureID) {
425                      Trace("Duplicate feature $featureID found.") if T(1);                      Trace("Duplicate feature $featureID found.") if T(1);
# Line 505  Line 427 
427                      $oldFeatureID = $featureID;                      $oldFeatureID = $featureID;
428                      # Count this feature.                      # Count this feature.
429                      $loadFeature->Add("featureIn");                      $loadFeature->Add("featureIn");
430                      # Create the feature record.                      # Fix the quality. It is almost always a space, but some odd stuff might sneak through, and the
431                      $loadFeature->Put($featureID, 1, $type);                      # Sprout database requires a single character.
432                      # Link it to the parent genome.                      if (! defined($quality) || $quality eq "") {
433                      $loadHasFeature->Put($genomeID, $featureID, $type);                          $quality = " ";
434                        }
435                        # Begin building the keywords. We start with the genome ID, the
436                        # feature ID, the taxonomy, and the organism name.
437                        my @keywords = ($genomeID, $featureID, $fig->genus_species($genomeID),
438                                        $fig->taxonomy_of($genomeID));
439                      # Create the aliases.                      # Create the aliases.
440                      for my $alias ($fig->feature_aliases($featureID)) {                      for my $alias ($fig->feature_aliases($featureID)) {
441                          $loadFeatureAlias->Put($featureID, $alias);                          #Connect this alias to this feature.
442                      }                          $loadIsAliasOf->Put($alias, $featureID);
443                            push @keywords, $alias;
444                            # If this is a locus tag, also add its natural form as a keyword.
445                            my $naturalName = AliasAnalysis::Type(LocusTag => $alias);
446                            if ($naturalName) {
447                                push @keywords, $naturalName;
448                            }
449                            # If this is the first time for the specified alias, create its
450                            # alias record.
451                            if (! exists $alias{$alias}) {
452                                $loadFeatureAlias->Put($alias);
453                                $alias{$alias} = 1;
454                            }
455                        }
456                        Trace("Assignment for $featureID is: $assignment") if T(4);
457                        # Break the assignment into words and shove it onto the
458                        # keyword list.
459                        push @keywords, split(/\s+/, $assignment);
460                        # Link this feature to the parent genome.
461                        $loadHasFeature->Put($genomeID, $featureID, $type);
462                      # Get the links.                      # Get the links.
463                      my @links = $fig->fid_links($featureID);                      my @links = $fig->fid_links($featureID);
464                      for my $link (@links) {                      for my $link (@links) {
# Line 531  Line 477 
477                              $loadFeatureUpstream->Put($featureID, $upstream);                              $loadFeatureUpstream->Put($featureID, $upstream);
478                          }                          }
479                      }                      }
480                        # Now we need to find the subsystems this feature participates in.
481                        # We also add the subsystems to the keyword list. Before we do that,
482                        # we must convert underscores to spaces.
483                        my @subsystems = $fig->peg_to_subsystems($featureID);
484                        for my $subsystem (@subsystems) {
485                            # Only proceed if we like this subsystem.
486                            if (exists $subHash->{$subsystem}) {
487                                # Store the has-role link.
488                                $loadHasRoleInSubsystem->Put($featureID, $subsystem, $genomeID, $type);
489                                # Save the subsystem's keyword data.
490                                my $subKeywords = $subHash->{$subsystem};
491                                push @keywords, split /\s+/, $subKeywords;
492                                # Now we need to get this feature's role in the subsystem.
493                                my $subObject = $fig->get_subsystem($subsystem);
494                                my @roleColumns = $subObject->get_peg_roles($featureID);
495                                my @allRoles = $subObject->get_roles();
496                                for my $col (@roleColumns) {
497                                    my $role = $allRoles[$col];
498                                    push @keywords, split /\s+/, $role;
499                                    push @keywords, $subObject->get_role_abbr($col);
500                                }
501                            }
502                        }
503                        # There are three special attributes computed from property
504                        # data that we build next. If the special attribute is non-empty,
505                        # its name will be added to the keyword list. First, we get all
506                        # the attributes for this feature. They will come back as
507                        # 4-tuples: [peg, name, value, URL]. We use a 3-tuple instead:
508                        # [name, value, value with URL]. (We don't need the PEG, since
509                        # we already know it.)
510                        my @attributes = map { [$_->[1], $_->[2], Tracer::CombineURL($_->[2], $_->[3])] }
511                                             @{$attributes->{$featureID}};
512                        # Now we process each of the special attributes.
513                        if (SpecialAttribute($featureID, \@attributes,
514                                             1, [0,2], '^(essential|potential_essential)$',
515                                             $loadFeatureEssential)) {
516                            push @keywords, 'essential';
517                            $loadFeature->Add('essential');
518                        }
519                        if (SpecialAttribute($featureID, \@attributes,
520                                             0, [2], '^virulen',
521                                             $loadFeatureVirulent)) {
522                            push @keywords, 'virulent';
523                            $loadFeature->Add('virulent');
524                        }
525                        if (SpecialAttribute($featureID, \@attributes,
526                                             0, [0,2], '^iedb_',
527                                             $loadFeatureIEDB)) {
528                            push @keywords, 'iedb';
529                            $loadFeature->Add('iedb');
530                        }
531                        # Now we have some other attributes we need to process. Currently,
532                        # this is CDD and CELLO, but we expect the number to increase.
533                        my %attributeHash = ();
534                        for my $attrRow (@{$attributes->{$featureID}}) {
535                            my (undef, $key, @values) = @{$attrRow};
536                            $key =~ /^([^:]+)::(.+)/;
537                            if (exists $attributeHash{$1}) {
538                                $attributeHash{$1}->{$2} = \@values;
539                            } else {
540                                $attributeHash{$1} = {$2 => \@values};
541                            }
542                        }
543                        my $celloValue = "unknown";
544                        # Pull in the CELLO attribute. There will never be more than one.
545                        # If we have one, it's a feature attribute AND a keyword.
546                        my @celloData = keys %{$attributeHash{CELLO}};
547                        if (@celloData) {
548                            $celloValue = $celloData[0];
549                            push @keywords, $celloValue;
550                        }
551                        # Now we handle CDD. This is a bit more complicated, because
552                        # there are multiple CDDs per protein.
553                        if (exists $attributeHash{CDD}) {
554                            # Get the hash of CDD IDs to scores for this feature. We
555                            # already know it exists because of the above IF.
556                            my $cddHash = $attributeHash{CDD};
557                            my @cddData = sort keys %{$cddHash};
558                            for my $cdd (@cddData) {
559                                # Extract the score for this CDD and decode it.
560                                my ($codeScore) = split(/\s*,\s*/, $cddHash->{$cdd}->[1]);
561                                my $realScore = FIGRules::DecodeScore($codeScore);
562                                # We can't afford to crash because of a bad attribute
563                                # value, hence the IF below.
564                                if (! defined($realScore)) {
565                                    # Bad score, so count it.
566                                    $loadFeature->Add('badCDDscore');
567                                } else {
568                                    # Create the connection.
569                                    $loadIsPresentOnProteinOf->Put($cdd, $featureID, $realScore);
570                                    # If this CDD does not yet exist, create its record.
571                                    if (! exists $CDD{$cdd}) {
572                                        $CDD{$cdd} = 1;
573                                        $loadCDD->Put($cdd);
574                                    }
575                                }
576                            }
577                        }
578                        # Now we need to bust up hyphenated words in the keyword
579                        # list. We keep them separate and put them at the end so
580                        # the original word order is available.
581                        my $keywordString = "";
582                        my $bustedString = "";
583                        for my $keyword (@keywords) {
584                            if (length $keyword >= 3) {
585                                $keywordString .= " $keyword";
586                                if ($keyword =~ /-/) {
587                                    my @words = split /-/, $keyword;
588                                    $bustedString .= join(" ", "", @words);
589                                }
590                            }
591                        }
592                        $keywordString .= $bustedString;
593                        # Get rid of annoying punctuation.
594                        $keywordString =~ s/[();]//g;
595                        # Clean the keyword list.
596                        my $cleanWords = $sprout->CleanKeywords($keywordString);
597                        Trace("Keyword string for $featureID: $cleanWords") if T(4);
598                        # Now we need to process the feature's locations. First, we split them up.
599                        my @locationList = split /\s*,\s*/, $locations;
600                        # Next, we convert them to Sprout location objects.
601                        my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
602                      # This part is the roughest. We need to relate the features to contig                      # This part is the roughest. We need to relate the features to contig
603                      # locations, and the locations must be split so that none of them exceed                      # locations, and the locations must be split so that none of them exceed
604                      # the maximum segment size. This simplifies the genes_in_region processing                      # the maximum segment size. This simplifies the genes_in_region processing
605                      # for Sprout.                      # for Sprout. To start, we create the location position indicator.
                     my @locationList = split /\s*,\s*/, $locations;  
                     # Create the location position indicator.  
606                      my $i = 1;                      my $i = 1;
607                      # Loop through the locations.                      # Loop through the locations.
608                      for my $location (@locationList) {                      for my $locObject (@locObjectList) {
609                          # Parse the location.                          # Split this location into a list of chunks.
                         my $locObject = BasicLocation->new("$genomeID:$location");  
                         # Split it into a list of chunks.  
610                          my @locOList = ();                          my @locOList = ();
611                          while (my $peeling = $locObject->Peel($chunkSize)) {                          while (my $peeling = $locObject->Peel($chunkSize)) {
612                              $loadIsLocatedIn->Add("peeling");                              $loadIsLocatedIn->Add("peeling");
# Line 557  Line 621 
621                              $i++;                              $i++;
622                          }                          }
623                      }                      }
624                        # Finally, reassemble the location objects into a list of Sprout location strings.
625                        $locations = join(", ", map { $_->String } @locObjectList);
626                        # Create the feature record.
627                        $loadFeature->Put($featureID, 1, $user, $quality, $celloValue, $type, $assignment, $cleanWords, $locations);
628                  }                  }
629              }              }
630          }              Trace("Genome $genomeID processed.") if T(3);
     }  
     # Finish the loads.  
     my $retVal = $self->_FinishAll();  
     return $retVal;  
 }  
   
 =head3 LoadBBHData  
   
 C<< my $stats = $spl->LoadBBHData(); >>  
   
 Load the bidirectional best hit data from FIG into Sprout.  
   
 Sprout does not store information on similarities. Instead, it has only the  
 bi-directional best hits. Even so, the BBH table is one of the largest in  
 the database.  
   
 The following relations are loaded by this method.  
   
     IsBidirectionalBestHitOf  
   
 =over 4  
   
 =item RETURNS  
   
 Returns a statistics object for the loads.  
   
 =back  
   
 =cut  
 #: Return Type $%;  
 sub LoadBBHData {  
     # Get this object instance.  
     my ($self) = @_;  
     # Get the FIG object.  
     my $fig = $self->{fig};  
     # Get the table of genome IDs.  
     my $genomeHash = $self->{genomes};  
     # Create load objects for each of the tables we're loading.  
     my $loadIsBidirectionalBestHitOf = $self->_TableLoader('IsBidirectionalBestHitOf');  
     if ($self->{options}->{loadOnly}) {  
         Trace("Loading from existing files.") if T(2);  
     } else {  
         Trace("Generating BBH data.") if T(2);  
         # Now we loop through the genomes, generating the data for each one.  
         for my $genomeID (sort keys %{$genomeHash}) {  
             $loadIsBidirectionalBestHitOf->Add("genomeIn");  
             Trace("Processing features for genome $genomeID.") if T(3);  
             # Get the feature list for this genome.  
             my $features = $fig->all_features_detailed($genomeID);  
             # Loop through the features.  
             for my $featureData (@{$features}) {  
                 # Split the tuple.  
                 my ($featureID, $locations, $aliases, $type) = @{$featureData};  
                 # Get the bi-directional best hits.  
                 my @bbhList = $fig->bbhs($featureID);  
                 for my $bbhEntry (@bbhList) {  
                     # Get the target feature ID and the score.  
                     my ($targetID, $score) = @{$bbhEntry};  
                     # Check the target feature's genome.  
                     my $targetGenomeID = $fig->genome_of($targetID);  
                     # Only proceed if it's one of our genomes.  
                     if ($genomeHash->{$targetGenomeID}) {  
                         $loadIsBidirectionalBestHitOf->Put($featureID, $targetID, $targetGenomeID,  
                                                            $score);  
                     }  
                 }  
             }  
631          }          }
632      }      }
633      # Finish the loads.      # Finish the loads.
# Line 652  Line 653 
653      SubsystemClass      SubsystemClass
654      Role      Role
655      RoleEC      RoleEC
656        IsIdentifiedByEC
657      SSCell      SSCell
658      ContainsFeature      ContainsFeature
659      IsGenomeOf      IsGenomeOf
# Line 693  Line 695 
695      # Get the map list.      # Get the map list.
696      my @maps = $fig->all_maps;      my @maps = $fig->all_maps;
697      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
698      my $loadDiagram = $self->_TableLoader('Diagram', $self->PrimaryOnly);      my $loadDiagram = $self->_TableLoader('Diagram');
699      my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn', $self->PrimaryOnly);      my $loadRoleOccursIn = $self->_TableLoader('RoleOccursIn');
700      my $loadSubsystem = $self->_TableLoader('Subsystem');      my $loadSubsystem = $self->_TableLoader('Subsystem');
701      my $loadRole = $self->_TableLoader('Role', $self->PrimaryOnly);      my $loadRole = $self->_TableLoader('Role');
702      my $loadRoleEC = $self->_TableLoader('RoleEC', $self->PrimaryOnly);      my $loadRoleEC = $self->_TableLoader('RoleEC');
703      my $loadCatalyzes = $self->_TableLoader('Catalyzes', $self->PrimaryOnly);      my $loadIsIdentifiedByEC = $self->_TableLoader('IsIdentifiedByEC');
704      my $loadSSCell = $self->_TableLoader('SSCell', $self->PrimaryOnly);      my $loadCatalyzes = $self->_TableLoader('Catalyzes');
705      my $loadContainsFeature = $self->_TableLoader('ContainsFeature', $self->PrimaryOnly);      my $loadSSCell = $self->_TableLoader('SSCell');
706      my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf', $self->PrimaryOnly);      my $loadContainsFeature = $self->_TableLoader('ContainsFeature');
707      my $loadIsRoleOf = $self->_TableLoader('IsRoleOf', $self->PrimaryOnly);      my $loadIsGenomeOf = $self->_TableLoader('IsGenomeOf');
708      my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem', $self->PrimaryOnly);      my $loadIsRoleOf = $self->_TableLoader('IsRoleOf');
709      my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn', $self->PrimaryOnly);      my $loadOccursInSubsystem = $self->_TableLoader('OccursInSubsystem');
710      my $loadHasSSCell = $self->_TableLoader('HasSSCell', $self->PrimaryOnly);      my $loadParticipatesIn = $self->_TableLoader('ParticipatesIn');
711      my $loadRoleSubset = $self->_TableLoader('RoleSubset', $self->PrimaryOnly);      my $loadHasSSCell = $self->_TableLoader('HasSSCell');
712      my $loadGenomeSubset = $self->_TableLoader('GenomeSubset', $self->PrimaryOnly);      my $loadRoleSubset = $self->_TableLoader('RoleSubset');
713      my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles', $self->PrimaryOnly);      my $loadGenomeSubset = $self->_TableLoader('GenomeSubset');
714      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $self->PrimaryOnly);      my $loadConsistsOfRoles = $self->_TableLoader('ConsistsOfRoles');
715      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $self->PrimaryOnly);      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes');
716      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $self->PrimaryOnly);      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset');
717      my $loadSubsystemClass = $self->_TableLoader('SubsystemClass', $self->PrimaryOnly);      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset');
718        my $loadSubsystemClass = $self->_TableLoader('SubsystemClass');
719      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
720          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
721      } else {      } else {
722          Trace("Generating subsystem data.") if T(2);          Trace("Generating subsystem data.") if T(2);
723          # This hash will contain the role for each EC. When we're done, this          # This hash will contain the roles for each EC. When we're done, this
724          # information will be used to generate the Catalyzes table.          # information will be used to generate the Catalyzes table.
725          my %ecToRoles = ();          my %ecToRoles = ();
726          # Loop through the subsystems. Our first task will be to create the          # Loop through the subsystems. Our first task will be to create the
# Line 731  Line 734 
734              # Get the subsystem object.              # Get the subsystem object.
735              my $sub = $fig->get_subsystem($subsysID);              my $sub = $fig->get_subsystem($subsysID);
736              # Only proceed if the subsystem has a spreadsheet.              # Only proceed if the subsystem has a spreadsheet.
737              if (! $sub->{empty_ss}) {              if (defined($sub) && ! $sub->{empty_ss}) {
738                  Trace("Creating subsystem $subsysID.") if T(3);                  Trace("Creating subsystem $subsysID.") if T(3);
739                  $loadSubsystem->Add("subsystemIn");                  $loadSubsystem->Add("subsystemIn");
740                  # Create the subsystem record.                  # Create the subsystem record.
741                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
742                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
743                  $loadSubsystem->Put($subsysID, $curator, $notes);                  $loadSubsystem->Put($subsysID, $curator, $notes);
744                  my $class = $fig->subsystem_classification($subsysID);                  # Now for the classification string. This comes back as a list
745                  if ($class) {                  # reference and we convert it to a string delimited by $FIG_Config::splitter.
746                      $loadSubsystemClass->Put($subsysID, $class);                  my $classList = $fig->subsystem_classification($subsysID);
747                  }                  my $classString = join($FIG_Config::splitter, grep { $_ } @$classList);
748                    $loadSubsystemClass->Put($subsysID, $classString);
749                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.
750                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
751                        # Get the role's abbreviation.
752                        my $abbr = $sub->get_role_abbr($col);
753                      # Connect to this role.                      # Connect to this role.
754                      $loadOccursInSubsystem->Add("roleIn");                      $loadOccursInSubsystem->Add("roleIn");
755                      $loadOccursInSubsystem->Put($roleID, $subsysID, $col);                      $loadOccursInSubsystem->Put($roleID, $subsysID, $abbr, $col);
756                      # If it's a new role, add it to the role table.                      # If it's a new role, add it to the role table.
757                      if (! exists $roleData{$roleID}) {                      if (! exists $roleData{$roleID}) {
758                          # Get the role's abbreviation.                          # Get the role's abbreviation.
                         my $abbr = $sub->get_role_abbr($col);  
759                          # Add the role.                          # Add the role.
760                          $loadRole->Put($roleID, $abbr);                          $loadRole->Put($roleID);
761                          $roleData{$roleID} = 1;                          $roleData{$roleID} = 1;
762                          # Check for an EC number.                          # Check for an EC number.
763                          if ($roleID =~ /\(EC ([^.]+\.[^.]+\.[^.]+\.[^)]+)\)\s*$/) {                          if ($roleID =~ /\(EC (\d+\.\d+\.\d+\.\d+)\s*\)\s*$/) {
764                              my $ec = $1;                              my $ec = $1;
765                              $loadRoleEC->Put($roleID, $ec);                              $loadIsIdentifiedByEC->Put($roleID, $ec);
766                              $ecToRoles{$ec} = $roleID;                              # Check to see if this is our first encounter with this EC.
767                                if (exists $ecToRoles{$ec}) {
768                                    # No, so just add this role to the EC list.
769                                    push @{$ecToRoles{$ec}}, $roleID;
770                                } else {
771                                    # Output this EC.
772                                    $loadRoleEC->Put($ec);
773                                    # Create its role list.
774                                    $ecToRoles{$ec} = [$roleID];
775                                }
776                          }                          }
777                      }                      }
778                  }                  }
# Line 871  Line 885 
885              # Now we need to link all the map's roles to it.              # Now we need to link all the map's roles to it.
886              # A hash is used to prevent duplicates.              # A hash is used to prevent duplicates.
887              my %roleHash = ();              my %roleHash = ();
888              for my $role ($fig->map_to_ecs($map)) {              for my $ec ($fig->map_to_ecs($map)) {
889                  if (exists $ecToRoles{$role} && ! $roleHash{$role}) {                  if (exists $ecToRoles{$ec}) {
890                      $loadRoleOccursIn->Put($ecToRoles{$role}, $map);                      for my $role (@{$ecToRoles{$ec}}) {
891                            if (! $roleHash{$role}) {
892                                $loadRoleOccursIn->Put($role, $map);
893                      $roleHash{$role} = 1;                      $roleHash{$role} = 1;
894                  }                  }
895              }              }
896          }          }
897                }
898            }
899          # Before we leave, we must create the Catalyzes table. We start with the reactions,          # Before we leave, we must create the Catalyzes table. We start with the reactions,
900          # then use the "ecToRoles" table to convert EC numbers to role IDs.          # then use the "ecToRoles" table to convert EC numbers to role IDs.
901          my @reactions = $fig->all_reactions();          my @reactions = $fig->all_reactions();
902          for my $reactionID (@reactions) {          for my $reactionID (@reactions) {
903              # Get this reaction's list of roles. The results will be EC numbers.              # Get this reaction's list of roles. The results will be EC numbers.
904              my @roles = $fig->catalyzed_by($reactionID);              my @ecs = $fig->catalyzed_by($reactionID);
905              # Loop through the EC numbers, creating catalyzation records.              # Loop through the EC numbers, creating catalyzation records.
906              for my $thisRole (@roles) {              for my $thisEC (@ecs) {
907                  if (exists $ecToRoles{$thisRole}) {                  if (exists $ecToRoles{$thisEC}) {
908                      $loadCatalyzes->Put($ecToRoles{$thisRole}, $reactionID);                      for my $thisRole (@{$ecToRoles{$thisEC}}) {
909                            $loadCatalyzes->Put($thisRole, $reactionID);
910                        }
911                  }                  }
912              }              }
913          }          }
# Line 935  Line 955 
955      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
956      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
957      my $loadProperty = $self->_TableLoader('Property');      my $loadProperty = $self->_TableLoader('Property');
958      my $loadHasProperty = $self->_TableLoader('HasProperty', $self->PrimaryOnly);      my $loadHasProperty = $self->_TableLoader('HasProperty');
959      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
960          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
961      } else {      } else {
# Line 943  Line 963 
963          # Create a hash for storing property IDs.          # Create a hash for storing property IDs.
964          my %propertyKeys = ();          my %propertyKeys = ();
965          my $nextID = 1;          my $nextID = 1;
966            # Get the attributes we intend to store in the property table.
967            my $propKeys = $self->{propKeys};
968          # Loop through the genomes.          # Loop through the genomes.
969          for my $genomeID (keys %{$genomeHash}) {          for my $genomeID (sort keys %{$genomeHash}) {
970              $loadProperty->Add("genomeIn");              $loadProperty->Add("genomeIn");
971              Trace("Generating properties for $genomeID.") if T(3);              Trace("Generating properties for $genomeID.") if T(3);
972              # Get the genome's features. The feature ID is the first field in the              # Initialize a counter.
             # tuples returned by "all_features_detailed". We use "all_features_detailed"  
             # rather than "all_features" because we want all features regardless of type.  
             my @features = map { $_->[0] } @{$fig->all_features_detailed($genomeID)};  
             my $featureCount = 0;  
973              my $propertyCount = 0;              my $propertyCount = 0;
974              # Loop through the features, creating HasProperty records.              # Get the properties for this genome's features.
975              for my $fid (@features) {              my @attributes = $fig->get_attributes("fig|$genomeID%", $propKeys);
976                  # Get all attributes for this feature. We do this one feature at a time              Trace("Property list built for $genomeID.") if T(3);
977                  # to insure we do not get any genome attributes.              # Loop through the results, creating HasProperty records.
978                  my @attributeList = $fig->get_attributes($fid, '', '', '');              for my $attributeData (@attributes) {
979                  if (scalar @attributeList) {                  # Pull apart the attribute tuple.
980                      $featureCount++;                  my ($fid, $key, $value, $url) = @{$attributeData};
                 }  
                 # Loop through the attributes.  
                 for my $tuple (@attributeList) {  
                     $propertyCount++;  
                     # Get this attribute value's data. Note that we throw away the FID,  
                     # since it will always be the same as the value if "$fid".  
                     my (undef, $key, $value, $url) = @{$tuple};  
981                      # Concatenate the key and value and check the "propertyKeys" hash to                      # Concatenate the key and value and check the "propertyKeys" hash to
982                      # see if we already have an ID for it. We use a tab for the separator                      # see if we already have an ID for it. We use a tab for the separator
983                      # character.                      # character.
# Line 984  Line 995 
995                      # Create the HasProperty entry for this feature/property association.                      # Create the HasProperty entry for this feature/property association.
996                      $loadHasProperty->Put($fid, $propertyID, $url);                      $loadHasProperty->Put($fid, $propertyID, $url);
997                  }                  }
             }  
998              # Update the statistics.              # Update the statistics.
999              Trace("$propertyCount attributes processed for $featureCount features.") if T(3);              Trace("$propertyCount attributes processed.") if T(3);
             $loadHasProperty->Add("featuresIn", $featureCount);  
1000              $loadHasProperty->Add("propertiesIn", $propertyCount);              $loadHasProperty->Add("propertiesIn", $propertyCount);
1001          }          }
1002      }      }
# Line 1032  Line 1041 
1041      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1042      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1043      my $loadAnnotation = $self->_TableLoader('Annotation');      my $loadAnnotation = $self->_TableLoader('Annotation');
1044      my $loadIsTargetOfAnnotation = $self->_TableLoader('IsTargetOfAnnotation', $self->PrimaryOnly);      my $loadIsTargetOfAnnotation = $self->_TableLoader('IsTargetOfAnnotation');
1045      my $loadSproutUser = $self->_TableLoader('SproutUser', $self->PrimaryOnly);      my $loadSproutUser = $self->_TableLoader('SproutUser');
1046      my $loadUserAccess = $self->_TableLoader('UserAccess', $self->PrimaryOnly);      my $loadUserAccess = $self->_TableLoader('UserAccess');
1047      my $loadMadeAnnotation = $self->_TableLoader('MadeAnnotation', $self->PrimaryOnly);      my $loadMadeAnnotation = $self->_TableLoader('MadeAnnotation');
1048      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1049          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1050      } else {      } else {
# Line 1139  Line 1148 
1148      # Get the genome hash.      # Get the genome hash.
1149      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1150      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1151      my $loadComesFrom = $self->_TableLoader('ComesFrom', $self->PrimaryOnly);      my $loadComesFrom = $self->_TableLoader('ComesFrom');
1152      my $loadSource = $self->_TableLoader('Source');      my $loadSource = $self->_TableLoader('Source');
1153      my $loadSourceURL = $self->_TableLoader('SourceURL');      my $loadSourceURL = $self->_TableLoader('SourceURL');
1154      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
# Line 1276  Line 1285 
1285      Compound      Compound
1286      CompoundName      CompoundName
1287      CompoundCAS      CompoundCAS
1288        IsIdentifiedByCAS
1289        HasCompoundName
1290      IsAComponentOf      IsAComponentOf
1291    
1292  This method proceeds reaction by reaction rather than genome by genome.  This method proceeds reaction by reaction rather than genome by genome.
# Line 1297  Line 1308 
1308      my $fig = $self->{fig};      my $fig = $self->{fig};
1309      # Create load objects for each of the tables we're loading.      # Create load objects for each of the tables we're loading.
1310      my $loadReaction = $self->_TableLoader('Reaction');      my $loadReaction = $self->_TableLoader('Reaction');
1311      my $loadReactionURL = $self->_TableLoader('ReactionURL', $self->PrimaryOnly);      my $loadReactionURL = $self->_TableLoader('ReactionURL');
1312      my $loadCompound = $self->_TableLoader('Compound', $self->PrimaryOnly);      my $loadCompound = $self->_TableLoader('Compound');
1313      my $loadCompoundName = $self->_TableLoader('CompoundName', $self->PrimaryOnly);      my $loadCompoundName = $self->_TableLoader('CompoundName');
1314      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS', $self->PrimaryOnly);      my $loadCompoundCAS = $self->_TableLoader('CompoundCAS');
1315      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf', $self->PrimaryOnly);      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf');
1316        my $loadIsIdentifiedByCAS = $self->_TableLoader('IsIdentifiedByCAS');
1317        my $loadHasCompoundName = $self->_TableLoader('HasCompoundName');
1318      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1319          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1320      } else {      } else {
1321          Trace("Generating annotation data.") if T(2);          Trace("Generating reaction data.") if T(2);
1322            # We need some hashes to prevent duplicates.
1323            my %compoundNames = ();
1324            my %compoundCASes = ();
1325          # First we create the compounds.          # First we create the compounds.
1326          my @compounds = $fig->all_compounds();          my @compounds = $fig->all_compounds();
1327          for my $cid (@compounds) {          for my $cid (@compounds) {
# Line 1314  Line 1330 
1330              # Each name will be given a priority number, starting with 1.              # Each name will be given a priority number, starting with 1.
1331              my $prio = 1;              my $prio = 1;
1332              for my $name (@names) {              for my $name (@names) {
1333                  $loadCompoundName->Put($cid, $name, $prio++);                  if (! exists $compoundNames{$name}) {
1334                        $loadCompoundName->Put($name);
1335                        $compoundNames{$name} = 1;
1336                    }
1337                    $loadHasCompoundName->Put($cid, $name, $prio++);
1338              }              }
1339              # Create the main compound record. Note that the first name              # Create the main compound record. Note that the first name
1340              # becomes the label.              # becomes the label.
# Line 1323  Line 1343 
1343              # Check for a CAS ID.              # Check for a CAS ID.
1344              my $cas = $fig->cas($cid);              my $cas = $fig->cas($cid);
1345              if ($cas) {              if ($cas) {
1346                  $loadCompoundCAS->Put($cid, $cas);                  $loadIsIdentifiedByCAS->Put($cid, $cas);
1347                    if (! exists $compoundCASes{$cas}) {
1348                        $loadCompoundCAS->Put($cas);
1349                        $compoundCASes{$cas} = 1;
1350                    }
1351              }              }
1352          }          }
1353          # All the compounds are set up, so we need to loop through the reactions next. First,          # All the compounds are set up, so we need to loop through the reactions next. First,
# Line 1360  Line 1384 
1384      return $retVal;      return $retVal;
1385  }  }
1386    
 =head3 LoadGroupData  
   
 C<< my $stats = $spl->LoadGroupData(); >>  
   
 Load the genome Groups into Sprout.  
   
 The following relations are loaded by this method.  
   
     GenomeGroups  
   
 There is no direct support for genome groups in FIG, so we access the SEED  
 files directly.  
   
 =over 4  
   
 =item RETURNS  
   
 Returns a statistics object for the loads.  
   
 =back  
   
 =cut  
 #: Return Type $%;  
 sub LoadGroupData {  
     # Get this object instance.  
     my ($self) = @_;  
     # Get the FIG object.  
     my $fig = $self->{fig};  
     # Get the genome hash.  
     my $genomeHash = $self->{genomes};  
     # Create a load object for the table we're loading.  
     my $loadGenomeGroups = $self->_TableLoader('GenomeGroups');  
     if ($self->{options}->{loadOnly}) {  
         Trace("Loading from existing files.") if T(2);  
     } else {  
         Trace("Generating group data.") if T(2);  
         # Loop through the genomes.  
         my $line;  
         for my $genomeID (keys %{$genomeHash}) {  
             Trace("Processing $genomeID.") if T(3);  
             # Open the NMPDR group file for this genome.  
             if (open(TMP, "<$FIG_Config::organisms/$genomeID/NMPDR") &&  
                 defined($line = <TMP>)) {  
                 # Clean the line ending.  
                 chomp $line;  
                 # Add the group to the table. Note that there can only be one group  
                 # per genome.  
                 $loadGenomeGroups->Put($genomeID, $line);  
             }  
             close TMP;  
         }  
     }  
     # Finish the load.  
     my $retVal = $self->_FinishAll();  
     return $retVal;  
 }  
   
1387  =head3 LoadSynonymData  =head3 LoadSynonymData
1388    
1389  C<< my $stats = $spl->LoadSynonymData(); >>  C<< my $stats = $spl->LoadSynonymData(); >>
# Line 1458  Line 1425 
1425          Trace("Generating synonym group data.") if T(2);          Trace("Generating synonym group data.") if T(2);
1426          # Get the database handle.          # Get the database handle.
1427          my $dbh = $fig->db_handle();          my $dbh = $fig->db_handle();
1428          # Ask for the synonyms.          # Ask for the synonyms. Note that "maps_to" is a group name, and "syn_id" is a PEG ID or alias.
1429          my $sth = $dbh->prepare_command("SELECT maps_to, syn_id FROM peg_synonyms ORDER BY maps_to");          my $sth = $dbh->prepare_command("SELECT maps_to, syn_id FROM peg_synonyms ORDER BY maps_to");
1430          my $result = $sth->execute();          my $result = $sth->execute();
1431          if (! defined($result)) {          if (! defined($result)) {
# Line 1470  Line 1437 
1437              my $featureCount = 0;              my $featureCount = 0;
1438              # Loop through the synonym/peg pairs.              # Loop through the synonym/peg pairs.
1439              while (my @row = $sth->fetchrow()) {              while (my @row = $sth->fetchrow()) {
1440                  # Get the synonym ID and feature ID.                  # Get the synonym group ID and feature ID.
1441                  my ($syn_id, $peg) = @row;                  my ($syn_id, $peg) = @row;
1442                  # Insure it's for one of our genomes.                  # Insure it's for one of our genomes.
1443                  my $genomeID = FIG::genome_of($peg);                  my $genomeID = FIG::genome_of($peg);
# Line 1506  Line 1473 
1473  The following relations are loaded by this method.  The following relations are loaded by this method.
1474    
1475      Family      Family
1476      ContainsFeature      IsFamilyForFeature
1477    
1478  The source information for these relations is taken from the C<families_for_protein>,  The source information for these relations is taken from the C<families_for_protein>,
1479  C<family_function>, and C<sz_family> methods of the B<FIG> object.  C<family_function>, and C<sz_family> methods of the B<FIG> object.
# Line 1530  Line 1497 
1497      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1498      # Create load objects for the tables we're loading.      # Create load objects for the tables we're loading.
1499      my $loadFamily = $self->_TableLoader('Family');      my $loadFamily = $self->_TableLoader('Family');
1500      my $loadContainsFeature = $self->_TableLoader('ContainsFeature');      my $loadIsFamilyForFeature = $self->_TableLoader('IsFamilyForFeature');
1501      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1502          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1503      } else {      } else {
# Line 1542  Line 1509 
1509              Trace("Processing features for $genomeID.") if T(2);              Trace("Processing features for $genomeID.") if T(2);
1510              # Loop through this genome's PEGs.              # Loop through this genome's PEGs.
1511              for my $fid ($fig->all_features($genomeID, "peg")) {              for my $fid ($fig->all_features($genomeID, "peg")) {
1512                  $loadContainsFeature->Add("features", 1);                  $loadIsFamilyForFeature->Add("features", 1);
1513                  # Get this feature's families.                  # Get this feature's families.
1514                  my @families = $fig->families_for_protein($fid);                  my @families = $fig->families_for_protein($fid);
1515                  # Loop through the families, connecting them to the feature.                  # Loop through the families, connecting them to the feature.
1516                  for my $family (@families) {                  for my $family (@families) {
1517                      $loadContainsFeature->Put($family, $fid);                      $loadIsFamilyForFeature->Put($family, $fid);
1518                      # If this is a new family, create a record for it.                      # If this is a new family, create a record for it.
1519                      if (! exists $familyHash{$family}) {                      if (! exists $familyHash{$family}) {
1520                          $familyHash{$family} = 1;                          $familyHash{$family} = 1;
# Line 1565  Line 1532 
1532      return $retVal;      return $retVal;
1533  }  }
1534    
1535    =head3 LoadDrugData
1536    
1537    C<< my $stats = $spl->LoadDrugData(); >>
1538    
1539    Load the drug target data into Sprout.
1540    
1541    The following relations are loaded by this method.
1542    
1543        PDB
1544        DocksWith
1545        IsProteinForFeature
1546        Ligand
1547    
1548    The source information for these relations is taken from attributes. The
1549    C<PDB> attribute links a PDB to a feature, and is used to build B<IsProteinForFeature>.
1550    The C<zinc_name> attribute describes the ligands. The C<docking_results>
1551    attribute contains the information for the B<DocksWith> relationship. It is
1552    expected that additional attributes and tables will be added in the future.
1553    
1554    =over 4
1555    
1556    =item RETURNS
1557    
1558    Returns a statistics object for the loads.
1559    
1560    =back
1561    
1562    =cut
1563    #: Return Type $%;
sub LoadDrugData {
    # Generate the load files for the PDB, DocksWith, IsProteinForFeature, and
    # Ligand tables from the attribute data, then finish all the loads and
    # return the resulting statistics object. When the loadOnly option is set,
    # no data is generated here.
    # Get this object instance.
    my ($self) = @_;
    # Get the FIG object.
    my $fig = $self->{fig};
    # Get the genome hash.
    my $genomeHash = $self->{genomes};
    # Create load objects for the tables we're loading.
    my $loadPDB = $self->_TableLoader('PDB');
    my $loadLigand = $self->_TableLoader('Ligand');
    my $loadIsProteinForFeature = $self->_TableLoader('IsProteinForFeature');
    my $loadDocksWith = $self->_TableLoader('DocksWith');
    if ($self->{options}->{loadOnly}) {
        Trace("Loading from existing files.") if T(2);
    } else {
        Trace("Generating drug target data.") if T(2);
        # First comes the "DocksWith" relationship. This will give us a list of PDBs.
        # We can also encounter PDBs when we process "IsProteinForFeature". To manage
        # this process, PDB information is collected in a hash table and then
        # unspooled after both relationships are created. The hash maps each
        # PDB ID to the number of docking results written for it.
        my %pdbHash = ();
        Trace("Generating docking data.") if T(2);
        # Get all the docking data. This may cause problems if there are too many PDBs,
        # at which point we'll need another algorithm. The indicator that this is
        # happening will be a timeout error in the next statement.
        my @dockData = $fig->query_attributes('$key = ? AND $value < ?',
                                              ['docking_results', $FIG_Config::dockLimit]);
        Trace(scalar(@dockData) . " rows of docking data found.") if T(3);
        for my $dockData (@dockData) {
            # Get the docking data components.
            my ($pdbID, $docking_key, @valueData) = @{$dockData};
            # Fix the PDB ID. It's supposed to be lower-case, but this does not always happen.
            $pdbID = lc $pdbID;
            # Strip off the object type.
            $pdbID =~ s/pdb://;
            # Extract the ZINC ID from the docking key. Note that there are two possible
            # formats.
            my (undef, $zinc_id) = $docking_key =~ /^docking_results::(ZINC)?(\d+)$/;
            if (! $zinc_id) {
                Trace("Invalid docking result key $docking_key for $pdbID.") if T(0);
                $loadDocksWith->Add("errors");
            } else {
                # Get the pieces of the value and parse the energy.
                # Note that we don't care about the rank (the first energy
                # component), since we can sort on the energy level itself
                # in our database.
                my ($energy, $tool, $type) = @valueData;
                my (undef, $total, $vanderwaals, $electrostatic) = split /\s*;\s*/, $energy;
                # Ignore predicted results.
                if ($type ne "Predicted") {
                    # Count this docking result.
                    $pdbHash{$pdbID}++;
                    # Write the result to the output.
                    $loadDocksWith->Put($pdbID, $zinc_id, $electrostatic, $type, $tool,
                                        $total, $vanderwaals);
                }
            }
        }
        Trace("Connecting features.") if T(2);
        # Loop through the genomes.
        for my $genome (sort keys %{$genomeHash}) {
            Trace("Generating PDBs for $genome.") if T(3);
            # Get all of the PDBs that BLAST against this genome's features.
            my @attributeData = $fig->get_attributes("fig|$genome%", 'PDB::%');
            for my $pdbData (@attributeData) {
                # The PDB ID is coded as a subkey.
                if ($pdbData->[1] !~ /PDB::(.+)/i) {
                    Trace("Invalid PDB ID \"$pdbData->[1]\" in attribute table.") if T(0);
                    $loadPDB->Add("errors");
                } else {
                    # Normalize the case so that this ID agrees with the
                    # lower-cased IDs recorded while processing the docking data.
                    my $pdbID = lc $1;
                    # Insure the PDB is in the hash.
                    if (! exists $pdbHash{$pdbID}) {
                        $pdbHash{$pdbID} = 0;
                    }
                    # The score and locations are coded in the attribute value.
                    if ($pdbData->[2] !~ /^([^;]+)(.*)$/) {
                        Trace("Invalid PDB data for $pdbID and feature $pdbData->[0].") if T(0);
                        $loadIsProteinForFeature->Add("errors");
                    } else {
                        my ($score, $locData) = ($1,$2);
                        # The location data may not be present, so we have to start with some
                        # defaults and then check.
                        my ($start, $end) = (1, 0);
                        # Only use the captures when the range pattern really matches. After a
                        # failed match, $1 and $2 would still hold the score and location text
                        # captured by the previous pattern.
                        if ($locData && $locData =~ /(\d+)-(\d+)/) {
                            ($start, $end) = ($1, $2);
                        }
                        # If we still don't have the end location, compute it from
                        # the feature length.
                        if (! $end) {
                            # Most features have one location, but we do a list iteration
                            # just in case.
                            my @locations = $fig->feature_location($pdbData->[0]);
                            $end = 0;
                            for my $loc (@locations) {
                                my $locObject = BasicLocation->new($loc);
                                $end += $locObject->Length;
                            }
                        }
                        # Decode the score.
                        my $realScore = FIGRules::DecodeScore($score);
                        # Connect the PDB to the feature.
                        $loadIsProteinForFeature->Put($pdbData->[0], $pdbID, $start, $realScore, $end);
                    }
                }
            }
        }
        # We've got all our PDBs now, so we unspool them from the hash.
        Trace("Generating PDBs. " . scalar(keys %pdbHash) . " found.") if T(2);
        my $count = 0;
        for my $pdbID (sort keys %pdbHash) {
            $loadPDB->Put($pdbID, $pdbHash{$pdbID});
            $count++;
            Trace("$count PDBs processed.") if T(3) && ($count % 500 == 0);
        }
        # Finally we create the ligand table. This information can be found in the
        # zinc_name attribute.
        Trace("Loading ligands.") if T(2);
        # The ligand list is huge, so we have to get it in pieces. We also have to check for duplicates.
        my $last_zinc_id = "";
        # The paging cursor is the object ID of the last row returned, whether or
        # not that ID was valid. Advancing only on valid IDs would cause a batch
        # ending in malformed IDs to be fetched again and again.
        my $lastObject = "ZINC:";
        my $done = 0;
        while (! $done) {
            # Get the next 10000 ligands. We insist that the object ID is greater than
            # the last ID we processed.
            Trace("Loading batch starting with $lastObject.") if T(3);
            my @attributeData = $fig->query_attributes('$object > ? AND $key = ? ORDER BY $object LIMIT 10000',
                                                       [$lastObject, "zinc_name"]);
            Trace(scalar(@attributeData) . " attribute rows returned.") if T(3);
            if (! @attributeData) {
                # Here there are no attributes left, so we quit the loop.
                $done = 1;
            } else {
                # Process the attribute data we've received.
                for my $zinc_data (@attributeData) {
                    # The ZINC ID is found in the first return column, prefixed with the word ZINC.
                    if ($zinc_data->[0] =~ /^ZINC:(\d+)$/) {
                        my $zinc_id = $1;
                        # Check for a duplicate.
                        if ($zinc_id eq $last_zinc_id) {
                            $loadLigand->Add("duplicate");
                        } else {
                            # Here it's safe to output the ligand. The ligand name is the attribute value
                            # (third column in the row).
                            $loadLigand->Put($zinc_id, $zinc_data->[2]);
                            # Insure we don't try to add this ID again.
                            $last_zinc_id = $zinc_id;
                        }
                    } else {
                        Trace("Invalid zinc ID \"$zinc_data->[0]\" in attribute table.") if T(0);
                        $loadLigand->Add("errors");
                    }
                }
                # Move the cursor past this batch. The rows are ordered by object ID,
                # so the last row carries the highest one. If the cursor cannot
                # advance, stop rather than repeat the same query forever.
                my $nextObject = $attributeData[-1]->[0];
                if (! defined $nextObject || $nextObject eq $lastObject) {
                    $done = 1;
                } else {
                    $lastObject = $nextObject;
                }
            }
        }
        Trace("Ligands loaded.") if T(2);
    }
    # Finish the load.
    my $retVal = $self->_FinishAll();
    return $retVal;
}
1730    
1731    
1732  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1733    
1734    =head3 SpecialAttribute
1735    
1736    C<< my $count = SproutLoad::SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $loader); >>
1737    
1738    Look for special attributes of a given type. A special attribute is found by comparing one of
1739    the columns of the incoming attribute list to a search pattern. If a match is found, then
1740    a set of columns is put into an output table connected to the specified ID.
1741    
1742    For example, when processing features, the attribute list we look at has three columns: attribute
1743    name, attribute value, and attribute value HTML. The IEDB attribute exists if the attribute name
1744    begins with C<iedb_>. The call signature is therefore
1745    
1746        my $found = SpecialAttribute($fid, \@attributeList, 0, [0,2], '^iedb_', $loadFeatureIEDB);
1747    
1748    The pattern is matched against column 0, and if we have a match, then the values of columns
1749    0 and 2 are joined with a space and put to the output along with the specified feature ID.
1750    
1751    =over 4
1752    
1753    =item id
1754    
1755    ID of the object whose special attributes are being loaded. This forms the first column of the
1756    output.
1757    
1758    =item attributes
1759    
1760    Reference to a list of tuples.
1761    
1762    =item idxMatch
1763    
1764    Index in each tuple of the column to be matched against the pattern. If the match is
1765    successful, an output record will be generated.
1766    
1767    =item idxValues
1768    
1769    Reference to a list containing the indexes in each tuple of the columns whose values
1770    are joined with spaces to form the second column of the output.
1771    
1772    =item pattern
1773    
1774    Pattern to be matched against the specified column. The match will be case-insensitive.
1775    
1776    =item loader
1777    
1778    An object to which each output record will be put. Usually this is an B<ERDBLoad> object,
1779    but technically it could be anything with a C<Put> method.
1780    
1781    =item RETURN
1782    
1783    Returns a count of the matches found.
1784    
1785    =item
1786    
1787    =back
1788    
1789    =cut
1790    
sub SpecialAttribute {
    # Scan the attribute tuples, write one output record for each tuple whose
    # match column fits the pattern, and return the number of records written.
    my ($id, $attributes, $idxMatch, $idxValues, $pattern, $loader) = @_;
    # Number of matching tuples found so far.
    my $matches = 0;
    for my $tuple (@{$attributes}) {
        # Skip tuples whose match column does not fit the pattern
        # (the comparison is case-insensitive).
        next unless $tuple->[$idxMatch] =~ m/$pattern/i;
        # Pull out the requested columns with a slice and glue them into a
        # single space-delimited output value.
        my @pieces = @{$tuple}[@{$idxValues}];
        $loader->Put($id, join(" ", @pieces));
        $matches++;
    }
    # Report the result at trace level 4, but only when something was found.
    if (T(4) && $matches) {
        Trace("$matches special attributes found for $id and loader " . $loader->RelName() . ".");
    }
    return $matches;
}
1811    
1812  =head3 TableLoader  =head3 TableLoader
1813    
1814  Create an ERDBLoad object for the specified table. The object is also added to  Create an ERDBLoad object for the specified table. The object is also added to
# Line 1581  Line 1823 
1823    
1824  Name of the table (relation) being loaded.  Name of the table (relation) being loaded.
1825    
 =item ignore  
   
 TRUE if the table should be ignored entirely, else FALSE.  
   
1826  =item RETURN  =item RETURN
1827    
1828  Returns an ERDBLoad object for loading the specified table.  Returns an ERDBLoad object for loading the specified table.
# Line 1595  Line 1833 
1833    
1834  sub _TableLoader {  sub _TableLoader {
1835      # Get the parameters.      # Get the parameters.
1836      my ($self, $tableName, $ignore) = @_;      my ($self, $tableName) = @_;
1837      # Create the load object.      # Create the load object.
1838      my $retVal = ERDBLoad->new($self->{erdb}, $tableName, $self->{loadDirectory}, $self->LoadOnly,      my $retVal = ERDBLoad->new($self->{erdb}, $tableName, $self->{loadDirectory}, $self->LoadOnly);
                                $ignore);  
1839      # Cache it in the loader list.      # Cache it in the loader list.
1840      push @{$self->{loaders}}, $retVal;      push @{$self->{loaders}}, $retVal;
1841      # Return it to the caller.      # Return it to the caller.
# Line 1670  Line 1907 
1907      return $retVal;      return $retVal;
1908  }  }
1909    
1910    =head3 GetGenomeAttributes
1911    
1912    C<< my $aHashRef = GetGenomeAttributes($fig, $genomeID, \@fids, \@propKeys); >>
1913    
1914    Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related
1915    attributes for all the features of a genome in a single call, then organizes them into
1916    a hash.
1917    
1918    =over 4
1919    
1920    =item fig
1921    
1922    FIG-like object for accessing attributes.
1923    
1924    =item genomeID
1925    
1926    ID of the genome whose attributes are desired.
1927    
1928    =item fids
1929    
1930    Reference to a list of the feature IDs whose attributes are to be kept.
1931    
1932    =item propKeys
1933    
1934    A list of the keys to retrieve.
1935    
1936    =item RETURN
1937    
1938    Returns a reference to a hash. The key of the hash is the feature ID. The value is the
1939    reference to a list of the feature's attribute tuples. Each tuple contains the feature ID,
1940    the attribute key, and one or more attribute values.
1941    
1942    =back
1943    
1944    =cut
1945    
sub GetGenomeAttributes {
    # Fetch the requested attributes for a whole genome and return a hash
    # reference mapping each feature ID in the incoming list to a list of its
    # attribute tuples.
    my ($fig, $genomeID, $fids, $propKeys) = @_;
    # Seed the result with an empty list for every feature of interest. This
    # tells us which features to keep and guarantees the caller a list
    # reference for each one.
    my %attributesOf = map { ($_ => []) } @{$fids};
    # Try to get everything for the genome in one request. The request can
    # die (the original notes mention timeouts while ev_code_cron is running),
    # so it is protected by an eval.
    my @allTuples = ();
    eval {
        @allTuples = $fig->get_attributes("fig|$genomeID%", $propKeys);
        Trace(scalar(@allTuples) . " attributes returned for genome $genomeID.") if T(3);
    };
    if ($@) {
        Trace("Retrying attributes for $genomeID due to error: $@") if T(1);
        # Fall back to asking for the features in batches of 100. This is
        # slower, but it lets the processing continue.
        my $fidCount = scalar @{$fids};
        my $first = 0;
        while ($first < $fidCount) {
            # The batch normally ends 99 positions later, but the final batch
            # stops at the last feature in the list.
            my $last = $first + 99;
            if ($last >= $fidCount) {
                $last = $fidCount - 1;
            }
            my @batch = @{$fids}[$first .. $last];
            Trace("Retrieving attributes for fids $first to $last.") if T(3);
            my @batchTuples = $fig->get_attributes(\@batch, $propKeys);
            Trace(scalar(@batchTuples) . " attributes returned for fids $first to $last.") if T(3);
            push @allTuples, @batchTuples;
            $first += 100;
        }
    }
    # File each tuple under its feature ID (the first element of the tuple),
    # discarding tuples for features that were not requested.
    for my $tuple (@allTuples) {
        my $owner = $tuple->[0];
        push @{$attributesOf{$owner}}, $tuple if exists $attributesOf{$owner};
    }
    return \%attributesOf;
}
1994    
1995    
1996  1;  1;

Legend:
Removed from v.1.62  
changed lines
  Added in v.1.89

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3