[Bio] / Sprout / SaplingGenomeLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/SaplingGenomeLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Tue Dec 14 19:48:38 2010 UTC revision 1.9, Thu Apr 21 19:58:18 2011 UTC
# Line 25  Line 25 
25      use SeedUtils;      use SeedUtils;
26      use SAPserver;      use SAPserver;
27      use Sapling;      use Sapling;
28        use AliasAnalysis;
29        use base qw(SaplingDataLoader);
30    
31  =head1 Sapling Genome Loader  =head1 Sapling Genome Loader
32    
# Line 65  Line 67 
67      # Create the loader object.      # Create the loader object.
68      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory);      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory);
69      # Load the contigs.      # Load the contigs.
70        Trace("Loading contigs for $genome.") if T(2);
71      $loaderObject->LoadContigs();      $loaderObject->LoadContigs();
72      # Load the features.      # Load the features.
73        Trace("Loading features for $genome.") if T(2);
74      $loaderObject->LoadFeatures();      $loaderObject->LoadFeatures();
75        # Check for annotation history. If we have it, load the history records into the
76        # database.
77        if (-f "$directory/annotations") {
78            Trace("Processing annotations.") if T(3);
79            $loaderObject->LoadAnnotations("$directory/annotations");
80        }
81      # Load the subsystem bindings.      # Load the subsystem bindings.
82        Trace("Loading subsystems for $genome.") if T(2);
83      $loaderObject->LoadSubsystems();      $loaderObject->LoadSubsystems();
84      # Create the Genome record and taxonomy information.      # Create the Genome record and taxonomy information.
85        Trace("Creating root for $genome.") if T(2);
86      $loaderObject->CreateGenome();      $loaderObject->CreateGenome();
87      # Return the statistics.      # Return the statistics.
88      return $loaderObject->{stats};      return $loaderObject->{stats};
# Line 106  Line 118 
118      my ($sap, $genome) = @_;      my ($sap, $genome) = @_;
119      # Create the statistics object.      # Create the statistics object.
120      my $stats = Stats->new();      my $stats = Stats->new();
121      # Delete the DNA.      # Delete the DNA sequences.
122      DeleteRelatedRecords($sap, $genome, $stats, 'HasSection', 'DNASequence');      my @seqs = $sap->GetFlat('DNASequence', 'DNASequence(id) LIKE ?', ["$genome:%"], 'id');
123        for my $seq (@seqs) {
124            my $delStats = $sap->Delete(DNASequence => $seq);
125            $stats->Accumulate($delStats);
126        }
127      # Delete the contigs.      # Delete the contigs.
128      DeleteRelatedRecords($sap, $genome, $stats, 'IsMadeUpOf', 'Contig');      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'IsMadeUpOf', 'Contig');
129      # Delete the features.      # Delete the features.
130      DeleteRelatedRecords($sap, $genome, $stats, 'IsOwnerOf', 'Feature');      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'IsOwnerOf', 'Feature');
131      # Delete the molecular machines.      # Delete the molecular machines.
132      DeleteRelatedRecords($sap, $genome, $stats, 'Uses', 'MolecularMachine');      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'Uses', 'MolecularMachine');
133        # Delete the annotations.
134        SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'IsAnnotatedBy', 'Annotation');
135      # Delete the genome itself.      # Delete the genome itself.
136      my $subStats = $sap->Delete(Genome => $genome);      my $subStats = $sap->Delete(Genome => $genome);
137      # Accumulate the statistics from the delete.      # Accumulate the statistics from the delete.
# Line 122  Line 140 
140      return $stats;      return $stats;
141  }  }
142    
143    
144    =head3 Process
145    
146        my $stats = SaplingGenomeLoader::Process($sap, $genome, $directory);
147    
148    Load genome data from the specified directory. If the genome data already
149    exists in the database, it will be deleted first.
150    
151    =over 4
152    
153    =item sap
154    
155    L</Sapling> object for accessing the database.
156    
157    =item genome
158    
159    ID of the genome whose  data is being loaded.
160    
161    =item directory
162    
163    Name of the directory containing the genome data files.
164    
165    =item RETURN
166    
167    Returns a statistics object describing the activity during the reload.
168    
169    =back
170    
171    =cut
172    
173    sub Process {
174        # Get the parameters.
175        my ($sap, $genome, $directory) = @_;
176        # Clear the existing data for the specified genome.
177        my $stats = ClearGenome($sap, $genome);
178        # Load the new expression data from the specified directory.
179        my $newStats = Load($sap, $genome, $directory);
180        # Merge the statistics.
181        $stats->Accumulate($newStats);
182        # Return the result.
183        return $stats;
184    }
185    
186    
187  =head2 Loader Object Methods  =head2 Loader Object Methods
188    
189  =head3 new  =head3 new
# Line 171  Line 233 
233    
234  L<Stats> object for tracking statistical information about the load.  L<Stats> object for tracking statistical information about the load.
235    
236    =item timestamps
237    
238    A hash of hashes, keyed by feature ID. The sub-hashes are keyed by annotation timestamp,
239    and used to prevent duplicate timestamps.
240    
241  =back  =back
242    
243  =cut  =cut
# Line 179  Line 246 
246      # Get the parameters.      # Get the parameters.
247      my ($class, $sap, $genome, $directory) = @_;      my ($class, $sap, $genome, $directory) = @_;
248      # Create the object.      # Create the object.
249      my $retVal = {      my $retVal = SaplingDataLoader::new($class, $sap, qw(contigs dna pegs rnas));
250          sap => $sap,      # Add our specialized data.
251          genome => $genome,      $retVal->{genome} = $genome;
252          directory => $directory,      $retVal->{directory} = $directory;
253          stats => Stats->new(qw(contigs dna pegs rnas)),      $retVal->{timestamps} = {};
254          supportRecords => {}      # Return the result.
     };  
     # Bless and return it.  
     bless $retVal, $class;  
255      return $retVal;      return $retVal;
256  }  }
257    
# Line 300  Line 364 
364      # Compute the chunk ID.      # Compute the chunk ID.
365      my $chunkID = "$contigID:" . Tracer::Pad($ordinal, 7, 1, '0');      my $chunkID = "$contigID:" . Tracer::Pad($ordinal, 7, 1, '0');
366      # Connect this sequence to the contig.      # Connect this sequence to the contig.
367      $sap->InsertObject('HasSection', from_link => $contigID, to_link => $chunk);      $sap->InsertObject('HasSection', from_link => $contigID, to_link => $chunkID);
368      # Create the DNA sequence.      # Create the DNA sequence.
369      $sap->InsertObject('DNASequence', id => $chunkID, sequence => $chunk);      $sap->InsertObject('DNASequence', id => $chunkID, sequence => $chunk);
370      # Record the chunk.      # Record the chunk.
# Line 355  Line 419 
419  sub LoadFeatures {  sub LoadFeatures {
420      # Get the parameters.      # Get the parameters.
421      my ($self) = @_;      my ($self) = @_;
422        # Read in the functional assignments.
423        Trace("Reading functional assignments.") if T(3);
424        my $assignHash = $self->ReadAssignments();
425      # Get the directory of feature types.      # Get the directory of feature types.
426      my $featureDir = "$self->{directory}/Features";      my $featureDir = "$self->{directory}/Features";
427      my @types = Tracer::OpenDir("$self->{directory}/Features", 1);      my @types = Tracer::OpenDir("$self->{directory}/Features", 1);
# Line 363  Line 430 
430          # Insure this is a genuine feature directory.          # Insure this is a genuine feature directory.
431          if (-f "$featureDir/$type/tbl") {          if (-f "$featureDir/$type/tbl") {
432              # Yes, load the feature data.              # Yes, load the feature data.
433              $self->LoadFeatureData($featureDir, $type);              $self->LoadFeatureData($featureDir, $type, $assignHash);
434          }          }
435      }      }
436      # Check for protein sequences. If we have some, load them into the database.      # Check for protein sequences. If we have some, load them into the database.
437      if (-f "$featureDir/peg/fasta") {      if (-f "$featureDir/peg/fasta") {
438            Trace("Processing protein sequences.") if T(3);
439          $self->LoadProteinData("$featureDir/peg/fasta");          $self->LoadProteinData("$featureDir/peg/fasta");
440      }      }
441        # Now loop through the features, connecting them to their roles. Note that deleted
442        # features will not be in the assignment hash.
443        Trace("Connecting features to roles.") if T(3);
444        for my $fid (keys %$assignHash) {
445            $self->ConnectFunctionRoles($fid, $assignHash->{$fid});
446        }
447  }  }
448    
449  =head3 LoadFeatureData  =head3 LoadFeatureData
450    
451      $self->LoadFeatureData($featureDir, $type);      $loaderObject->LoadFeatureData($featureDir, $type, $assignHash);
452    
453  Load the basic data for each feature into the database. The number of features of  Load the basic data for each feature into the database. The number of features of
454  the type found will be recorded in the statistics object.  the type found will be recorded in the statistics object.
# Line 389  Line 463 
463    
464  Type of feature to load.  Type of feature to load.
465    
466    =item assignHash
467    
468    Reference to a hash mapping each feature ID to its functional assignment.
469    
470  =back  =back
471    
472  =cut  =cut
473    
474  sub LoadFeatureData {  sub LoadFeatureData {
475      # Get the parameters.      # Get the parameters.
476      my ($self, $featureDir, $type) = @_;      my ($self, $featureDir, $type, $assignHash) = @_;
477      # Get the sapling database.      # Get the sapling database.
478      my $sap = $self->{sap};      my $sap = $self->{sap};
479      # Get the maximum location  segment length. We'll need this later.      # Get the maximum location  segment length. We'll need this later.
480      my $maxLength = $sap->TuningParameter('maxLocationLength');      my $maxLength = $sap->TuningParameter('maxLocationLength');
481      # Get the statistics object.      # Get the statistics object.
482      my $stats = $self->{stats};      my $stats = $self->{stats};
     # Read in the functional assignments.  
     my $assignHash = $self->ReadAssignments();  
483      # This hash will track the features we've created. If a feature is found a second      # This hash will track the features we've created. If a feature is found a second
484      # time, it overwrites the original.      # time, it overwrites the original.
485      my %fids;      my $fidHash = $self->{timestamps};
486        # Finally, we need the timestamp hash. The initial feature population
487      # Insure we have a tbl file for this feature type.      # Insure we have a tbl file for this feature type.
488      my $fileName = "$featureDir/$type/tbl";      my $fileName = "$featureDir/$type/tbl";
489      if (-f $fileName) {      if (-f $fileName) {
490          # We have one, so open it for input.          # We have one, so we can read through it. First, however, we need to get the list
491            # of deleted features.
492            my %deletedFids;
493            my $deleteFile = "$featureDir/$type/deleted.features";
494            if (-f $deleteFile) {
495                %deletedFids = map { $_ => 1 } Tracer::GetFile($deleteFile);
496            }
497            # Open the main file for input.
498            Trace("Reading features from $fileName.") if T(3);
499          my $ih = Open(undef, "<$fileName");          my $ih = Open(undef, "<$fileName");
500          while (! eof $ih) {          while (! eof $ih) {
501              # Read this feature's information.              # Read this feature's information.
502              my ($fid, $locations, @aliases) = Tracer::GetLine($ih);              my ($fid, $locations, @aliases) = Tracer::GetLine($ih);
503              # If the feature already exists, delete it.              # Only proceed if the feature is NOT deleted.
504              if (exists $fids{$fid}) {              if (! exists $deletedFids{$fid}) {
505                    # If the feature already exists, delete it. (This should be extremely rare.)
506                    if (exists $fidHash->{$fid}) {
507                  $sap->Delete(Feature => $fid);                  $sap->Delete(Feature => $fid);
508                  $stats->Add(duplicateFid => 1);                  $stats->Add(duplicateFid => 1);
             } else {  
                 # Otherwise connect it to the genome.  
                 $sap->InsertObject('IsOwnerOf', from_link => $self->{genome}, to_link => $fid);  
509              }              }
510                    # Otherwise connect this feature to the genome.
511                    $sap->InsertObject('IsOwnerOf', from_link => $self->{genome}, to_link => $fid);
512              # Now we must parse the locations. This will contain a list of the location              # Now we must parse the locations. This will contain a list of the location
513              # data 4-tuples (contig, start, dir, len).              # data 4-tuples (contig, start, dir, len).
514              my @locData;              my @locData;
# Line 451  Line 537 
537              $sap->InsertObject('Feature', id => $fid, feature_type => $type,              $sap->InsertObject('Feature', id => $fid, feature_type => $type,
538                                 function => $assignHash->{$fid}, locked => 0,                                 function => $assignHash->{$fid}, locked => 0,
539                                 sequence_length => $length);                                 sequence_length => $length);
540              $fids{$fid} = 1;                  $fidHash->{$fid} = {};
541              $stats->Add($type => 1);              $stats->Add($type => 1);
542              # The next step is to connect the feature record to its locations. This              # The next step is to connect the feature record to its locations. This
543              # involves dividing the location into segments. The following variable              # involves dividing the location into segments. The following variable
# Line 482  Line 568 
568                  # Output the last segment.                  # Output the last segment.
569                  $self->ConnectLocation($fid, $contig, $segment, $left, $dir, $len);                  $self->ConnectLocation($fid, $contig, $segment, $left, $dir, $len);
570              }              }
571                    # Now we process the aliases and create the identifiers. We don't do this
572                    # for RNA, because the RNA function is stored in the aliases.
573                    if ($type ne 'rna') {
574                        for my $alias (@aliases) {
575                            my $normalized;
576                            # Determine the type.
577                            my $aliasType = AliasAnalysis::TypeOf($alias);
578                            $stats->Add(aliasAll => 1);
579                            # Is this a recognized type?
580                            if ($aliasType) {
581                                $stats->Add(aliasNormal => 1);
582                                # Yes. Write it normally.
583                                $self->CreateIdentifier($alias, B => $aliasType, $fid);
584                            } elsif ($alias =~ /^LocusTag:(.+)/ || $alias =~ /^(?:locus|locus_tag|LocusTag)\|(.+)/) {
585                                # No, but this is a specially-marked locus tag.
586                                $normalized = $1;
587                                $stats->Add(aliasLocus => 1);
588                                $self->CreateIdentifier($normalized, B => 'LocusTag', $fid);
589                            } elsif ($normalized = AliasAnalysis::IsNatural(LocusTag => $alias)) {
590                                # No, but this is a natural locus tag.
591                                $stats->Add(aliasLocus => 1);
592                                $self->CreateIdentifier($normalized, B => 'LocusTag', $fid);
593                            } elsif ($normalized = AliasAnalysis::IsNatural(GENE => $alias)) {
594                                # No, but this is a natural gene name.
595                                $stats->Add(aliasGene => 1);
596                                $self->CreateIdentifier($normalized, B => 'GENE', $fid);
597                            } elsif ($alias =~ /^\d+$/) {
598                                # Here it's a naked number, which means it's a GI number
599                                # of some sort.
600                                $stats->Add(aliasGI => 1);
601                                $self->CreateIdentifier("gi|$alias", B => 'NCBI', $fid);
602                            } elsif ($alias =~ /^protein_id\|(.+)/) {
603                                # Here we have a REFSEQ protein ID. Right now we don't have a way to
604                                # handle that, because we don't know the feature's protein ID here.
605                                $stats->Add(aliasProtein => 1);
606                            } elsif ($alias =~ /[:|]/) {
607                                # Here it's an alias of an unknown type, so we skip it.
608                                $stats->Add(aliasUnknown => 1);
609                            } else {
610                                # Here it's a miscellaneous type.
611                                $stats->Add(aliasMisc => 1);
612                                $self->CreateIdentifier($alias, B => 'Miscellaneous', $fid);
613                            }
614                        }
615                    }
616                }
617          }          }
618      }      }
619  }  }
# Line 540  Line 672 
672      }      }
673  }  }
674    
675    
676    =head3 LoadAnnotations
677    
678        $loaderObject->LoadAnnotations($fileName);
679    
680    Read in the annotation history information and use it to create annotation records.
681    
682    =over 4
683    
684    =item fileName
685    
686    Name of the annotation history file. This file is formatted with four fields per
687    record. Each field is on a separate line, with a double slash (C<//>) used as the
688    line terminator. The fields, in order, are (0) the feature ID, (1) the timestamp
689    (formatted as an integer), (2) the user name, and (3) the annotation text.
690    
691    =back
692    
693    =cut
694    
695    sub LoadAnnotations {
696        # Get the parameters.
697        my ($self, $fileName) = @_;
698        # Get the timestamp hash.
699        my $timeHash = $self->{timestamps};
700        # Get the Sapling database.
701        my $sap = $self->{sap};
702        # Get the statistics object.
703        my $stats = $self->{stats};
704        # Open the input file.
705        my $ih = Tracer::Open(undef, "<$fileName");
706        # Loop through the input.
707        while (! eof $ih) {
708            # Read in the peg, timestamp, and user ID.
709            my ($fid, $timestamp, $user, $text) = ReadAnnotation($ih);
710            # Only proceed if the feature exists.
711            if (! exists $timeHash->{$fid}) {
712                $stats->Add(skippedAnnotation => 1);
713            } else {
714                # Change assignments by the master user to FIG assignments.
715                $text =~ s/Set master function/Set FIG function/s;
716                # Insure the time stamp is valid.
717                if ($timestamp =~ /^\d+$/) {
718                    # Here it's a number. We need to insure the one we use to form
719                    # the key is unique.
720                    my $keyStamp = $timestamp;
721                    while ($timeHash->{$fid}{$keyStamp}) {
722                        $keyStamp++;
723                        $stats->Add(skippedStamp => 1);
724                    }
725                    # Form the annotation ID.
726                    my $annotationID = SaplingDataLoader::ComputeAnnotationID($fid, $keyStamp);
727                    $timeHash->{$fid}{$keyStamp} = 1;
728                    # Generate the annotation.
729                    $sap->InsertObject('IsAnnotatedBy', from_link => $fid, to_link => $annotationID);
730                    $sap->InsertObject('Annotation', id => $annotationID, annotation_time => $timestamp,
731                                comment => $text, annotator => $user);
732                } else {
733                    # Here we have an invalid time stamp.
734                    Trace("Invalid time stamp \"$timestamp\" in annotations for $fid.") if T(1);
735                }
736            }
737        }
738    }
739    
740    
741  =head3 WriteProtein  =head3 WriteProtein
742    
743      $loaderObject->WriteProtein($fid, $sequence);      $loaderObject->WriteProtein($fid, $sequence);
# Line 565  Line 763 
763      # Get the parameters.      # Get the parameters.
764      my ($self, $fid, $sequence) = @_;      my ($self, $fid, $sequence) = @_;
765      # Compute the key of the protein sequence.      # Compute the key of the protein sequence.
766      my $protID = ERDB::DigestKey($sequence);      my $protID = $self->{sap}->ProteinID($sequence);
767      # Insure the protein exists.      # Insure the protein exists.
768      $self->InsureEntity(ProteinSequence => $protID, sequence => $sequence);      $self->InsureEntity(ProteinSequence => $protID, sequence => $sequence);
769      # Connect the feature to it.      # Connect the feature to it.
770      $self->{sap}->InsertObject('IsProteinFor', from_link => $protID, to_link => $fid);      $self->{sap}->InsertObject('IsProteinFor', from_link => $protID, to_link => $fid);
771  }  }
772    
 =head3 InsureEntity  
   
     my $createdFlag = $loaderObject->InsureEntity($entityType => $id, %fields);  
   
 Insure that the specified record exists in the database. If no record is found of the  
 specified type with the specified ID, one will be created with the indicated fields.  
   
 =over 4  
   
 =item $entityType  
   
 Type of entity to check.  
   
 =item id  
   
 ID of the entity instance in question.  
   
 =item fields  
   
 Hash mapping field names to values for all the fields in the desired entity record except  
 for the ID.  
   
 =item RETURN  
   
 Returns TRUE if a new object was created, FALSE if it already existed.  
   
 =back  
   
 =cut  
   
 sub InsureEntity {  
     # Get the parameters.  
     my ($self, $entityType, $id, %fields) = @_;  
     # Get the database.  
     my $sap = $self->{sap};  
     # Get the support record ID hash.  
     my $supportHash = $self->{supportRecords};  
     # Denote we haven't created a new record.  
     my $retVal = 0;  
     # Get the sub-hash for this entity type.  
     my $entityHash = $supportHash->{$entityType};  
     if (! defined $entityHash) {  
         $entityHash = {};  
         $supportHash->{$entityType} = $entityHash;  
     }  
     # Check for this instance.  
     if (! $entityHash->{$id}) {  
         # It's not found. Check the database.  
         if (! $sap->Exists($entityType => $id)) {  
             # It's not in the database either, so create it.  
             $sap->InsertObject($entityType, id => $id, %fields);  
             $self->{stats}->Add(insertSupport => 1);  
             $retVal = 1;  
         }  
         # Mark the record in the hash so we know we have it.  
         $entityHash->{$id} = 1;  
     }  
     # Return the insertion indicator.  
     return $retVal;  
 }  
   
773  =head3 LoadSubsystems  =head3 LoadSubsystems
774    
775      $loaderObject->LoadSubsystems();      $loaderObject->LoadSubsystems();
# Line 688  Line 825 
825              if (@$roleList > 0) {              if (@$roleList > 0) {
826                  # Get the subsystem information from the first role and create the subsystem.                  # Get the subsystem information from the first role and create the subsystem.
827                  my $roleH = $roleList->[0];                  my $roleH = $roleList->[0];
828                  my %subFields = ExtractFields(Subsystem => $roleH);                  my %subFields = SaplingDataLoader::ExtractFields(Subsystem => $roleH);
829                  $sap->InsertObject('Subsystem', %subFields);                  $sap->InsertObject('Subsystem', %subFields);
830                  # Now loop through the roles. The Includes records are always inserted, but the                  # Now loop through the roles. The Includes records are always inserted, but the
831                  # roles are only inserted if they don't already exist.                  # roles are only inserted if they don't already exist.
832                  for $roleH (@$roleList) {                  for $roleH (@$roleList) {
833                      # Create the Includes record.                      # Create the Includes record.
834                      my %incFields = ExtractFields(Includes => $roleH);                      my %incFields = SaplingDataLoader::ExtractFields(Includes => $roleH);
835                      $sap->InsertObject('Includes', %incFields);                      $sap->InsertObject('Includes', %incFields);
836                      # Insure we have the role in place.                      # Insure we have the role in place.
837                      my %roleFields = ExtractFields(Role => $roleH);                      my %roleFields = SaplingDataLoader::ExtractFields(Role => $roleH);
838                      my $roleID = $roleFields{id};                      my $roleID = $roleFields{id};
839                      delete $roleFields{id};                      delete $roleFields{id};
840                      $self->InsureEntity('Role', $roleID, %roleFields);                      $self->InsureEntity('Role', $roleID, %roleFields);
# Line 935  Line 1072 
1072      $self->{stats}->Add(segment => 1);      $self->{stats}->Add(segment => 1);
1073  }  }
1074    
1075  =head2 Internal Utility Methods  =head3 CreateIdentifier
   
 =head3 DeleteRelatedRecords  
1076    
1077      DeleteRelatedRecords($sap, $genome, $stats, $relName, $entityName);      $loaderObject->CreateIdentifier($alias, $conf, $aliasType, $fid);
1078    
1079  Delete all the records in the named entity and relationship relating to the  Link an identifier to a feature. The identifier is presented in prefixed form and is of the
1080  specified genome and roll up the statistics in the specified statistics object.  specified type and the specified confidence level.
1081    
1082  =over 4  =over 4
1083    
1084  =item sap  =item alias
1085    
1086  L<Sapling> object for accessing the database.  Identifier to connect to the feature.
1087    
1088  =item genome  =item conf
1089    
1090  ID of the relevant genome.  Confidence level (C<A> curated, C<B> normal, C<C> protein only).
1091    
1092  =item stats  =item aliasType
1093    
1094  L<Stats> object for tracking the delete activity.  Type of alias (e.g. C<NCBI>, C<LocusTag>).
1095    
1096  =item relName  =item fid
   
 Name of a relationship from the B<Genome> table.  
   
 =item entityName  
1097    
1098  Name of the entity on the other side of the relationship.  ID of the relevant feature.
1099    
1100  =back  =back
1101    
1102  =cut  =cut
1103    
1104  sub DeleteRelatedRecords {  sub CreateIdentifier {
1105      # Get the parameters.      # Get the parameters.
1106      my ($sap, $genome, $stats, $relName, $entityName) = @_;      my ($self, $alias, $conf, $aliasType, $fid) = @_;
1107      # Get all the relationship records.      # Get the Sapling object.
1108      my (@targets) = $sap->GetFlat($relName, "$relName(from-link) = ?", [$genome],      my $sap = $self->{sap};
1109                                    "to-link");      # Compute the identifier's natural form.
1110      # Loop through the relationship records, deleting them and the target entity      my $natural = $alias;
1111      # records.      if ($natural =~ /[:|](.+)/) {
1112      for my $target (@targets) {          $natural = $1;
1113          # Delete the relationship instance.      }
1114          $sap->DeleteRow($relName, $genome, $target);      # Insure the identifier exists in the database.
1115          $stats->Add($relName => 1);      $self->InsureEntity(Identifier => $alias, source => $aliasType, natural_form => $natural);
1116          # Delete the entity instance.      # Connect the identifier to the feature.
1117          my $subStats = $sap->Delete($entityName, $target);      $sap->InsertObject('IsIdentifiedBy', to_link => $alias, from_link => $fid, conf => $conf);
         # Roll up the statistics.  
         $stats->Accumulate($subStats);  
     }  
1118  }  }
1119    
1120  =head3 ExtractFields  =head2 Internal Utility Methods
   
     my %fieldHash = SaplingGenomeLoader::ExtractFields($tableName, $dataHash);  
1121    
1122  Extract from the incoming hash the field names and values from the specified table.  =head3 ReadAnnotation
1123    
1124  =over 4      my ($fid, $timestamp, $user, $text) = SaplingGenomeLoader::ReadAnnotation($ih);
1125    
1126  =item tableName  Read the next record from an annotation file. The next record must exist (that is, an
1127    end-of-file check should have been performed before calling this method).
1128    
1129  Name of the table whose field names and values are desired.  =over 4
1130    
1131  =item dataHash  =item ih
1132    
1133  Reference to a hash mapping fully-qualified ERDB field names to values.  Open file handle for the annotation file.
1134    
1135  =item RETURN  =item RETURN
1136    
1137  Returns a hash containing only the fields from the specified table and their values.  Returns a list containing the four fields of the record read-- (0) the feature ID, (1) the
1138    timestamp, (2) the user ID, and (3) the annotation text.
1139    
1140  =back  =back
1141    
1142  =cut  =cut
1143    
1144  sub ExtractFields {  sub ReadAnnotation {
1145      # Get the parameters.      # Get the parameter.
1146      my ($tableName, $dataHash) = @_;      my ($ih) = @_;
1147      # Declare the return variable.      # Read the three fixed fields.
1148      my %retVal;      my $fid = <$ih>; chomp $fid;
1149      # Extract the desired fields.      my $timestamp = <$ih>; chomp $timestamp;
1150      for my $field (keys %$dataHash) {      my $user = <$ih>; chomp $user;
1151          # Is this a field for the specified table?      # Loop through the lines of the text field.
1152          if ($field =~ /^$tableName\(([^)]+)/) {      my $text = "";
1153              # Yes, put it in the output hash.      my $line = <$ih>;
1154              $retVal{$1} = $dataHash->{$field};      while ($line ne "//\n") {
1155          }          $text .= $line;
1156      }          $line = <$ih>;
1157      # Return the computed hash.      }
1158      return %retVal;      # Remove the trailing new-line from the text.
1159        chomp $text;
1160        # Return the fields.
1161        return ($fid, $timestamp, $user, $text);
1162  }  }
1163    
1164  1;  1;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3