[Bio] / Sprout / SaplingGenomeLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/SaplingGenomeLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6, Sat Feb 26 19:05:32 2011 UTC revision 1.9, Thu Apr 21 19:58:18 2011 UTC
# Line 72  Line 72 
72      # Load the features.      # Load the features.
73      Trace("Loading features for $genome.") if T(2);      Trace("Loading features for $genome.") if T(2);
74      $loaderObject->LoadFeatures();      $loaderObject->LoadFeatures();
75        # Check for annotation history. If we have it, load the history records into the
76        # database.
77        if (-f "$directory/annotations") {
78            Trace("Processing annotations.") if T(3);
79            $loaderObject->LoadAnnotations("$directory/annotations");
80        }
81      # Load the subsystem bindings.      # Load the subsystem bindings.
82      Trace("Loading subsystems for $genome.") if T(2);      Trace("Loading subsystems for $genome.") if T(2);
83      $loaderObject->LoadSubsystems();      $loaderObject->LoadSubsystems();
# Line 112  Line 118 
118      my ($sap, $genome) = @_;      my ($sap, $genome) = @_;
119      # Create the statistics object.      # Create the statistics object.
120      my $stats = Stats->new();      my $stats = Stats->new();
121      # Delete the DNA.      # Delete the DNA sequences.
122      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'HasSection', 'DNASequence');      my @seqs = $sap->GetFlat('DNASequence', 'DNASequence(id) LIKE ?', ["$genome:%"], 'id');
123        for my $seq (@seqs) {
124            my $delStats = $sap->Delete(DNASequence => $seq);
125            $stats->Accumulate($delStats);
126        }
127      # Delete the contigs.      # Delete the contigs.
128      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'IsMadeUpOf', 'Contig');      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'IsMadeUpOf', 'Contig');
129      # Delete the features.      # Delete the features.
130      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'IsOwnerOf', 'Feature');      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'IsOwnerOf', 'Feature');
131      # Delete the molecular machines.      # Delete the molecular machines.
132      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'Uses', 'MolecularMachine');      SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'Uses', 'MolecularMachine');
133        # Delete the annotations.
134        SaplingDataLoader::DeleteRelatedRecords($sap, $genome, $stats, 'IsAnnotatedBy', 'Annotation');
135      # Delete the genome itself.      # Delete the genome itself.
136      my $subStats = $sap->Delete(Genome => $genome);      my $subStats = $sap->Delete(Genome => $genome);
137      # Accumulate the statistics from the delete.      # Accumulate the statistics from the delete.
# Line 128  Line 140 
140      return $stats;      return $stats;
141  }  }
142    
143    
144    =head3 Process
145    
146        my $stats = SaplingGenomeLoader::Process($sap, $genome, $directory);
147    
148    Load genome data from the specified directory. If the genome data already
149    exists in the database, it will be deleted first.
150    
151    =over 4
152    
153    =item sap
154    
155    L</Sapling> object for accessing the database.
156    
157    =item genome
158    
159    ID of the genome whose data is being loaded.
160    
161    =item directory
162    
163    Name of the directory containing the genome data files.
164    
165    =item RETURN
166    
167    Returns a statistics object describing the activity during the reload.
168    
169    =back
170    
171    =cut
172    
173    sub Process {
174        # Get the parameters.
175        my ($sap, $genome, $directory) = @_;
176        # Clear the existing data for the specified genome.
177        my $stats = ClearGenome($sap, $genome);
178        # Load the new genome data from the specified directory.
179        my $newStats = Load($sap, $genome, $directory);
180        # Merge the statistics.
181        $stats->Accumulate($newStats);
182        # Return the result.
183        return $stats;
184    }
185    
186    
187  =head2 Loader Object Methods  =head2 Loader Object Methods
188    
189  =head3 new  =head3 new
# Line 363  Line 419 
419  sub LoadFeatures {  sub LoadFeatures {
420      # Get the parameters.      # Get the parameters.
421      my ($self) = @_;      my ($self) = @_;
422        # Read in the functional assignments.
423        Trace("Reading functional assignments.") if T(3);
424        my $assignHash = $self->ReadAssignments();
425      # Get the directory of feature types.      # Get the directory of feature types.
426      my $featureDir = "$self->{directory}/Features";      my $featureDir = "$self->{directory}/Features";
427      my @types = Tracer::OpenDir("$self->{directory}/Features", 1);      my @types = Tracer::OpenDir("$self->{directory}/Features", 1);
# Line 371  Line 430 
430          # Insure this is a genuine feature directory.          # Insure this is a genuine feature directory.
431          if (-f "$featureDir/$type/tbl") {          if (-f "$featureDir/$type/tbl") {
432              # Yes, load the feature data.              # Yes, load the feature data.
433              $self->LoadFeatureData($featureDir, $type);              $self->LoadFeatureData($featureDir, $type, $assignHash);
434          }          }
435      }      }
436      # Check for protein sequences. If we have some, load them into the database.      # Check for protein sequences. If we have some, load them into the database.
437      if (-f "$featureDir/peg/fasta") {      if (-f "$featureDir/peg/fasta") {
438            Trace("Processing protein sequences.") if T(3);
439          $self->LoadProteinData("$featureDir/peg/fasta");          $self->LoadProteinData("$featureDir/peg/fasta");
440      }      }
441      # Check for annotation history. If we have it, load the history records into the      # Now loop through the features, connecting them to their roles. Note that deleted
442      # database.      # features will not be in the assignment hash.
443      if (-f "$featureDir/annotations") {      Trace("Connecting features to roles.") if T(3);
444          $self->LoadAnnotations("$featureDir/annotations");      for my $fid (keys %$assignHash) {
445            $self->ConnectFunctionRoles($fid, $assignHash->{$fid});
446      }      }
447  }  }
448    
449  =head3 LoadFeatureData  =head3 LoadFeatureData
450    
451      $loaderObject->LoadFeatureData($featureDir, $type);      $loaderObject->LoadFeatureData($featureDir, $type, $assignHash);
452    
453  Load the basic data for each feature into the database. The number of features of  Load the basic data for each feature into the database. The number of features of
454  the type found will be recorded in the statistics object.  the type found will be recorded in the statistics object.
# Line 402  Line 463 
463    
464  Type of feature to load.  Type of feature to load.
465    
466    =item assignHash
467    
468    Reference to a hash mapping each feature ID to its functional assignment.
469    
470  =back  =back
471    
472  =cut  =cut
473    
474  sub LoadFeatureData {  sub LoadFeatureData {
475      # Get the parameters.      # Get the parameters.
476      my ($self, $featureDir, $type) = @_;      my ($self, $featureDir, $type, $assignHash) = @_;
477      # Get the sapling database.      # Get the sapling database.
478      my $sap = $self->{sap};      my $sap = $self->{sap};
479      # Get the maximum location segment length. We'll need this later.      # Get the maximum location segment length. We'll need this later.
480      my $maxLength = $sap->TuningParameter('maxLocationLength');      my $maxLength = $sap->TuningParameter('maxLocationLength');
481      # Get the statistics object.      # Get the statistics object.
482      my $stats = $self->{stats};      my $stats = $self->{stats};
     # Read in the functional assignments.  
     my $assignHash = $self->ReadAssignments();  
483      # This hash will track the features we've created. If a feature is found a second      # This hash will track the features we've created. If a feature is found a second
484      # time, it overwrites the original.      # time, it overwrites the original.
485      my $fidHash = $self->{timestamps};      my $fidHash = $self->{timestamps};
# Line 432  Line 495 
495              %deletedFids = map { $_ => 1 } Tracer::GetFile($deleteFile);              %deletedFids = map { $_ => 1 } Tracer::GetFile($deleteFile);
496          }          }
497          # Open the main file for input.          # Open the main file for input.
498            Trace("Reading features from $fileName.") if T(3);
499          my $ih = Open(undef, "<$fileName");          my $ih = Open(undef, "<$fileName");
500          while (! eof $ih) {          while (! eof $ih) {
501              # Read this feature's information.              # Read this feature's information.
# Line 552  Line 616 
616              }              }
617          }          }
618      }      }
     # Now loop through the features, connecting them to their roles. Note that deleted  
     # features will not be in the assignment hash.  
     for my $fid (keys %$assignHash) {  
         # Get the roles and the error count.  
         my ($roles, $errors) = SeedUtils::roles_for_loading($assignHash->{$fid});  
         # Accumulate the errors in the stats object.  
         $stats->Add(roleErrors => $errors);  
         # Is this a suspicious function?  
         if (! defined $roles) {  
             # Yes, so track it.  
             $stats->Add(badFunction => 1);  
         } else {  
             # No, connect the roles.  
             for my $role (@$roles) {  
                 # Insure this role exists.  
                 my $hypo = hypo($role);  
                 $self->InsureEntity(Role => $role, hypothetical => $hypo);  
                 # Connect it to the feature.  
                 $sap->InsertObject('IsFunctionalIn', from_link => $role, to_link => $fid);  
             }  
         }  
     }  
619  }  }
620    
621  =head3 LoadProteinData  =head3 LoadProteinData
# Line 681  Line 723 
723                      $stats->Add(skippedStamp => 1);                      $stats->Add(skippedStamp => 1);
724                  }                  }
725                  # Form the annotation ID.                  # Form the annotation ID.
726                  my $annotationID = "$fid:" . Tracer::Pad(9999999999 - $keyStamp, 10,                  my $annotationID = SaplingDataLoader::ComputeAnnotationID($fid, $keyStamp);
                                                          1, "0");  
727                  $timeHash->{$fid}{$keyStamp} = 1;                  $timeHash->{$fid}{$keyStamp} = 1;
728                  # Generate the annotation.                  # Generate the annotation.
729                  $sap->InsertObject('IsAnnotatedBy', from_link => $fid, to_link => $annotationID);                  $sap->InsertObject('IsAnnotatedBy', from_link => $fid, to_link => $annotationID);
# Line 722  Line 763 
763      # Get the parameters.      # Get the parameters.
764      my ($self, $fid, $sequence) = @_;      my ($self, $fid, $sequence) = @_;
765      # Compute the key of the protein sequence.      # Compute the key of the protein sequence.
766      my $protID = ERDB::DigestKey($sequence);      my $protID = $self->{sap}->ProteinID($sequence);
767      # Insure the protein exists.      # Insure the protein exists.
768      $self->InsureEntity(ProteinSequence => $protID, sequence => $sequence);      $self->InsureEntity(ProteinSequence => $protID, sequence => $sequence);
769      # Connect the feature to it.      # Connect the feature to it.

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3