[Bio] / Sprout / SaplingGenomeLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/SaplingGenomeLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6, Sat Feb 26 19:05:32 2011 UTC revision 1.7, Wed Mar 23 18:42:34 2011 UTC
# Line 72  Line 72 
72      # Load the features.      # Load the features.
73      Trace("Loading features for $genome.") if T(2);      Trace("Loading features for $genome.") if T(2);
74      $loaderObject->LoadFeatures();      $loaderObject->LoadFeatures();
75        # Check for annotation history. If we have it, load the history records into the
76        # database.
77        if (-f "$directory/annotations") {
78            Trace("Processing annotations.") if T(3);
79            $loaderObject->LoadAnnotations("$directory/annotations");
80        }
81      # Load the subsystem bindings.      # Load the subsystem bindings.
82      Trace("Loading subsystems for $genome.") if T(2);      Trace("Loading subsystems for $genome.") if T(2);
83      $loaderObject->LoadSubsystems();      $loaderObject->LoadSubsystems();
# Line 128  Line 134 
134      return $stats;      return $stats;
135  }  }
136    
137    
138    =head3 Process
139    
140        my $stats = SaplingGenomeLoader::Process($sap, $genome, $directory);
141    
142    Load genome data from the specified directory. If the genome data already
143    exists in the database, it will be deleted first.
144    
145    =over 4
146    
147    =item sap
148    
149    L</Sapling> object for accessing the database.
150    
151    =item genome
152    
153    ID of the genome whose data is being loaded.
154    
155    =item directory
156    
157    Name of the directory containing the genome data files.
158    
159    =item RETURN
160    
161    Returns a statistics object describing the activity during the reload.
162    
163    =back
164    
165    =cut
166    
167    sub Process {
168        # Get the parameters.
169        my ($sap, $genome, $directory) = @_;
170        # Clear the existing data for the specified genome.
171        my $stats = ClearGenome($sap, $genome);
172        # Load the new genome data from the specified directory.
173        my $newStats = Load($sap, $genome, $directory);
174        # Merge the statistics.
175        $stats->Accumulate($newStats);
176        # Return the result.
177        return $stats;
178    }
179    
180    
181  =head2 Loader Object Methods  =head2 Loader Object Methods
182    
183  =head3 new  =head3 new
# Line 363  Line 413 
413  sub LoadFeatures {  sub LoadFeatures {
414      # Get the parameters.      # Get the parameters.
415      my ($self) = @_;      my ($self) = @_;
416        # Read in the functional assignments.
417        Trace("Reading functional assignments.") if T(3);
418        my $assignHash = $self->ReadAssignments();
419      # Get the directory of feature types.      # Get the directory of feature types.
420      my $featureDir = "$self->{directory}/Features";      my $featureDir = "$self->{directory}/Features";
421      my @types = Tracer::OpenDir("$self->{directory}/Features", 1);      my @types = Tracer::OpenDir("$self->{directory}/Features", 1);
# Line 371  Line 424 
424          # Insure this is a genuine feature directory.          # Insure this is a genuine feature directory.
425          if (-f "$featureDir/$type/tbl") {          if (-f "$featureDir/$type/tbl") {
426              # Yes, load the feature data.              # Yes, load the feature data.
427              $self->LoadFeatureData($featureDir, $type);              $self->LoadFeatureData($featureDir, $type, $assignHash);
428          }          }
429      }      }
430      # Check for protein sequences. If we have some, load them into the database.      # Check for protein sequences. If we have some, load them into the database.
431      if (-f "$featureDir/peg/fasta") {      if (-f "$featureDir/peg/fasta") {
432            Trace("Processing protein sequences.") if T(3);
433          $self->LoadProteinData("$featureDir/peg/fasta");          $self->LoadProteinData("$featureDir/peg/fasta");
434      }      }
435      # Check for annotation history. If we have it, load the history records into the      # Now loop through the features, connecting them to their roles. Note that deleted
436      # database.      # features will not be in the assignment hash.
437      if (-f "$featureDir/annotations") {      Trace("Connecting features to roles.") if T(3);
438          $self->LoadAnnotations("$featureDir/annotations");      for my $fid (keys %$assignHash) {
439            $self->ConnectFunctionRoles($fid, $assignHash->{$fid});
440      }      }
441  }  }
442    
443  =head3 LoadFeatureData  =head3 LoadFeatureData
444    
445      $loaderObject->LoadFeatureData($featureDir, $type);      $loaderObject->LoadFeatureData($featureDir, $type, $assignHash);
446    
447  Load the basic data for each feature into the database. The number of features of  Load the basic data for each feature into the database. The number of features of
448  the type found will be recorded in the statistics object.  the type found will be recorded in the statistics object.
# Line 402  Line 457 
457    
458  Type of feature to load.  Type of feature to load.
459    
460    =item assignHash
461    
462    Reference to a hash mapping each feature ID to its functional assignment.
463    
464  =back  =back
465    
466  =cut  =cut
467    
468  sub LoadFeatureData {  sub LoadFeatureData {
469      # Get the parameters.      # Get the parameters.
470      my ($self, $featureDir, $type) = @_;      my ($self, $featureDir, $type, $assignHash) = @_;
471      # Get the sapling database.      # Get the sapling database.
472      my $sap = $self->{sap};      my $sap = $self->{sap};
473      # Get the maximum location segment length. We'll need this later.      # Get the maximum location segment length. We'll need this later.
474      my $maxLength = $sap->TuningParameter('maxLocationLength');      my $maxLength = $sap->TuningParameter('maxLocationLength');
475      # Get the statistics object.      # Get the statistics object.
476      my $stats = $self->{stats};      my $stats = $self->{stats};
     # Read in the functional assignments.  
     my $assignHash = $self->ReadAssignments();  
477      # This hash will track the features we've created. If a feature is found a second      # This hash will track the features we've created. If a feature is found a second
478      # time, it overwrites the original.      # time, it overwrites the original.
479      my $fidHash = $self->{timestamps};      my $fidHash = $self->{timestamps};
# Line 432  Line 489 
489              %deletedFids = map { $_ => 1 } Tracer::GetFile($deleteFile);              %deletedFids = map { $_ => 1 } Tracer::GetFile($deleteFile);
490          }          }
491          # Open the main file for input.          # Open the main file for input.
492            Trace("Reading features from $fileName.") if T(3);
493          my $ih = Open(undef, "<$fileName");          my $ih = Open(undef, "<$fileName");
494          while (! eof $ih) {          while (! eof $ih) {
495              # Read this feature's information.              # Read this feature's information.
# Line 552  Line 610 
610              }              }
611          }          }
612      }      }
     # Now loop through the features, connecting them to their roles. Note that deleted  
     # features will not be in the assignment hash.  
     for my $fid (keys %$assignHash) {  
         # Get the roles and the error count.  
         my ($roles, $errors) = SeedUtils::roles_for_loading($assignHash->{$fid});  
         # Accumulate the errors in the stats object.  
         $stats->Add(roleErrors => $errors);  
         # Is this a suspicious function?  
         if (! defined $roles) {  
             # Yes, so track it.  
             $stats->Add(badFunction => 1);  
         } else {  
             # No, connect the roles.  
             for my $role (@$roles) {  
                 # Insure this role exists.  
                 my $hypo = hypo($role);  
                 $self->InsureEntity(Role => $role, hypothetical => $hypo);  
                 # Connect it to the feature.  
                 $sap->InsertObject('IsFunctionalIn', from_link => $role, to_link => $fid);  
             }  
         }  
     }  
613  }  }
614    
615  =head3 LoadProteinData  =head3 LoadProteinData
# Line 681  Line 717 
717                      $stats->Add(skippedStamp => 1);                      $stats->Add(skippedStamp => 1);
718                  }                  }
719                  # Form the annotation ID.                  # Form the annotation ID.
720                  my $annotationID = "$fid:" . Tracer::Pad(9999999999 - $keyStamp, 10,                  my $annotationID = SaplingDataLoader::ComputeAnnotationID($fid, $keyStamp);
                                                          1, "0");  
721                  $timeHash->{$fid}{$keyStamp} = 1;                  $timeHash->{$fid}{$keyStamp} = 1;
722                  # Generate the annotation.                  # Generate the annotation.
723                  $sap->InsertObject('IsAnnotatedBy', from_link => $fid, to_link => $annotationID);                  $sap->InsertObject('IsAnnotatedBy', from_link => $fid, to_link => $annotationID);

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3