[Bio] / Sprout / SaplingGenomeLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/SaplingGenomeLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.13, Fri Jul 22 19:20:11 2011 UTC revision 1.15, Mon Dec 5 22:05:15 2011 UTC
# Line 57  Line 57 
57    
58  Name of the directory containing the genome information.  Name of the directory containing the genome information.
59    
60    =item disconnected
61    
62    True if the application is disconnected from the network - do not
63    attempt to contact a SAP server for more data.
64    
65  =item assignHash  =item assignHash
66    
67  Hash of feature IDs to functional assignments. Deleted features are removed, which  Hash of feature IDs to functional assignments. Deleted features are removed, which
# Line 68  Line 73 
73    
74  sub Load {  sub Load {
75      # Get the parameters.      # Get the parameters.
76      my ($sap, $genome, $directory) = @_;      my ($sap, $genome, $directory, $disconnected) = @_;
77      # Create the loader object.      # Create the loader object.
78      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory);      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory, $disconnected);
79      # Load the contigs.      # Load the contigs.
80      Trace("Loading contigs for $genome.") if T(SaplingDataLoader => 2);      Trace("Loading contigs for $genome.") if T(SaplingDataLoader => 2);
81      $loaderObject->LoadContigs();      $loaderObject->LoadContigs();
# Line 191  Line 196 
196    
197  =head3 new  =head3 new
198    
199      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory);      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory, $disconnected);
200    
201  Create a loader object that can be used to facilitate loading genome data from a  Create a loader object that can be used to facilitate loading genome data from a
202  directory.  directory.
# Line 210  Line 215 
215    
216  Name of the directory containing the genome information.  Name of the directory containing the genome information.
217    
218    =item disconnected
219    
220    Set to a true value if the application should be considered to be disconnected
221    from the network - that is, do not attempt to connect to a Sapling server
222    to load subsystem data.
223    
224  =back  =back
225    
226  The object created contains the following fields.  The object created contains the following fields.
# Line 242  Line 253 
253    
254  sub new {  sub new {
255      # Get the parameters.      # Get the parameters.
256      my ($class, $sap, $genome, $directory) = @_;      my ($class, $sap, $genome, $directory, $disconnected) = @_;
257      # Create the object.      # Create the object.
258      my $retVal = SaplingDataLoader::new($class, $sap, qw(contigs dna pegs rnas));      my $retVal = SaplingDataLoader::new($class, $sap, qw(contigs dna pegs rnas));
259      # Add our specialized data.      # Add our specialized data.
# Line 250  Line 261 
261      $retVal->{directory} = $directory;      $retVal->{directory} = $directory;
262      # Leave the assignment hash undefined until we populate it.      # Leave the assignment hash undefined until we populate it.
263      $retVal->{assignHash} = undef;      $retVal->{assignHash} = undef;
264        $retVal->{disconnected} = defined($disconnected) ? 1 : 0;
265      # Return the result.      # Return the result.
266      return $retVal;      return $retVal;
267  }  }
# Line 259  Line 271 
271      $loaderObject->LoadContigs();      $loaderObject->LoadContigs();
272    
273  Load the contig information into the database. This includes the contigs themselves and  Load the contig information into the database. This includes the contigs themselves and
274  the DNA. The number of contigs will be recorded as the C<contigs> statistic and the  the DNA. The number of contigs will be recorded as the C<contigs> statistic, the
275  number of base pairs as the C<dna> statistic.  number of base pairs as the C<dna> statistic, and the number of GC instances as the
276    C<gc_content> statistic.
277    
278  =cut  =cut
279    
# Line 368  Line 381 
381      $sap->InsertObject('DNASequence', id => $chunkID, sequence => $chunk);      $sap->InsertObject('DNASequence', id => $chunkID, sequence => $chunk);
382      # Record the chunk.      # Record the chunk.
383      $self->{stats}->Add(chunks => 1);      $self->{stats}->Add(chunks => 1);
384        # Update the GC count.
385        $self->{stats}->Add(gc_content => ($chunk =~ tr/GCgc//));
386  }  }
387    
388  =head3 OutputContig  =head3 OutputContig
# Line 574  Line 589 
589      # Finally, we need the timestamp hash. The initial feature population      # Finally, we need the timestamp hash. The initial feature population
590      # Ensure we have a tbl file for this feature type.      # Ensure we have a tbl file for this feature type.
591      my $fileName = "$featureDir/$type/tbl";      my $fileName = "$featureDir/$type/tbl";
592        my %deleted_features;
593      if (-f $fileName) {      if (-f $fileName) {
594          # We have one, so we can read through it. First, however, we need to get the list          # We have one, so we can read through it. First, however, we need to get the list
595          # of deleted features and remove them from the assignment hash. This ensures          # of deleted features and remove them from the assignment hash. This ensures
# Line 586  Line 602 
602                  if (exists $assignHash->{$deletedFid}) {                  if (exists $assignHash->{$deletedFid}) {
603                      delete $assignHash->{$deletedFid};                      delete $assignHash->{$deletedFid};
604                      $stats->Add(deletedFid => 1);                      $stats->Add(deletedFid => 1);
605                        $deleted_features{$deletedFid} = 1;
606                  }                  }
607              }              }
608          }          }
# Line 596  Line 613 
613              # Read this feature's information.              # Read this feature's information.
614              my ($fid, $locations, @aliases) = Tracer::GetLine($ih);              my ($fid, $locations, @aliases) = Tracer::GetLine($ih);
615              # Only proceed if the feature is NOT deleted.              # Only proceed if the feature is NOT deleted.
616              if (exists $assignHash->{$fid}) {              if (!$deleted_features{$fid}) {
617                  # If the feature already exists, delete it. (This should be extremely rare.)                  # If the feature already exists, delete it. (This should be extremely rare.)
618                  if ($fidHash{$fid}) {                  if ($fidHash{$fid}) {
619                      $sap->Delete(Feature => $fid);                      $sap->Delete(Feature => $fid);
# Line 770  Line 787 
787  sub LoadSubsystems {  sub LoadSubsystems {
788      # Get the parameters.      # Get the parameters.
789      my ($self) = @_;      my ($self) = @_;
790    
791        #
792        # If we are running in disconnected mode, do not actually load subsystems.
793        # They rely too much on information from the external sapling.
794        #
795        if ($self->{disconnected})
796        {
797            return;
798        }
799    
800      # Get the sapling object.      # Get the sapling object.
801      my $sap = $self->{sap};      my $sap = $self->{sap};
802      # Get the statistics object.      # Get the statistics object.
# Line 958  Line 985 
985      $fields{'dna-size'} = $stats->Ask('dna');      $fields{'dna-size'} = $stats->Ask('dna');
986      $fields{pegs} = $stats->Ask('peg');      $fields{pegs} = $stats->Ask('peg');
987      $fields{rnas} = $stats->Ask('rna');      $fields{rnas} = $stats->Ask('rna');
988        $fields{gc_content} = $stats->Ask('gc_content') * 100 / $stats->Ask('dna');
989      # Get the genetic code. The default is 11.      # Get the genetic code. The default is 11.
990      $fields{'genetic-code'} = 11;      $fields{'genetic-code'} = 11;
991      my $geneticCodeFile = "$dir/GENETIC_CODE";      my $geneticCodeFile = "$dir/GENETIC_CODE";

Legend:
Removed from v.1.13  
changed lines
  Added in v.1.15

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3