[Bio] / Sprout / SaplingGenomeLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/SaplingGenomeLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.12, Tue Jul 12 18:41:53 2011 UTC revision 1.18, Wed May 8 20:28:44 2013 UTC
# Line 26  Line 26 
26      use SAPserver;      use SAPserver;
27      use Sapling;      use Sapling;
28      use AliasAnalysis;      use AliasAnalysis;
29        use BaseSaplingLoader;
30        use MD5Computer;
31      use base qw(SaplingDataLoader);      use base qw(SaplingDataLoader);
32    
33  =head1 Sapling Genome Loader  =head1 Sapling Genome Loader
# Line 57  Line 59 
59    
60  Name of the directory containing the genome information.  Name of the directory containing the genome information.
61    
 =item assignHash  
   
 Hash of feature IDs to functional assignments. Deleted features are removed, which  
 means only features listed in this hash can be legally inserted into the database.  
   
62  =back  =back
63    
64  =cut  =cut
# Line 139  Line 136 
136      my $subStats = $sap->Delete(Genome => $genome);      my $subStats = $sap->Delete(Genome => $genome);
137      # Accumulate the statistics from the delete.      # Accumulate the statistics from the delete.
138      $stats->Accumulate($subStats);      $stats->Accumulate($subStats);
139        Trace("Statistics for delete of $genome:\n" . $stats->Show()) if T(SaplingDataLoader => 3);
140      # Return the statistics object.      # Return the statistics object.
141      return $stats;      return $stats;
142  }  }
# Line 163  Line 161 
161    
162  =item directory  =item directory
163    
164  Name of the directory containing the genome data files.  Name of the directory containing the genome data files. If omitted, the
165    genome will be deleted from the database.
166    
167  =item RETURN  =item RETURN
168    
# Line 178  Line 177 
177      my ($sap, $genome, $directory) = @_;      my ($sap, $genome, $directory) = @_;
178      # Clear the existing data for the specified genome.      # Clear the existing data for the specified genome.
179      my $stats = ClearGenome($sap, $genome);      my $stats = ClearGenome($sap, $genome);
180      # Load the new expression data from the specified directory.      # Load the new expression data from the specified directory (if one is
181        # specified).
182        if ($directory) {
183      my $newStats = Load($sap, $genome, $directory);      my $newStats = Load($sap, $genome, $directory);
184      # Merge the statistics.      # Merge the statistics.
185      $stats->Accumulate($newStats);      $stats->Accumulate($newStats);
186        }
187      # Return the result.      # Return the result.
188      return $stats;      return $stats;
189  }  }
# Line 250  Line 252 
252      $retVal->{directory} = $directory;      $retVal->{directory} = $directory;
253      # Leave the assignment hash undefined until we populate it.      # Leave the assignment hash undefined until we populate it.
254      $retVal->{assignHash} = undef;      $retVal->{assignHash} = undef;
255        # Create an MD5 Computer for this genome.
256        $retVal->{md5} = MD5Computer->new();
257      # Return the result.      # Return the result.
258      return $retVal;      return $retVal;
259  }  }
# Line 259  Line 263 
263      $loaderObject->LoadContigs();      $loaderObject->LoadContigs();
264    
265  Load the contig information into the database. This includes the contigs themselves and  Load the contig information into the database. This includes the contigs themselves and
266  the DNA. The number of contigs will be recorded as the C<contigs> statistic and the  the DNA. The number of contigs will be recorded as the C<contigs> statistic, the
267  number of base pairs as the C<dna> statistic.  number of base pairs as the C<dna> statistic, and the number of GC instances as the
268    C<gc_content> statistic.
269    
270  =cut  =cut
271    
# Line 274  Line 279 
279      # Get the genome ID.      # Get the genome ID.
280      my $genome = $self->{genome};      my $genome = $self->{genome};
281      # These variables will contain the current contig ID, the current contig length,      # These variables will contain the current contig ID, the current contig length,
282      # the ordinal of the current chunk, the accumulated DNA sequence, and its length.      # the accumulated DNA sequence for the current chunk, and its length.
283      my ($contigID, $contigLen, $ordinal, $chunk, $chunkLen) = (undef, 0, 0, '', 0);      my ($contigID, $contigLen, $chunk, $chunkLen) = (undef, 0, '', 0);
284        # This variable contains a list of the chunks, for use in computing the MD5.
285        my @chunks;
286      # Loop through the contig file.      # Loop through the contig file.
287      while (! eof $ih) {      while (! eof $ih) {
288          # Get the current record.          # Get the current record.
# Line 286  Line 293 
293              my $newContigID = $1;              my $newContigID = $1;
294              # Is there a current contig?              # Is there a current contig?
295              if (defined $contigID) {              if (defined $contigID) {
296                  # Yes. Output the current chunk.                  # Yes. Output the contig.
297                  $self->OutputChunk($contigID, $ordinal, $chunk);                  $self->OutputContig($contigID, $contigLen, $chunk, \@chunks);
                 # Output the contig.  
                 $self->OutputContig($contigID, $contigLen);  
298              }              }
299              # Compute the new contig ID. We need to insure it has a genome ID in front.              # Compute the new contig ID. We need to insure it has a genome ID in front.
300              $self->FixContig(\$newContigID);              $self->FixContig(\$newContigID);
301              # Initialize the new contig.              # Initialize the new contig.
302              $contigID = $newContigID;              $contigID = $newContigID;
303              $contigLen = 0;              $contigLen = 0;
             $ordinal = 0;  
304              $chunk = '';              $chunk = '';
305              $chunkLen = 0;              $chunkLen = 0;
306                @chunks = ();
307          } else {          } else {
308              # Here we have more DNA in the current contig. Are we at the end of              # Here we have more DNA in the current contig. Are we at the end of
309              # the current chunk?              # the current chunk?
# Line 312  Line 317 
317                  # Yes. Create the actual chunk.                  # Yes. Create the actual chunk.
318                  $chunk .= substr($line, 0, $segmentLength - $chunkLen);                  $chunk .= substr($line, 0, $segmentLength - $chunkLen);
319                  # Write it out.                  # Write it out.
320                  $self->OutputChunk($contigID, $ordinal, $chunk);                  $self->OutputChunk($contigID, scalar @chunks, $chunk);
321                  # Set up the new chunk.                  # Set up the new chunk.
322                  $chunk = substr($line, $segmentLength - $chunkLen);                  $chunk = substr($line, $segmentLength - $chunkLen);
                 $ordinal++;  
323                  $chunkLen = length($chunk);                  $chunkLen = length($chunk);
324                    push @chunks, $chunk;
325              }              }
326              # Update the contig length.              # Update the contig length.
327              $contigLen += $lineLen;              $contigLen += $lineLen;
# Line 324  Line 329 
329      }      }
330      # Is there a current contig?      # Is there a current contig?
331      if (defined $contigID) {      if (defined $contigID) {
332          # Yes. Output the residual chunk.          # Yes. Output the contig itself.
333          $self->OutputChunk($contigID, $ordinal, $chunk);          $self->OutputContig($contigID, $contigLen, $chunk, \@chunks);
         # Output the contig itself.  
         $self->OutputContig($contigID, $contigLen);  
334      }      }
335  }  }
336    
# Line 368  Line 371 
371      $sap->InsertObject('DNASequence', id => $chunkID, sequence => $chunk);      $sap->InsertObject('DNASequence', id => $chunkID, sequence => $chunk);
372      # Record the chunk.      # Record the chunk.
373      $self->{stats}->Add(chunks => 1);      $self->{stats}->Add(chunks => 1);
374        # Update the GC count.
375        $self->{stats}->Add(gc_content => ($chunk =~ tr/GCgc//));
376  }  }
377    
378  =head3 OutputContig  =head3 OutputContig
379    
380      $loaderObject->OutputContig($contigID, $contigLen);      $loaderObject->OutputContig($contigID, $contigLen, $chunk, \@chunks);
381    
382  Write out the current contig.  Write out the current contig.
383    
# Line 386  Line 391 
391    
392  Length of the contig in base pairs.  Length of the contig in base pairs.
393    
394    =item chunk
395    
396    Last DNA chunk of the contig.
397    
398    =item chunks
399    
400    Reference to a list of the DNA chunks up to (but not including) the last one.
401    
402  =back  =back
403    
404  =cut  =cut
405    
406  sub OutputContig {  sub OutputContig {
407      # Get the parameters.      # Get the parameters.
408      my ($self, $contigID, $contigLen) = @_;      my ($self, $contigID, $contigLen, $chunk, $chunks) = @_;
409      # Get the sapling object.      # Get the sapling object.
410      my $sap = $self->{sap};      my $sap = $self->{sap};
411        # Get the MD5 computer.
412        my $md5C = $self->{md5};
413        # Output the last chunk.
414        $self->OutputChunk($contigID, scalar @$chunks, $chunk);
415      # Connect the contig to the genome.      # Connect the contig to the genome.
416      $sap->InsertObject('IsMadeUpOf', from_link => $self->{genome}, to_link => $contigID);      $sap->InsertObject('IsMadeUpOf', from_link => $self->{genome}, to_link => $contigID);
417        # Compute the MD5.
418        push @$chunks, $chunk;
419        my $contigMD5 = $md5C->ProcessContig($contigID, $chunks);
420      # Output the contig record.      # Output the contig record.
421      $sap->InsertObject('Contig', id => $contigID, length => $contigLen);      $sap->InsertObject('Contig', id => $contigID, length => $contigLen,
422                md5_identifier => $contigMD5);
423      # Record the contig.      # Record the contig.
424      $self->{stats}->Add(contigs => 1);      $self->{stats}->Add(contigs => 1);
425      $self->{stats}->Add(dna => $contigLen);      $self->{stats}->Add(dna => $contigLen);
# Line 571  Line 592 
592      # This hash will track the features we've created. If a feature is found a second      # This hash will track the features we've created. If a feature is found a second
593      # time, it overwrites the original.      # time, it overwrites the original.
594      my %fidHash;      my %fidHash;
595      # Finally, we need the timestamp hash. The initial feature population      # This hash tracks the deleted features. We don't want to update these.
596        my %deleted_features;
597      # Insure we have a tbl file for this feature type.      # Insure we have a tbl file for this feature type.
598      my $fileName = "$featureDir/$type/tbl";      my $fileName = "$featureDir/$type/tbl";
599      if (-f $fileName) {      if (-f $fileName) {
# Line 586  Line 608 
608                  if (exists $assignHash->{$deletedFid}) {                  if (exists $assignHash->{$deletedFid}) {
609                      delete $assignHash->{$deletedFid};                      delete $assignHash->{$deletedFid};
610                      $stats->Add(deletedFid => 1);                      $stats->Add(deletedFid => 1);
611                        $deleted_features{$deletedFid} = 1;
612                  }                  }
613              }              }
614          }          }
# Line 596  Line 619 
619              # Read this feature's information.              # Read this feature's information.
620              my ($fid, $locations, @aliases) = Tracer::GetLine($ih);              my ($fid, $locations, @aliases) = Tracer::GetLine($ih);
621              # Only proceed if the feature is NOT deleted.              # Only proceed if the feature is NOT deleted.
622              if (exists $assignHash->{$fid}) {              if (!$deleted_features{$fid}) {
623                  # If the feature already exists, delete it. (This should be extremely rare.)                  # If the feature already exists, delete it. (This should be extremely rare.)
624                  if ($fidHash{$fid}) {                  if ($fidHash{$fid}) {
625                      $sap->Delete(Feature => $fid);                      $sap->Delete(Feature => $fid);
626                      $stats->Add(duplicateFid => 1);                      $stats->Add(duplicateFid => 1);
627                    } else {
628                        # It doesn't exist, so record it in the statistics.
629                        $stats->Add($type => 1);
630                  }                  }
631                  # If this is RNA, the alias list is always empty. Sometimes, the functional                  # If this is RNA, the alias list is always empty. Sometimes, the functional
632                  # assignment is found there.                  # assignment is found there.
# Line 611  Line 637 
637                      @aliases = ();                      @aliases = ();
638                  }                  }
639                  # Add the feature to the database.                  # Add the feature to the database.
640                  $self->AddFeature($fid, $assignHash->{$fid}, $locations, \@aliases,                  my $function = $assignHash->{$fid} || "";
641                    $self->AddFeature($fid, $function, $locations, \@aliases,
642                                    $protHash->{$fid}, $evHash->{$fid});                                    $protHash->{$fid}, $evHash->{$fid});
643                  # Denote we've added this feature, so that if a duplicate occurs we're ready.                  # Denote we've added this feature, so that if a duplicate occurs we're ready.
644                  $fidHash{$fid} = 1;                  $fidHash{$fid} = 1;
# Line 760  Line 787 
787    
788  =head3 LoadSubsystems  =head3 LoadSubsystems
789    
790      $loaderObject->LoadSubsystems();      $loaderObject->LoadSubsystems($subsysList);
791    
792  Load the subsystem data into the database. This includes all of the subsystems,  Load the subsystem data into the database. This requires looking through the
793  their categories, roles, and the bindings that connect them to this genome's features.  bindings and using them to connect to the subsystems that already exist in the
794    database. If the subsystem does not exist, its bindings will not be loaded.
795    
796    =over 4
797    
798    =item subsysList
799    
800    Reference to a list of subsystem IDs. If specified, only the subsystems in the
801    list will be processed. This is useful when a particular set of subsystems
802    has been replaced.
803    
804    =back
805    
806  =cut  =cut
807    
808  sub LoadSubsystems {  sub LoadSubsystems {
809      # Get the parameters.      # Get the parameters.
810      my ($self) = @_;      my ($self, $subsysList) = @_;
811      # Get the sapling object.      # Get the sapling object.
812      my $sap = $self->{sap};      my $sap = $self->{sap};
813      # Get the statistics object.      # Get the statistics object.
814      my $stats = $self->{stats};      my $stats = $self->{stats};
815      # Get the genome ID.      # Get the genome ID.
816      my $genome = $self->{genome};      my $genome = $self->{genome};
817      # Connect to the Sapling server so we can get the subsystem variant and role information.      # Compute the subsystem and binding file names.
818      my $sapO = SAPserver->new();      my $subFileName = "$self->{directory}/Subsystems/subsystems";
819        my $bindFileName = "$self->{directory}/Subsystems/bindings";
820        # Only proceed if both exist.
821        if (! -f $subFileName || ! -f $bindFileName) {
822            Trace("Missing subsystem data for $genome.") if T(1);
823            $stats->Add(noSubsystems => 1);
824        } else {
825      # This hash maps subsystem IDs to molecular machine IDs.      # This hash maps subsystem IDs to molecular machine IDs.
826      my %machines;      my %machines;
827      # This hash maps subsystem/role pairs to machine role IDs.      # This hash maps subsystem/role pairs to machine role IDs.
828      my %machineRoles;      my %machineRoles;
829      # The first step is to create the subsystems themselves and connect them to the          # This hash will contain the list of subsystems found in the database.
830      # genome. A list is given in the subsystems file of the Subsystems directory.          my %subsystems;
831      my $ih = Open(undef, "<$self->{directory}/Subsystems/subsystems");          # We loop through the subsystems, looking for the ones already in the
832      # Create the field hash for the subsystem query.          # database. The list is given in the subsystems file of the Subsystems
833      my @fields = qw(Subsystem(id) Subsystem(cluster-based) Subsystem(curator) Subsystem(description)          # directory.
834                      Subsystem(experimental) Subsystem(notes) Subsystem(private) Subsystem(usable)          my $ih = Open(undef, "<$subFileName");
                     Subsystem(version)  
                     Includes(from-link) Includes(to-link) Includes(abbreviation) Includes(auxiliary)  
                     Includes(sequence)  
                     Role(id) Role(hypothetical) Role(role-index));  
     my %fields = map { $_ => $_ } @fields;  
835      # Loop through the subsystems in the file, insuring we have them in the database.      # Loop through the subsystems in the file, insuring we have them in the database.
836      while (! eof $ih) {      while (! eof $ih) {
837          # Get this subsystem.          # Get this subsystem.
# Line 800  Line 839 
839          Trace("Processing subsystem $subsystem variant $variant.") if T(SaplingDataLoader => 3);          Trace("Processing subsystem $subsystem variant $variant.") if T(SaplingDataLoader => 3);
840          # Normalize the subsystem name.          # Normalize the subsystem name.
841          $subsystem = $sap->SubsystemID($subsystem);          $subsystem = $sap->SubsystemID($subsystem);
842                # Insure the subsystem is in the database and is one we're interested in.
843                if ((! $subsysList || (grep { $_ eq $subsystem } @$subsysList)) &&
844                        $sap->Exists(Subsystem => $subsystem)) {
845                    # We have the subsystem in the database. We need to compute the machine
846                    # role IDs and create the molecular machine. First, we need to remember
847                    # the subsystem.
848                    $subsystems{$subsystem} = 1;
849          # Compute this subsystem's MD5.          # Compute this subsystem's MD5.
850          my $subsystemMD5 = ERDB::DigestKey($subsystem);          my $subsystemMD5 = ERDB::DigestKey($subsystem);
851          # Insure the subsystem is in the database.                  my $rolePrefix = "$subsystemMD5:$genome";
         if (! $sap->Exists(Subsystem => $subsystem)) {  
             # The subsystem isn't present, so we need to read it. We get the subsystem,  
             # its roles, and its classification information. The variant will be added  
             # later if we need it.  
             my $roleList = $sapO->get(-objects => "Subsystem Includes Role",  
                                       -filter => {"Subsystem(id)" => ["=", $subsystem] },  
                                       -fields => \%fields);  
             # Only proceed if we found some roles.  
             if (@$roleList > 0) {  
                 # Get the subsystem information from the first role and create the subsystem.  
                 my $roleH = $roleList->[0];  
                 my %subFields = SaplingDataLoader::ExtractFields(Subsystem => $roleH);  
                 $sap->InsertObject('Subsystem', %subFields);  
                 $stats->Add(subsystems => 1);  
                 # Now loop through the roles. The Includes records are always inserted, but the  
                 # roles are only inserted if they don't already exist.  
                 for $roleH (@$roleList) {  
                     # Create the Includes record.  
                     my %incFields = SaplingDataLoader::ExtractFields(Includes => $roleH);  
                     $sap->InsertObject('Includes', %incFields);  
                     # Insure we have the role in place.  
                     my %roleFields = SaplingDataLoader::ExtractFields(Role => $roleH);  
                     my $roleID = $roleFields{id};  
                     delete $roleFields{id};  
                     $self->InsureEntity('Role', $roleID, %roleFields);  
                     # Compute the machine-role ID.  
                     my $machineRoleID = join(":", $subsystemMD5, $genome, $incFields{abbreviation});  
                     $machineRoles{$subsystem}{$roleID} = $machineRoleID;  
                     $stats->Add(subsystemRoles => 1);  
                 }  
             }  
         } else {  
             # We already have the subsystem in the database, but we still need to compute the  
             # machine role IDs. We need information from the sapling server for this.  
             my $subHash = $sapO->subsystem_roles(-ids => $subsystem, -aux => 1, -abbr => 1);  
852              # Loop through the roles.              # Loop through the roles.
853              my $roleList = $subHash->{$subsystem};                  my @roleList = $sap->GetAll('Includes', 'Includes(from-link) = ?',
854              for my $roleTuple (@$roleList) {                          [$subsystem], "to-link abbreviation");
855                    for my $roleTuple (@roleList) {
856                  my ($roleID, $abbr) = @$roleTuple;                  my ($roleID, $abbr) = @$roleTuple;
857                  my $machineRoleID = join(":", $subsystemMD5, $genome, $abbr);                      my $machineRoleID = $rolePrefix . '::' . $abbr;
858                  $machineRoles{$subsystem}{$roleID} = $machineRoleID;                  $machineRoles{$subsystem}{$roleID} = $machineRoleID;
859              }              }
860          }                  # Next we need the variant code and key.
861          # Now we need to connect the variant to the subsystem. First, we compute the real                  my $variantCode = BaseSaplingLoader::Starless($variant);
         # variant code by removing the asterisks.  
         my $variantCode = $variant;  
         $variantCode =~ s/^\*+//;  
         $variantCode =~ s/\*+$//;  
         # Compute the variant key.  
862          my $variantKey = ERDB::DigestKey("$subsystem:$variantCode");          my $variantKey = ERDB::DigestKey("$subsystem:$variantCode");
863          # Insure we have it in the database.                  # Now we create the molecular machine connecting this genome to the
864          if (! $sap->Exists(Variant => $variantKey)) {                  # subsystem variant.
             # Get the variant from the sapling server.  
             my $variantH = $sapO->get(-objects => "Variant",  
                                       -filter => {"Variant(id)" => ["=", $variantKey]},  
                                       -fields => {"Variant(code)" => "code",  
                                                   "Variant(comment)" => "comment",  
                                                   "Variant(type)" => "type",  
                                                   "Variant(role-rule)" => "role-rule"},  
                                       -firstOnly => 1);  
             $self->InsureEntity('Variant', $variantKey, %$variantH);  
             # Connect it to the subsystem.  
             $sap->InsertObject('Describes', from_link => $subsystem, to_link => $variantKey);  
             $stats->Add(subsystemVariants => 1);  
         }  
         # Now we create the molecular machine connecting this genome to the subsystem  
         # variant.  
865          my $machineID = ERDB::DigestKey("$subsystem:$variantCode:$genome");          my $machineID = ERDB::DigestKey("$subsystem:$variantCode:$genome");
866          $sap->InsertObject('Uses', from_link => $genome, to_link => $machineID);          $sap->InsertObject('Uses', from_link => $genome, to_link => $machineID);
867          $sap->InsertObject('MolecularMachine', id => $machineID, curated => 0, region => '');          $sap->InsertObject('MolecularMachine', id => $machineID, curated => 0, region => '');
# Line 877  Line 869 
869          # Remember the machine ID.          # Remember the machine ID.
870          $machines{$subsystem} = $machineID;          $machines{$subsystem} = $machineID;
871      }      }
872      # Now we go through the bindings file. This file connects the subsystem roles to          }
873      # the molecular machines.          # Now we go through the bindings file. This file connects the subsystem
874      $ih = Open(undef, "<$self->{directory}/Subsystems/bindings");          # roles to the molecular machines.
875            $ih = Open(undef, "<$bindFileName");
876      # Loop through the bindings.      # Loop through the bindings.
877      while (! eof $ih) {      while (! eof $ih) {
878          # Get the binding data.          # Get the binding data.
879          my ($subsystem, $role, $fid) = Tracer::GetLine($ih);          my ($subsystem, $role, $fid) = Tracer::GetLine($ih);
880          # Normalize the subsystem name.          # Normalize the subsystem name.
881          $subsystem = $sap->SubsystemID($subsystem);          $subsystem = $sap->SubsystemID($subsystem);
882                # Insure the subsystem is in the database.
883                if ($subsystems{$subsystem}) {
884          # Compute the machine role.          # Compute the machine role.
885          my $machineRoleID = $machineRoles{$subsystem}{$role};          my $machineRoleID = $machineRoles{$subsystem}{$role};
886          # Insure it exists.          # Insure it exists.
# Line 901  Line 896 
896          $sap->InsertObject('Contains', from_link => $machineRoleID, to_link => $fid);          $sap->InsertObject('Contains', from_link => $machineRoleID, to_link => $fid);
897      }      }
898  }  }
899        }
900    }
901    
902  =head3 CreateGenome  =head3 CreateGenome
903    
# Line 923  Line 920 
920      my $dir = $self->{directory};      my $dir = $self->{directory};
921      # Get the sapling database.      # Get the sapling database.
922      my $sap = $self->{sap};      my $sap = $self->{sap};
923        # Get the MD5 computer.
924        my $md5C = $self->{md5};
925      # We'll put the genome attributes in here.      # We'll put the genome attributes in here.
926      my %fields;      my %fields;
927      # Check for a basic statistics file.      # Check for a basic statistics file.
# Line 950  Line 949 
949          $fields{'scientific-name'} = $line;          $fields{'scientific-name'} = $line;
950          # Get the taxonomy and extract the domain from it.          # Get the taxonomy and extract the domain from it.
951          $ih = Open(undef, "<$dir/TAXONOMY");          $ih = Open(undef, "<$dir/TAXONOMY");
952          ($fields{domain}) = split /;/, <$ih>, 2;          ($fields{domain}) = split m/;/, <$ih>, 2;
953      }      }
954      # Get the counts from the statistics object.      # Get the counts from the statistics object.
955      my $stats = $self->{stats};      my $stats = $self->{stats};
# Line 958  Line 957 
957      $fields{'dna-size'} = $stats->Ask('dna');      $fields{'dna-size'} = $stats->Ask('dna');
958      $fields{pegs} = $stats->Ask('peg');      $fields{pegs} = $stats->Ask('peg');
959      $fields{rnas} = $stats->Ask('rna');      $fields{rnas} = $stats->Ask('rna');
960        $fields{gc_content} = $stats->Ask('gc_content') * 100 / $stats->Ask('dna');
961      # Get the genetic code. The default is 11.      # Get the genetic code. The default is 11.
962      $fields{'genetic-code'} = 11;      $fields{'genetic-code'} = 11;
963      my $geneticCodeFile = "$dir/GENETIC_CODE";      my $geneticCodeFile = "$dir/GENETIC_CODE";
# Line 968  Line 968 
968      }      }
969      # Use the domain to determine whether or not the genome is prokaryotic.      # Use the domain to determine whether or not the genome is prokaryotic.
970      $fields{prokaryotic} = PROK_FLAG->{$fields{domain}} || 0;      $fields{prokaryotic} = PROK_FLAG->{$fields{domain}} || 0;
971        # Compute the genome MD5.
972        $fields{md5_identifier} = $md5C->CloseGenome();
973      # Finally, add the genome ID.      # Finally, add the genome ID.
974      $fields{id} = $self->{genome};      $fields{id} = $self->{genome};
975      # Create the genome record.      # Create the genome record.
# Line 1099  Line 1101 
1101      # Loop through the lines of the text field.      # Loop through the lines of the text field.
1102      my $text = "";      my $text = "";
1103      my $line = <$ih>;      my $line = <$ih>;
1104      while ($line ne "//\n") {      while (defined($line) && $line ne "//\n") {
1105          $text .= $line;          $text .= $line;
1106          $line = <$ih>;          $line = <$ih>;
1107      }      }

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.18

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3