[Bio] / Sprout / SaplingGenomeLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/SaplingGenomeLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.12, Tue Jul 12 18:41:53 2011 UTC revision 1.19, Thu Jul 11 19:34:38 2013 UTC
# Line 26  Line 26 
26      use SAPserver;      use SAPserver;
27      use Sapling;      use Sapling;
28      use AliasAnalysis;      use AliasAnalysis;
29        use BaseSaplingLoader;
30        use MD5Computer;
31      use base qw(SaplingDataLoader);      use base qw(SaplingDataLoader);
32    
33  =head1 Sapling Genome Loader  =head1 Sapling Genome Loader
# Line 57  Line 59 
59    
60  Name of the directory containing the genome information.  Name of the directory containing the genome information.
61    
 =item assignHash  
   
 Hash of feature IDs to functional assignments. Deleted features are removed, which  
 means only features listed in this hash can be legally inserted into the database.  
   
62  =back  =back
63    
64  =cut  =cut
# Line 139  Line 136 
136      my $subStats = $sap->Delete(Genome => $genome);      my $subStats = $sap->Delete(Genome => $genome);
137      # Accumulate the statistics from the delete.      # Accumulate the statistics from the delete.
138      $stats->Accumulate($subStats);      $stats->Accumulate($subStats);
139        Trace("Statistics for delete of $genome:\n" . $stats->Show()) if T(SaplingDataLoader => 3);
140      # Return the statistics object.      # Return the statistics object.
141      return $stats;      return $stats;
142  }  }
# Line 163  Line 161 
161    
162  =item directory  =item directory
163    
164  Name of the directory containing the genome data files.  Name of the directory containing the genome data files. If omitted, the
165    genome will be deleted from the database.
166    
167  =item RETURN  =item RETURN
168    
# Line 178  Line 177 
177      my ($sap, $genome, $directory) = @_;      my ($sap, $genome, $directory) = @_;
178      # Clear the existing data for the specified genome.      # Clear the existing data for the specified genome.
179      my $stats = ClearGenome($sap, $genome);      my $stats = ClearGenome($sap, $genome);
180      # Load the new expression data from the specified directory.      # Load the new expression data from the specified directory (if one is
181        # specified).
182        if ($directory) {
183      my $newStats = Load($sap, $genome, $directory);      my $newStats = Load($sap, $genome, $directory);
184      # Merge the statistics.      # Merge the statistics.
185      $stats->Accumulate($newStats);      $stats->Accumulate($newStats);
186        }
187      # Return the result.      # Return the result.
188      return $stats;      return $stats;
189  }  }
# Line 250  Line 252 
252      $retVal->{directory} = $directory;      $retVal->{directory} = $directory;
253      # Leave the assignment hash undefined until we populate it.      # Leave the assignment hash undefined until we populate it.
254      $retVal->{assignHash} = undef;      $retVal->{assignHash} = undef;
255        # Create an MD5 Computer for this genome.
256        $retVal->{md5} = MD5Computer->new();
257      # Return the result.      # Return the result.
258      return $retVal;      return $retVal;
259  }  }
# Line 259  Line 263 
263      $loaderObject->LoadContigs();      $loaderObject->LoadContigs();
264    
265  Load the contig information into the database. This includes the contigs themselves and  Load the contig information into the database. This includes the contigs themselves and
266  the DNA. The number of contigs will be recorded as the C<contigs> statistic and the  the DNA. The number of contigs will be recorded as the C<contigs> statistic, the
267  number of base pairs as the C<dna> statistic.  number of base pairs as the C<dna> statistic, and the number of GC instances as the
268    C<gc_content> statistic.
269    
270  =cut  =cut
271    
# Line 274  Line 279 
279      # Get the genome ID.      # Get the genome ID.
280      my $genome = $self->{genome};      my $genome = $self->{genome};
281      # These variables will contain the current contig ID, the current contig length,      # These variables will contain the current contig ID, the current contig length,
282      # the ordinal of the current chunk, the accumulated DNA sequence, and its length.      # the accumulated DNA sequence for the current chunk, and its length.
283      my ($contigID, $contigLen, $ordinal, $chunk, $chunkLen) = (undef, 0, 0, '', 0);      my ($contigID, $contigLen, $chunk, $chunkLen) = (undef, 0, '', 0);
284        # This variable contains a list of the chunks, for use in computing the MD5.
285        my @chunks;
286      # Loop through the contig file.      # Loop through the contig file.
287      while (! eof $ih) {      while (! eof $ih) {
288          # Get the current record.          # Get the current record.
# Line 286  Line 293 
293              my $newContigID = $1;              my $newContigID = $1;
294              # Is there a current contig?              # Is there a current contig?
295              if (defined $contigID) {              if (defined $contigID) {
296                  # Yes. Output the current chunk.                  # Yes. Output the contig.
297                  $self->OutputChunk($contigID, $ordinal, $chunk);                  $self->OutputContig($contigID, $contigLen, $chunk, \@chunks);
                 # Output the contig.  
                 $self->OutputContig($contigID, $contigLen);  
298              }              }
299              # Compute the new contig ID. We need to insure it has a genome ID in front.              # Compute the new contig ID. We need to insure it has a genome ID in front.
300              $self->FixContig(\$newContigID);              $self->FixContig(\$newContigID);
301              # Initialize the new contig.              # Initialize the new contig.
302              $contigID = $newContigID;              $contigID = $newContigID;
303              $contigLen = 0;              $contigLen = 0;
             $ordinal = 0;  
304              $chunk = '';              $chunk = '';
305              $chunkLen = 0;              $chunkLen = 0;
306                @chunks = ();
307          } else {          } else {
308              # Here we have more DNA in the current contig. Are we at the end of              # Here we have more DNA in the current contig. Are we at the end of
309              # the current chunk?              # the current chunk?
# Line 312  Line 317 
317                  # Yes. Create the actual chunk.                  # Yes. Create the actual chunk.
318                  $chunk .= substr($line, 0, $segmentLength - $chunkLen);                  $chunk .= substr($line, 0, $segmentLength - $chunkLen);
319                  # Write it out.                  # Write it out.
320                  $self->OutputChunk($contigID, $ordinal, $chunk);                  $self->OutputChunk($contigID, scalar @chunks, $chunk);
321                  # Set up the new chunk.                  # Set up the new chunk.
322                  $chunk = substr($line, $segmentLength - $chunkLen);                  $chunk = substr($line, $segmentLength - $chunkLen);
                 $ordinal++;  
323                  $chunkLen = length($chunk);                  $chunkLen = length($chunk);
324                    push @chunks, $chunk;
325              }              }
326              # Update the contig length.              # Update the contig length.
327              $contigLen += $lineLen;              $contigLen += $lineLen;
# Line 324  Line 329 
329      }      }
330      # Is there a current contig?      # Is there a current contig?
331      if (defined $contigID) {      if (defined $contigID) {
332          # Yes. Output the residual chunk.          # Yes. Output the contig itself.
333          $self->OutputChunk($contigID, $ordinal, $chunk);          $self->OutputContig($contigID, $contigLen, $chunk, \@chunks);
         # Output the contig itself.  
         $self->OutputContig($contigID, $contigLen);  
334      }      }
335  }  }
336    
# Line 368  Line 371 
371      $sap->InsertObject('DNASequence', id => $chunkID, sequence => $chunk);      $sap->InsertObject('DNASequence', id => $chunkID, sequence => $chunk);
372      # Record the chunk.      # Record the chunk.
373      $self->{stats}->Add(chunks => 1);      $self->{stats}->Add(chunks => 1);
374        # Update the GC count.
375        $self->{stats}->Add(gc_content => ($chunk =~ tr/GCgc//));
376  }  }
377    
378  =head3 OutputContig  =head3 OutputContig
379    
380      $loaderObject->OutputContig($contigID, $contigLen);      $loaderObject->OutputContig($contigID, $contigLen, $chunk, \@chunks);
381    
382  Write out the current contig.  Write out the current contig.
383    
# Line 386  Line 391 
391    
392  Length of the contig in base pairs.  Length of the contig in base pairs.
393    
394    =item chunk
395    
396    Last DNA chunk of the contig.
397    
398    =item chunks
399    
400    Reference to a list of the DNA chunks up to (but not including) the last one.
401    
402  =back  =back
403    
404  =cut  =cut
405    
406  sub OutputContig {  sub OutputContig {
407      # Get the parameters.      # Get the parameters.
408      my ($self, $contigID, $contigLen) = @_;      my ($self, $contigID, $contigLen, $chunk, $chunks) = @_;
409      # Get the sapling object.      # Get the sapling object.
410      my $sap = $self->{sap};      my $sap = $self->{sap};
411        # Get the MD5 computer.
412        my $md5C = $self->{md5};
413        # Output the last chunk.
414        $self->OutputChunk($contigID, scalar @$chunks, $chunk);
415      # Connect the contig to the genome.      # Connect the contig to the genome.
416      $sap->InsertObject('IsMadeUpOf', from_link => $self->{genome}, to_link => $contigID);      $sap->InsertObject('IsMadeUpOf', from_link => $self->{genome}, to_link => $contigID);
417        # Compute the MD5.
418        push @$chunks, $chunk;
419        my $contigMD5 = $md5C->ProcessContig($contigID, $chunks);
420      # Output the contig record.      # Output the contig record.
421      $sap->InsertObject('Contig', id => $contigID, length => $contigLen);      $sap->InsertObject('Contig', id => $contigID, length => $contigLen,
422                md5_identifier => $contigMD5);
423      # Record the contig.      # Record the contig.
424      $self->{stats}->Add(contigs => 1);      $self->{stats}->Add(contigs => 1);
425      $self->{stats}->Add(dna => $contigLen);      $self->{stats}->Add(dna => $contigLen);
# Line 571  Line 592 
592      # This hash will track the features we've created. If a feature is found a second      # This hash will track the features we've created. If a feature is found a second
593      # time, it overwrites the original.      # time, it overwrites the original.
594      my %fidHash;      my %fidHash;
595      # Finally, we need the timestamp hash. The initial feature population      # This hash tracks the deleted features. We don't want to update these.
596        my %deleted_features;
597      # Insure we have a tbl file for this feature type.      # Insure we have a tbl file for this feature type.
598      my $fileName = "$featureDir/$type/tbl";      my $fileName = "$featureDir/$type/tbl";
599      if (-f $fileName) {      if (-f $fileName) {
# Line 586  Line 608 
608                  if (exists $assignHash->{$deletedFid}) {                  if (exists $assignHash->{$deletedFid}) {
609                      delete $assignHash->{$deletedFid};                      delete $assignHash->{$deletedFid};
610                      $stats->Add(deletedFid => 1);                      $stats->Add(deletedFid => 1);
611                        $deleted_features{$deletedFid} = 1;
612                  }                  }
613              }              }
614          }          }
# Line 596  Line 619 
619              # Read this feature's information.              # Read this feature's information.
620              my ($fid, $locations, @aliases) = Tracer::GetLine($ih);              my ($fid, $locations, @aliases) = Tracer::GetLine($ih);
621              # Only proceed if the feature is NOT deleted.              # Only proceed if the feature is NOT deleted.
622              if (exists $assignHash->{$fid}) {              if (!$deleted_features{$fid}) {
623                  # If the feature already exists, delete it. (This should be extremely rare.)                  # If the feature already exists, delete it. (This should be extremely rare.)
624                  if ($fidHash{$fid}) {                  if ($fidHash{$fid}) {
625                      $sap->Delete(Feature => $fid);                      $sap->Delete(Feature => $fid);
626                      $stats->Add(duplicateFid => 1);                      $stats->Add(duplicateFid => 1);
627                    } else {
628                        # It doesn't exist, so record it in the statistics.
629                        $stats->Add($type => 1);
630                  }                  }
631                  # If this is RNA, the alias list is always empty. Sometimes, the functional                  # If this is RNA, the alias list is always empty. Sometimes, the functional
632                  # assignment is found there.                  # assignment is found there.
# Line 611  Line 637 
637                      @aliases = ();                      @aliases = ();
638                  }                  }
639                  # Add the feature to the database.                  # Add the feature to the database.
640                  $self->AddFeature($fid, $assignHash->{$fid}, $locations, \@aliases,                  my $function = $assignHash->{$fid} || "";
641                    $self->AddFeature($fid, $function, $locations, \@aliases,
642                                    $protHash->{$fid}, $evHash->{$fid});                                    $protHash->{$fid}, $evHash->{$fid});
643                  # Denote we've added this feature, so that if a duplicate occurs we're ready.                  # Denote we've added this feature, so that if a duplicate occurs we're ready.
644                  $fidHash{$fid} = 1;                  $fidHash{$fid} = 1;
# Line 760  Line 787 
787    
788  =head3 LoadSubsystems  =head3 LoadSubsystems
789    
790      $loaderObject->LoadSubsystems();      $loaderObject->LoadSubsystems($subsysList);
791    
792    Load the subsystem data into the database. This requires looking through the
793    bindings and using them to connect to the subsystems that already exist in the
794    database. If the subsystem does not exist, its bindings will not be loaded.
795    
796    =over 4
797    
798    =item subsysList
799    
800  Load the subsystem data into the database. This includes all of the subsystems,  Reference to a list of subsystem IDs. If specified, only the subsystems in the
801  their categories, roles, and the bindings that connect them to this genome's features.  list will be processed. This is useful when a particular set of subsystems
802    has been replaced.
803    
804    =back
805    
806  =cut  =cut
807    
808  sub LoadSubsystems {  sub LoadSubsystems {
809      # Get the parameters.      # Get the parameters.
810      my ($self) = @_;      my ($self, $subsysList) = @_;
811      # Get the sapling object.      # Get the sapling object.
812      my $sap = $self->{sap};      my $sap = $self->{sap};
813      # Get the statistics object.      # Get the statistics object.
814      my $stats = $self->{stats};      my $stats = $self->{stats};
815      # Get the genome ID.      # Get the genome ID.
816      my $genome = $self->{genome};      my $genome = $self->{genome};
817      # Connect to the Sapling server so we can get the subsystem variant and role information.      # Compute the subsystem and binding file names.
818      my $sapO = SAPserver->new();      my $subFileName = "$self->{directory}/Subsystems/subsystems";
819        my $bindFileName = "$self->{directory}/Subsystems/bindings";
820        # Get a hash of the molecular machines already connected to this genome.
821        my %machinesOld = map { $_ => 1 } $sap->GetFlat('Uses', 'Uses(from-link) = ?',
822                [$genome], 'to-link');
823        # Only proceed if both exist.
824        if (! -f $subFileName || ! -f $bindFileName) {
825            Trace("Missing subsystem data for $genome.") if T(1);
826            $stats->Add(noSubsystems => 1);
827        } else {
828      # This hash maps subsystem IDs to molecular machine IDs.      # This hash maps subsystem IDs to molecular machine IDs.
829      my %machines;      my %machines;
830      # This hash maps subsystem/role pairs to machine role IDs.      # This hash maps subsystem/role pairs to machine role IDs.
831      my %machineRoles;      my %machineRoles;
832      # The first step is to create the subsystems themselves and connect them to the          # This hash will contain the list of subsystems found in the database.
833      # genome. A list is given in the subsystems file of the Subsystems directory.          my %subsystems;
834      my $ih = Open(undef, "<$self->{directory}/Subsystems/subsystems");          # We loop through the subsystems, looking for the ones already in the
835      # Create the field hash for the subsystem query.          # database. The list is given in the subsystems file of the Subsystems
836      my @fields = qw(Subsystem(id) Subsystem(cluster-based) Subsystem(curator) Subsystem(description)          # directory.
837                      Subsystem(experimental) Subsystem(notes) Subsystem(private) Subsystem(usable)          my $ih = Open(undef, "<$subFileName");
                     Subsystem(version)  
                     Includes(from-link) Includes(to-link) Includes(abbreviation) Includes(auxiliary)  
                     Includes(sequence)  
                     Role(id) Role(hypothetical) Role(role-index));  
     my %fields = map { $_ => $_ } @fields;  
838      # Loop through the subsystems in the file, insuring we have them in the database.      # Loop through the subsystems in the file, insuring we have them in the database.
839      while (! eof $ih) {      while (! eof $ih) {
840          # Get this subsystem.          # Get this subsystem.
# Line 800  Line 842 
842          Trace("Processing subsystem $subsystem variant $variant.") if T(SaplingDataLoader => 3);          Trace("Processing subsystem $subsystem variant $variant.") if T(SaplingDataLoader => 3);
843          # Normalize the subsystem name.          # Normalize the subsystem name.
844          $subsystem = $sap->SubsystemID($subsystem);          $subsystem = $sap->SubsystemID($subsystem);
845                # Insure the subsystem is in the database and is one we're interested in.
846                if ((! $subsysList || (grep { $_ eq $subsystem } @$subsysList)) &&
847                        $sap->Exists(Subsystem => $subsystem)) {
848                    # We have the subsystem in the database. We need to compute the machine
849                    # role IDs and create the molecular machine. First, we need to remember
850                    # the subsystem.
851                    $subsystems{$subsystem} = 1;
852          # Compute this subsystem's MD5.          # Compute this subsystem's MD5.
853          my $subsystemMD5 = ERDB::DigestKey($subsystem);          my $subsystemMD5 = ERDB::DigestKey($subsystem);
854          # Insure the subsystem is in the database.                  my $rolePrefix = "$subsystemMD5:$genome";
         if (! $sap->Exists(Subsystem => $subsystem)) {  
             # The subsystem isn't present, so we need to read it. We get the subsystem,  
             # its roles, and its classification information. The variant will be added  
             # later if we need it.  
             my $roleList = $sapO->get(-objects => "Subsystem Includes Role",  
                                       -filter => {"Subsystem(id)" => ["=", $subsystem] },  
                                       -fields => \%fields);  
             # Only proceed if we found some roles.  
             if (@$roleList > 0) {  
                 # Get the subsystem information from the first role and create the subsystem.  
                 my $roleH = $roleList->[0];  
                 my %subFields = SaplingDataLoader::ExtractFields(Subsystem => $roleH);  
                 $sap->InsertObject('Subsystem', %subFields);  
                 $stats->Add(subsystems => 1);  
                 # Now loop through the roles. The Includes records are always inserted, but the  
                 # roles are only inserted if they don't already exist.  
                 for $roleH (@$roleList) {  
                     # Create the Includes record.  
                     my %incFields = SaplingDataLoader::ExtractFields(Includes => $roleH);  
                     $sap->InsertObject('Includes', %incFields);  
                     # Insure we have the role in place.  
                     my %roleFields = SaplingDataLoader::ExtractFields(Role => $roleH);  
                     my $roleID = $roleFields{id};  
                     delete $roleFields{id};  
                     $self->InsureEntity('Role', $roleID, %roleFields);  
                     # Compute the machine-role ID.  
                     my $machineRoleID = join(":", $subsystemMD5, $genome, $incFields{abbreviation});  
                     $machineRoles{$subsystem}{$roleID} = $machineRoleID;  
                     $stats->Add(subsystemRoles => 1);  
                 }  
             }  
         } else {  
             # We already have the subsystem in the database, but we still need to compute the  
             # machine role IDs. We need information from the sapling server for this.  
             my $subHash = $sapO->subsystem_roles(-ids => $subsystem, -aux => 1, -abbr => 1);  
855              # Loop through the roles.              # Loop through the roles.
856              my $roleList = $subHash->{$subsystem};                  my @roleList = $sap->GetAll('Includes', 'Includes(from-link) = ?',
857              for my $roleTuple (@$roleList) {                          [$subsystem], "to-link abbreviation");
858                    for my $roleTuple (@roleList) {
859                  my ($roleID, $abbr) = @$roleTuple;                  my ($roleID, $abbr) = @$roleTuple;
860                  my $machineRoleID = join(":", $subsystemMD5, $genome, $abbr);                      my $machineRoleID = $rolePrefix . '::' . $abbr;
861                  $machineRoles{$subsystem}{$roleID} = $machineRoleID;                  $machineRoles{$subsystem}{$roleID} = $machineRoleID;
862              }              }
863          }                  # Next we need the variant code and key.
864          # Now we need to connect the variant to the subsystem. First, we compute the real                  my $variantCode = BaseSaplingLoader::Starless($variant);
         # variant code by removing the asterisks.  
         my $variantCode = $variant;  
         $variantCode =~ s/^\*+//;  
         $variantCode =~ s/\*+$//;  
         # Compute the variant key.  
865          my $variantKey = ERDB::DigestKey("$subsystem:$variantCode");          my $variantKey = ERDB::DigestKey("$subsystem:$variantCode");
866          # Insure we have it in the database.                  # Now we create the molecular machine connecting this genome to the
867          if (! $sap->Exists(Variant => $variantKey)) {                  # subsystem variant.
             # Get the variant from the sapling server.  
             my $variantH = $sapO->get(-objects => "Variant",  
                                       -filter => {"Variant(id)" => ["=", $variantKey]},  
                                       -fields => {"Variant(code)" => "code",  
                                                   "Variant(comment)" => "comment",  
                                                   "Variant(type)" => "type",  
                                                   "Variant(role-rule)" => "role-rule"},  
                                       -firstOnly => 1);  
             $self->InsureEntity('Variant', $variantKey, %$variantH);  
             # Connect it to the subsystem.  
             $sap->InsertObject('Describes', from_link => $subsystem, to_link => $variantKey);  
             $stats->Add(subsystemVariants => 1);  
         }  
         # Now we create the molecular machine connecting this genome to the subsystem  
         # variant.  
868          my $machineID = ERDB::DigestKey("$subsystem:$variantCode:$genome");          my $machineID = ERDB::DigestKey("$subsystem:$variantCode:$genome");
869                    # Does it already exist?
870                    if ($machinesOld{$machineID}) {
871                        # Yes. Output a warning.
872                        Trace("Machine $machineID already found for $subsystem and genome $genome.") if T(1);
873                        $stats->Add(duplicateMachine => 1);
874                    } else {
875          $sap->InsertObject('Uses', from_link => $genome, to_link => $machineID);          $sap->InsertObject('Uses', from_link => $genome, to_link => $machineID);
876          $sap->InsertObject('MolecularMachine', id => $machineID, curated => 0, region => '');          $sap->InsertObject('MolecularMachine', id => $machineID, curated => 0, region => '');
877          $sap->InsertObject('IsImplementedBy', from_link => $variantKey, to_link => $machineID);          $sap->InsertObject('IsImplementedBy', from_link => $variantKey, to_link => $machineID);
878                    }
879          # Remember the machine ID.          # Remember the machine ID.
880          $machines{$subsystem} = $machineID;          $machines{$subsystem} = $machineID;
881      }      }
882      # Now we go through the bindings file. This file connects the subsystem roles to          }
883      # the molecular machines.          # Now we go through the bindings file. This file connects the subsystem
884      $ih = Open(undef, "<$self->{directory}/Subsystems/bindings");          # roles to the molecular machines.
885            $ih = Open(undef, "<$bindFileName");
886      # Loop through the bindings.      # Loop through the bindings.
887      while (! eof $ih) {      while (! eof $ih) {
888          # Get the binding data.          # Get the binding data.
889          my ($subsystem, $role, $fid) = Tracer::GetLine($ih);          my ($subsystem, $role, $fid) = Tracer::GetLine($ih);
890          # Normalize the subsystem name.          # Normalize the subsystem name.
891          $subsystem = $sap->SubsystemID($subsystem);          $subsystem = $sap->SubsystemID($subsystem);
892                # Insure the subsystem is in the database.
893                if ($subsystems{$subsystem}) {
894          # Compute the machine role.          # Compute the machine role.
895          my $machineRoleID = $machineRoles{$subsystem}{$role};          my $machineRoleID = $machineRoles{$subsystem}{$role};
896          # Insure it exists.          # Insure it exists.
# Line 901  Line 906 
906          $sap->InsertObject('Contains', from_link => $machineRoleID, to_link => $fid);          $sap->InsertObject('Contains', from_link => $machineRoleID, to_link => $fid);
907      }      }
908  }  }
909        }
910    }
911    
912  =head3 CreateGenome  =head3 CreateGenome
913    
# Line 923  Line 930 
930      my $dir = $self->{directory};      my $dir = $self->{directory};
931      # Get the sapling database.      # Get the sapling database.
932      my $sap = $self->{sap};      my $sap = $self->{sap};
933        # Get the MD5 computer.
934        my $md5C = $self->{md5};
935      # We'll put the genome attributes in here.      # We'll put the genome attributes in here.
936      my %fields;      my %fields;
937      # Check for a basic statistics file.      # Check for a basic statistics file.
# Line 950  Line 959 
959          $fields{'scientific-name'} = $line;          $fields{'scientific-name'} = $line;
960          # Get the taxonomy and extract the domain from it.          # Get the taxonomy and extract the domain from it.
961          $ih = Open(undef, "<$dir/TAXONOMY");          $ih = Open(undef, "<$dir/TAXONOMY");
962          ($fields{domain}) = split /;/, <$ih>, 2;          ($fields{domain}) = split m/;/, <$ih>, 2;
963      }      }
964      # Get the counts from the statistics object.      # Get the counts from the statistics object.
965      my $stats = $self->{stats};      my $stats = $self->{stats};
# Line 958  Line 967 
967      $fields{'dna-size'} = $stats->Ask('dna');      $fields{'dna-size'} = $stats->Ask('dna');
968      $fields{pegs} = $stats->Ask('peg');      $fields{pegs} = $stats->Ask('peg');
969      $fields{rnas} = $stats->Ask('rna');      $fields{rnas} = $stats->Ask('rna');
970        $fields{gc_content} = $stats->Ask('gc_content') * 100 / $stats->Ask('dna');
971      # Get the genetic code. The default is 11.      # Get the genetic code. The default is 11.
972      $fields{'genetic-code'} = 11;      $fields{'genetic-code'} = 11;
973      my $geneticCodeFile = "$dir/GENETIC_CODE";      my $geneticCodeFile = "$dir/GENETIC_CODE";
# Line 968  Line 978 
978      }      }
979      # Use the domain to determine whether or not the genome is prokaryotic.      # Use the domain to determine whether or not the genome is prokaryotic.
980      $fields{prokaryotic} = PROK_FLAG->{$fields{domain}} || 0;      $fields{prokaryotic} = PROK_FLAG->{$fields{domain}} || 0;
981        # Compute the genome MD5.
982        $fields{md5_identifier} = $md5C->CloseGenome();
983      # Finally, add the genome ID.      # Finally, add the genome ID.
984      $fields{id} = $self->{genome};      $fields{id} = $self->{genome};
985      # Create the genome record.      # Create the genome record.
# Line 1099  Line 1111 
1111      # Loop through the lines of the text field.      # Loop through the lines of the text field.
1112      my $text = "";      my $text = "";
1113      my $line = <$ih>;      my $line = <$ih>;
1114      while ($line ne "//\n") {      while (defined($line) && $line ne "//\n") {
1115          $text .= $line;          $text .= $line;
1116          $line = <$ih>;          $line = <$ih>;
1117      }      }

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.19

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3