[Bio] / Sprout / SaplingGenomeLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/SaplingGenomeLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.15, Mon Dec 5 22:05:15 2011 UTC revision 1.18, Wed May 8 20:28:44 2013 UTC
# Line 26  Line 26 
26      use SAPserver;      use SAPserver;
27      use Sapling;      use Sapling;
28      use AliasAnalysis;      use AliasAnalysis;
29        use BaseSaplingLoader;
30        use MD5Computer;
31      use base qw(SaplingDataLoader);      use base qw(SaplingDataLoader);
32    
33  =head1 Sapling Genome Loader  =head1 Sapling Genome Loader
# Line 57  Line 59 
59    
60  Name of the directory containing the genome information.  Name of the directory containing the genome information.
61    
 =item disconnected  
   
 True if the application is disconnected from the network - do not  
 attempt to contact a SAP server for more data.  
   
 =item assignHash  
   
 Hash of feature IDs to functional assignments. Deleted features are removed, which  
 means only features listed in this hash can be legally inserted into the database.  
   
62  =back  =back
63    
64  =cut  =cut
65    
66  sub Load {  sub Load {
67      # Get the parameters.      # Get the parameters.
68      my ($sap, $genome, $directory, $disconnected) = @_;      my ($sap, $genome, $directory) = @_;
69      # Create the loader object.      # Create the loader object.
70      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory, $disconnected);      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory);
71      # Load the contigs.      # Load the contigs.
72      Trace("Loading contigs for $genome.") if T(SaplingDataLoader => 2);      Trace("Loading contigs for $genome.") if T(SaplingDataLoader => 2);
73      $loaderObject->LoadContigs();      $loaderObject->LoadContigs();
# Line 144  Line 136 
136      my $subStats = $sap->Delete(Genome => $genome);      my $subStats = $sap->Delete(Genome => $genome);
137      # Accumulate the statistics from the delete.      # Accumulate the statistics from the delete.
138      $stats->Accumulate($subStats);      $stats->Accumulate($subStats);
139        Trace("Statistics for delete of $genome:\n" . $stats->Show()) if T(SaplingDataLoader => 3);
140      # Return the statistics object.      # Return the statistics object.
141      return $stats;      return $stats;
142  }  }
# Line 168  Line 161 
161    
162  =item directory  =item directory
163    
164  Name of the directory containing the genome data files.  Name of the directory containing the genome data files. If omitted, the
165    genome will be deleted from the database.
166    
167  =item RETURN  =item RETURN
168    
# Line 183  Line 177 
177      my ($sap, $genome, $directory) = @_;      my ($sap, $genome, $directory) = @_;
178      # Clear the existing data for the specified genome.      # Clear the existing data for the specified genome.
179      my $stats = ClearGenome($sap, $genome);      my $stats = ClearGenome($sap, $genome);
180      # Load the new expression data from the specified directory.      # Load the new expression data from the specified directory (if one is
181        # specified).
182        if ($directory) {
183      my $newStats = Load($sap, $genome, $directory);      my $newStats = Load($sap, $genome, $directory);
184      # Merge the statistics.      # Merge the statistics.
185      $stats->Accumulate($newStats);      $stats->Accumulate($newStats);
186        }
187      # Return the result.      # Return the result.
188      return $stats;      return $stats;
189  }  }
# Line 196  Line 193 
193    
194  =head3 new  =head3 new
195    
196      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory, $disconnected);      my $loaderObject = SaplingGenomeLoader->new($sap, $genome, $directory);
197    
198  Create a loader object that can be used to facilitate loading genome data from a  Create a loader object that can be used to facilitate loading genome data from a
199  directory.  directory.
# Line 215  Line 212 
212    
213  Name of the directory containing the genome information.  Name of the directory containing the genome information.
214    
 =item disconnected  
   
 Set to a true value if the application should be considered to be disconnected  
 from the network - that is, do not attempt to connect to a Sapling server  
 to load subsystem data.  
   
215  =back  =back
216    
217  The object created contains the following fields.  The object created contains the following fields.
# Line 253  Line 244 
244    
245  sub new {  sub new {
246      # Get the parameters.      # Get the parameters.
247      my ($class, $sap, $genome, $directory, $disconnected) = @_;      my ($class, $sap, $genome, $directory) = @_;
248      # Create the object.      # Create the object.
249      my $retVal = SaplingDataLoader::new($class, $sap, qw(contigs dna pegs rnas));      my $retVal = SaplingDataLoader::new($class, $sap, qw(contigs dna pegs rnas));
250      # Add our specialized data.      # Add our specialized data.
# Line 261  Line 252 
252      $retVal->{directory} = $directory;      $retVal->{directory} = $directory;
253      # Leave the assignment hash undefined until we populate it.      # Leave the assignment hash undefined until we populate it.
254      $retVal->{assignHash} = undef;      $retVal->{assignHash} = undef;
255      $retVal->{disconnected} = defined($disconnected) ? 1 : 0;      # Create an MD5 Computer for this genome.
256        $retVal->{md5} = MD5Computer->new();
257      # Return the result.      # Return the result.
258      return $retVal;      return $retVal;
259  }  }
# Line 287  Line 279 
279      # Get the genome ID.      # Get the genome ID.
280      my $genome = $self->{genome};      my $genome = $self->{genome};
281      # These variables will contain the current contig ID, the current contig length,      # These variables will contain the current contig ID, the current contig length,
282      # the ordinal of the current chunk, the accumulated DNA sequence, and its length.      # the accumulated DNA sequence for the current chunk, and its length.
283      my ($contigID, $contigLen, $ordinal, $chunk, $chunkLen) = (undef, 0, 0, '', 0);      my ($contigID, $contigLen, $chunk, $chunkLen) = (undef, 0, '', 0);
284        # This variable contains a list of the chunks, for use in computing the MD5.
285        my @chunks;
286      # Loop through the contig file.      # Loop through the contig file.
287      while (! eof $ih) {      while (! eof $ih) {
288          # Get the current record.          # Get the current record.
# Line 299  Line 293 
293              my $newContigID = $1;              my $newContigID = $1;
294              # Is there a current contig?              # Is there a current contig?
295              if (defined $contigID) {              if (defined $contigID) {
296                  # Yes. Output the current chunk.                  # Yes. Output the contig.
297                  $self->OutputChunk($contigID, $ordinal, $chunk);                  $self->OutputContig($contigID, $contigLen, $chunk, \@chunks);
                 # Output the contig.  
                 $self->OutputContig($contigID, $contigLen);  
298              }              }
299              # Compute the new contig ID. We need to insure it has a genome ID in front.              # Compute the new contig ID. We need to insure it has a genome ID in front.
300              $self->FixContig(\$newContigID);              $self->FixContig(\$newContigID);
301              # Initialize the new contig.              # Initialize the new contig.
302              $contigID = $newContigID;              $contigID = $newContigID;
303              $contigLen = 0;              $contigLen = 0;
             $ordinal = 0;  
304              $chunk = '';              $chunk = '';
305              $chunkLen = 0;              $chunkLen = 0;
306                @chunks = ();
307          } else {          } else {
308              # Here we have more DNA in the current contig. Are we at the end of              # Here we have more DNA in the current contig. Are we at the end of
309              # the current chunk?              # the current chunk?
# Line 325  Line 317 
317                  # Yes. Create the actual chunk.                  # Yes. Create the actual chunk.
318                  $chunk .= substr($line, 0, $segmentLength - $chunkLen);                  $chunk .= substr($line, 0, $segmentLength - $chunkLen);
319                  # Write it out.                  # Write it out.
320                  $self->OutputChunk($contigID, $ordinal, $chunk);                  $self->OutputChunk($contigID, scalar @chunks, $chunk);
321                  # Set up the new chunk.                  # Set up the new chunk.
322                  $chunk = substr($line, $segmentLength - $chunkLen);                  $chunk = substr($line, $segmentLength - $chunkLen);
                 $ordinal++;  
323                  $chunkLen = length($chunk);                  $chunkLen = length($chunk);
324                    push @chunks, $chunk;
325              }              }
326              # Update the contig length.              # Update the contig length.
327              $contigLen += $lineLen;              $contigLen += $lineLen;
# Line 337  Line 329 
329      }      }
330      # Is there a current contig?      # Is there a current contig?
331      if (defined $contigID) {      if (defined $contigID) {
332          # Yes. Output the residual chunk.          # Yes. Output the contig itself.
333          $self->OutputChunk($contigID, $ordinal, $chunk);          $self->OutputContig($contigID, $contigLen, $chunk, \@chunks);
         # Output the contig itself.  
         $self->OutputContig($contigID, $contigLen);  
334      }      }
335  }  }
336    
# Line 387  Line 377 
377    
378  =head3 OutputContig  =head3 OutputContig
379    
380      $loaderObject->OutputContig($contigID, $contigLen);      $loaderObject->OutputContig($contigID, $contigLen, $chunk, \@chunks);
381    
382  Write out the current contig.  Write out the current contig.
383    
# Line 401  Line 391 
391    
392  Length of the contig in base pairs.  Length of the contig in base pairs.
393    
394    =item chunk
395    
396    Last DNA chunk of the contig.
397    
398    =item chunks
399    
400    Reference to a list of the DNA chunks up to (but not including) the last one.
401    
402  =back  =back
403    
404  =cut  =cut
405    
406  sub OutputContig {  sub OutputContig {
407      # Get the parameters.      # Get the parameters.
408      my ($self, $contigID, $contigLen) = @_;      my ($self, $contigID, $contigLen, $chunk, $chunks) = @_;
409      # Get the sapling object.      # Get the sapling object.
410      my $sap = $self->{sap};      my $sap = $self->{sap};
411        # Get the MD5 computer.
412        my $md5C = $self->{md5};
413        # Output the last chunk.
414        $self->OutputChunk($contigID, scalar @$chunks, $chunk);
415      # Connect the contig to the genome.      # Connect the contig to the genome.
416      $sap->InsertObject('IsMadeUpOf', from_link => $self->{genome}, to_link => $contigID);      $sap->InsertObject('IsMadeUpOf', from_link => $self->{genome}, to_link => $contigID);
417        # Compute the MD5.
418        push @$chunks, $chunk;
419        my $contigMD5 = $md5C->ProcessContig($contigID, $chunks);
420      # Output the contig record.      # Output the contig record.
421      $sap->InsertObject('Contig', id => $contigID, length => $contigLen);      $sap->InsertObject('Contig', id => $contigID, length => $contigLen,
422                md5_identifier => $contigMD5);
423      # Record the contig.      # Record the contig.
424      $self->{stats}->Add(contigs => 1);      $self->{stats}->Add(contigs => 1);
425      $self->{stats}->Add(dna => $contigLen);      $self->{stats}->Add(dna => $contigLen);
# Line 586  Line 592 
592      # This hash will track the features we've created. If a feature is found a second      # This hash will track the features we've created. If a feature is found a second
593      # time, it overwrites the original.      # time, it overwrites the original.
594      my %fidHash;      my %fidHash;
595      # Finally, we need the timestamp hash. The initial feature population      # This hash tracks the deleted features. We don't want to update these.
596        my %deleted_features;
597      # Insure we have a tbl file for this feature type.      # Insure we have a tbl file for this feature type.
598      my $fileName = "$featureDir/$type/tbl";      my $fileName = "$featureDir/$type/tbl";
     my %deleted_features;  
599      if (-f $fileName) {      if (-f $fileName) {
600          # We have one, so we can read through it. First, however, we need to get the list          # We have one, so we can read through it. First, however, we need to get the list
601          # of deleted features and remove them from the assignment hash. This insures          # of deleted features and remove them from the assignment hash. This insures
# Line 618  Line 624 
624                  if ($fidHash{$fid}) {                  if ($fidHash{$fid}) {
625                      $sap->Delete(Feature => $fid);                      $sap->Delete(Feature => $fid);
626                      $stats->Add(duplicateFid => 1);                      $stats->Add(duplicateFid => 1);
627                    } else {
628                        # It doesn't exist, so record it in the statistics.
629                        $stats->Add($type => 1);
630                  }                  }
631                  # If this is RNA, the alias list is always empty. Sometimes, the functional                  # If this is RNA, the alias list is always empty. Sometimes, the functional
632                  # assignment is found there.                  # assignment is found there.
# Line 628  Line 637 
637                      @aliases = ();                      @aliases = ();
638                  }                  }
639                  # Add the feature to the database.                  # Add the feature to the database.
640                  $self->AddFeature($fid, $assignHash->{$fid}, $locations, \@aliases,                  my $function = $assignHash->{$fid} || "";
641                    $self->AddFeature($fid, $function, $locations, \@aliases,
642                                    $protHash->{$fid}, $evHash->{$fid});                                    $protHash->{$fid}, $evHash->{$fid});
643                  # Denote we've added this feature, so that if a duplicate occurs we're ready.                  # Denote we've added this feature, so that if a duplicate occurs we're ready.
644                  $fidHash{$fid} = 1;                  $fidHash{$fid} = 1;
# Line 777  Line 787 
787    
788  =head3 LoadSubsystems  =head3 LoadSubsystems
789    
790      $loaderObject->LoadSubsystems();      $loaderObject->LoadSubsystems($subsysList);
791    
792    Load the subsystem data into the database. This requires looking through the
793    bindings and using them to connect to the subsystems that already exist in the
794    database. If the subsystem does not exist, its bindings will not be loaded.
795    
796    =over 4
797    
798  Load the subsystem data into the database. This includes all of the subsystems,  =item subsysList
799  their categories, roles, and the bindings that connect them to this genome's features.  
800    Reference to a list of subsystem IDs. If specified, only the subsystems in the
801    list will be processed. This is useful when a particular set of subsystems
802    has been replaced.
803    
804    =back
805    
806  =cut  =cut
807    
808  sub LoadSubsystems {  sub LoadSubsystems {
809      # Get the parameters.      # Get the parameters.
810      my ($self) = @_;      my ($self, $subsysList) = @_;
   
     #  
     # If we are running in disconnected mode, do not actually load subsystems.  
     # They rely too much on information from the external sapling.  
     #  
     if ($self->{disconnected})  
     {  
         return;  
     }  
   
811      # Get the sapling object.      # Get the sapling object.
812      my $sap = $self->{sap};      my $sap = $self->{sap};
813      # Get the statistics object.      # Get the statistics object.
814      my $stats = $self->{stats};      my $stats = $self->{stats};
815      # Get the genome ID.      # Get the genome ID.
816      my $genome = $self->{genome};      my $genome = $self->{genome};
817      # Connect to the Sapling server so we can get the subsystem variant and role information.      # Compute the subsystem and binding file names.
818      my $sapO = SAPserver->new();      my $subFileName = "$self->{directory}/Subsystems/subsystems";
819        my $bindFileName = "$self->{directory}/Subsystems/bindings";
820        # Only proceed if both exist.
821        if (! -f $subFileName || ! -f $bindFileName) {
822            Trace("Missing subsystem data for $genome.") if T(1);
823            $stats->Add(noSubsystems => 1);
824        } else {
825      # This hash maps subsystem IDs to molecular machine IDs.      # This hash maps subsystem IDs to molecular machine IDs.
826      my %machines;      my %machines;
827      # This hash maps subsystem/role pairs to machine role IDs.      # This hash maps subsystem/role pairs to machine role IDs.
828      my %machineRoles;      my %machineRoles;
829      # The first step is to create the subsystems themselves and connect them to the          # This hash will contain the list of subsystems found in the database.
830      # genome. A list is given in the subsystems file of the Subsystems directory.          my %subsystems;
831      my $ih = Open(undef, "<$self->{directory}/Subsystems/subsystems");          # We loop through the subsystems, looking for the ones already in the
832      # Create the field hash for the subsystem query.          # database. The list is given in the subsystems file of the Subsystems
833      my @fields = qw(Subsystem(id) Subsystem(cluster-based) Subsystem(curator) Subsystem(description)          # directory.
834                      Subsystem(experimental) Subsystem(notes) Subsystem(private) Subsystem(usable)          my $ih = Open(undef, "<$subFileName");
                     Subsystem(version)  
                     Includes(from-link) Includes(to-link) Includes(abbreviation) Includes(auxiliary)  
                     Includes(sequence)  
                     Role(id) Role(hypothetical) Role(role-index));  
     my %fields = map { $_ => $_ } @fields;  
835      # Loop through the subsystems in the file, insuring we have them in the database.      # Loop through the subsystems in the file, insuring we have them in the database.
836      while (! eof $ih) {      while (! eof $ih) {
837          # Get this subsystem.          # Get this subsystem.
# Line 827  Line 839 
839          Trace("Processing subsystem $subsystem variant $variant.") if T(SaplingDataLoader => 3);          Trace("Processing subsystem $subsystem variant $variant.") if T(SaplingDataLoader => 3);
840          # Normalize the subsystem name.          # Normalize the subsystem name.
841          $subsystem = $sap->SubsystemID($subsystem);          $subsystem = $sap->SubsystemID($subsystem);
842                # Insure the subsystem is in the database and is one we're interested in.
843                if ((! $subsysList || (grep { $_ eq $subsystem } @$subsysList)) &&
844                        $sap->Exists(Subsystem => $subsystem)) {
845                    # We have the subsystem in the database. We need to compute the machine
846                    # role IDs and create the molecular machine. First, we need to remember
847                    # the subsystem.
848                    $subsystems{$subsystem} = 1;
849          # Compute this subsystem's MD5.          # Compute this subsystem's MD5.
850          my $subsystemMD5 = ERDB::DigestKey($subsystem);          my $subsystemMD5 = ERDB::DigestKey($subsystem);
851          # Insure the subsystem is in the database.                  my $rolePrefix = "$subsystemMD5:$genome";
         if (! $sap->Exists(Subsystem => $subsystem)) {  
             # The subsystem isn't present, so we need to read it. We get the subsystem,  
             # its roles, and its classification information. The variant will be added  
             # later if we need it.  
             my $roleList = $sapO->get(-objects => "Subsystem Includes Role",  
                                       -filter => {"Subsystem(id)" => ["=", $subsystem] },  
                                       -fields => \%fields);  
             # Only proceed if we found some roles.  
             if (@$roleList > 0) {  
                 # Get the subsystem information from the first role and create the subsystem.  
                 my $roleH = $roleList->[0];  
                 my %subFields = SaplingDataLoader::ExtractFields(Subsystem => $roleH);  
                 $sap->InsertObject('Subsystem', %subFields);  
                 $stats->Add(subsystems => 1);  
                 # Now loop through the roles. The Includes records are always inserted, but the  
                 # roles are only inserted if they don't already exist.  
                 for $roleH (@$roleList) {  
                     # Create the Includes record.  
                     my %incFields = SaplingDataLoader::ExtractFields(Includes => $roleH);  
                     $sap->InsertObject('Includes', %incFields);  
                     # Insure we have the role in place.  
                     my %roleFields = SaplingDataLoader::ExtractFields(Role => $roleH);  
                     my $roleID = $roleFields{id};  
                     delete $roleFields{id};  
                     $self->InsureEntity('Role', $roleID, %roleFields);  
                     # Compute the machine-role ID.  
                     my $machineRoleID = join(":", $subsystemMD5, $genome, $incFields{abbreviation});  
                     $machineRoles{$subsystem}{$roleID} = $machineRoleID;  
                     $stats->Add(subsystemRoles => 1);  
                 }  
             }  
         } else {  
             # We already have the subsystem in the database, but we still need to compute the  
             # machine role IDs. We need information from the sapling server for this.  
             my $subHash = $sapO->subsystem_roles(-ids => $subsystem, -aux => 1, -abbr => 1);  
852              # Loop through the roles.              # Loop through the roles.
853              my $roleList = $subHash->{$subsystem};                  my @roleList = $sap->GetAll('Includes', 'Includes(from-link) = ?',
854              for my $roleTuple (@$roleList) {                          [$subsystem], "to-link abbreviation");
855                    for my $roleTuple (@roleList) {
856                  my ($roleID, $abbr) = @$roleTuple;                  my ($roleID, $abbr) = @$roleTuple;
857                  my $machineRoleID = join(":", $subsystemMD5, $genome, $abbr);                      my $machineRoleID = $rolePrefix . '::' . $abbr;
858                  $machineRoles{$subsystem}{$roleID} = $machineRoleID;                  $machineRoles{$subsystem}{$roleID} = $machineRoleID;
859              }              }
860          }                  # Next we need the variant code and key.
861          # Now we need to connect the variant to the subsystem. First, we compute the real                  my $variantCode = BaseSaplingLoader::Starless($variant);
         # variant code by removing the asterisks.  
         my $variantCode = $variant;  
         $variantCode =~ s/^\*+//;  
         $variantCode =~ s/\*+$//;  
         # Compute the variant key.  
862          my $variantKey = ERDB::DigestKey("$subsystem:$variantCode");          my $variantKey = ERDB::DigestKey("$subsystem:$variantCode");
863          # Insure we have it in the database.                  # Now we create the molecular machine connecting this genome to the
864          if (! $sap->Exists(Variant => $variantKey)) {                  # subsystem variant.
             # Get the variant from the sapling server.  
             my $variantH = $sapO->get(-objects => "Variant",  
                                       -filter => {"Variant(id)" => ["=", $variantKey]},  
                                       -fields => {"Variant(code)" => "code",  
                                                   "Variant(comment)" => "comment",  
                                                   "Variant(type)" => "type",  
                                                   "Variant(role-rule)" => "role-rule"},  
                                       -firstOnly => 1);  
             $self->InsureEntity('Variant', $variantKey, %$variantH);  
             # Connect it to the subsystem.  
             $sap->InsertObject('Describes', from_link => $subsystem, to_link => $variantKey);  
             $stats->Add(subsystemVariants => 1);  
         }  
         # Now we create the molecular machine connecting this genome to the subsystem  
         # variant.  
865          my $machineID = ERDB::DigestKey("$subsystem:$variantCode:$genome");          my $machineID = ERDB::DigestKey("$subsystem:$variantCode:$genome");
866          $sap->InsertObject('Uses', from_link => $genome, to_link => $machineID);          $sap->InsertObject('Uses', from_link => $genome, to_link => $machineID);
867          $sap->InsertObject('MolecularMachine', id => $machineID, curated => 0, region => '');          $sap->InsertObject('MolecularMachine', id => $machineID, curated => 0, region => '');
# Line 904  Line 869 
869          # Remember the machine ID.          # Remember the machine ID.
870          $machines{$subsystem} = $machineID;          $machines{$subsystem} = $machineID;
871      }      }
872      # Now we go through the bindings file. This file connects the subsystem roles to          }
873      # the molecular machines.          # Now we go through the bindings file. This file connects the subsystem
874      $ih = Open(undef, "<$self->{directory}/Subsystems/bindings");          # roles to the molecular machines.
875            $ih = Open(undef, "<$bindFileName");
876      # Loop through the bindings.      # Loop through the bindings.
877      while (! eof $ih) {      while (! eof $ih) {
878          # Get the binding data.          # Get the binding data.
879          my ($subsystem, $role, $fid) = Tracer::GetLine($ih);          my ($subsystem, $role, $fid) = Tracer::GetLine($ih);
880          # Normalize the subsystem name.          # Normalize the subsystem name.
881          $subsystem = $sap->SubsystemID($subsystem);          $subsystem = $sap->SubsystemID($subsystem);
882                # Insure the subsystem is in the database.
883                if ($subsystems{$subsystem}) {
884          # Compute the machine role.          # Compute the machine role.
885          my $machineRoleID = $machineRoles{$subsystem}{$role};          my $machineRoleID = $machineRoles{$subsystem}{$role};
886          # Insure it exists.          # Insure it exists.
# Line 928  Line 896 
896          $sap->InsertObject('Contains', from_link => $machineRoleID, to_link => $fid);          $sap->InsertObject('Contains', from_link => $machineRoleID, to_link => $fid);
897      }      }
898  }  }
899        }
900    }
901    
902  =head3 CreateGenome  =head3 CreateGenome
903    
# Line 950  Line 920 
920      my $dir = $self->{directory};      my $dir = $self->{directory};
921      # Get the sapling database.      # Get the sapling database.
922      my $sap = $self->{sap};      my $sap = $self->{sap};
923        # Get the MD5 computer.
924        my $md5C = $self->{md5};
925      # We'll put the genome attributes in here.      # We'll put the genome attributes in here.
926      my %fields;      my %fields;
927      # Check for a basic statistics file.      # Check for a basic statistics file.
# Line 977  Line 949 
949          $fields{'scientific-name'} = $line;          $fields{'scientific-name'} = $line;
950          # Get the taxonomy and extract the domain from it.          # Get the taxonomy and extract the domain from it.
951          $ih = Open(undef, "<$dir/TAXONOMY");          $ih = Open(undef, "<$dir/TAXONOMY");
952          ($fields{domain}) = split /;/, <$ih>, 2;          ($fields{domain}) = split m/;/, <$ih>, 2;
953      }      }
954      # Get the counts from the statistics object.      # Get the counts from the statistics object.
955      my $stats = $self->{stats};      my $stats = $self->{stats};
# Line 996  Line 968 
968      }      }
969      # Use the domain to determine whether or not the genome is prokaryotic.      # Use the domain to determine whether or not the genome is prokaryotic.
970      $fields{prokaryotic} = PROK_FLAG->{$fields{domain}} || 0;      $fields{prokaryotic} = PROK_FLAG->{$fields{domain}} || 0;
971        # Compute the genome MD5.
972        $fields{md5_identifier} = $md5C->CloseGenome();
973      # Finally, add the genome ID.      # Finally, add the genome ID.
974      $fields{id} = $self->{genome};      $fields{id} = $self->{genome};
975      # Create the genome record.      # Create the genome record.

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.18

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3