[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.61, Sun Jul 30 01:41:34 2006 UTC revision 1.69, Wed Sep 27 12:34:46 2006 UTC
# Line 120  Line 120 
120                      # an omitted access code can be defaulted to 1.                      # an omitted access code can be defaulted to 1.
121                      for my $genomeLine (@genomeList) {                      for my $genomeLine (@genomeList) {
122                          my ($genomeID, $accessCode) = split("\t", $genomeLine);                          my ($genomeID, $accessCode) = split("\t", $genomeLine);
123                          if (undef $accessCode) {                          if (! defined($accessCode)) {
124                              $accessCode = 1;                              $accessCode = 1;
125                          }                          }
126                          $genomes{$genomeID} = $accessCode;                          $genomes{$genomeID} = $accessCode;
# Line 266  Line 266 
266              my $extra = join " ", @extraData;              my $extra = join " ", @extraData;
267              # Get the full taxonomy.              # Get the full taxonomy.
268              my $taxonomy = $fig->taxonomy_of($genomeID);              my $taxonomy = $fig->taxonomy_of($genomeID);
269                # Open the NMPDR group file for this genome.
270                my $group;
271                if (open(TMP, "<$FIG_Config::organisms/$genomeID/NMPDR") &&
272                    defined($group = <TMP>)) {
273                    # Clean the line ending.
274                    chomp $group;
275                } else {
276                    # No group, so use the default.
277                    $group = $FIG_Config::otherGroup;
278                }
279                close TMP;
280              # Output the genome record.              # Output the genome record.
281              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID), $genus,              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID), $genus,
282                               $species, $extra, $taxonomy);                               $group, $species, $extra, $taxonomy);
283              # Now we loop through each of the genome's contigs.              # Now we loop through each of the genome's contigs.
284              my @contigs = $fig->all_contigs($genomeID);              my @contigs = $fig->all_contigs($genomeID);
285              for my $contigID (@contigs) {              for my $contigID (@contigs) {
# Line 449  Line 460 
460      FeatureUpstream      FeatureUpstream
461      IsLocatedIn      IsLocatedIn
462      HasFeature      HasFeature
463        HasRoleInSubsystem
464    
465  =over 4  =over 4
466    
# Line 474  Line 486 
486      my $loadFeatureLink = $self->_TableLoader('FeatureLink');      my $loadFeatureLink = $self->_TableLoader('FeatureLink');
487      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');
488      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');
489      my $loadHasFeature = $self->_TableLoader('HasFeature');      my $loadHasFeature = $self->_TableLoader('HasFeature', $self->PrimaryOnly);
490        my $loadHasRoleInSubsystem = $self->_TableLoader('HasRoleInSubsystem', $self->PrimaryOnly);
491      # Get the maximum sequence size. We need this later for splitting up the      # Get the maximum sequence size. We need this later for splitting up the
492      # locations.      # locations.
493      my $chunkSize = $self->{sprout}->MaxSegment();      my $chunkSize = $self->{sprout}->MaxSegment();
# Line 505  Line 518 
518                      $oldFeatureID = $featureID;                      $oldFeatureID = $featureID;
519                      # Count this feature.                      # Count this feature.
520                      $loadFeature->Add("featureIn");                      $loadFeature->Add("featureIn");
521                        # Get the functional assignment.
522                        my $assignment = $fig->function_of($featureID);
523                      # Create the feature record.                      # Create the feature record.
524                      $loadFeature->Put($featureID, 1, $type);                      $loadFeature->Put($featureID, 1, $type, $assignment);
525                      # Link it to the parent genome.                      # Link it to the parent genome.
526                      $loadHasFeature->Put($genomeID, $featureID, $type);                      $loadHasFeature->Put($genomeID, $featureID, $type);
527                      # Create the aliases.                      # Create the aliases.
# Line 531  Line 546 
546                              $loadFeatureUpstream->Put($featureID, $upstream);                              $loadFeatureUpstream->Put($featureID, $upstream);
547                          }                          }
548                      }                      }
549                        # Now we need to find the subsystems this feature participates in.
550                        my @subsystems = $fig->peg_to_subsystems($featureID);
551                        for my $subsystem (@subsystems) {
552                            $loadHasRoleInSubsystem->Put($featureID, $subsystem);
553                        }
554                      # This part is the roughest. We need to relate the features to contig                      # This part is the roughest. We need to relate the features to contig
555                      # locations, and the locations must be split so that none of them exceed                      # locations, and the locations must be split so that none of them exceed
556                      # the maximum segment size. This simplifies the genes_in_region processing                      # the maximum segment size. This simplifies the genes_in_region processing
# Line 609  Line 629 
629              Trace("Processing features for genome $genomeID.") if T(3);              Trace("Processing features for genome $genomeID.") if T(3);
630              # Get the feature list for this genome.              # Get the feature list for this genome.
631              my $features = $fig->all_features_detailed($genomeID);              my $features = $fig->all_features_detailed($genomeID);
632                # Count the BBHs we find.
633                my $bbhCount = 0;
634              # Loop through the features.              # Loop through the features.
635              for my $featureData (@{$features}) {              for my $featureData (@{$features}) {
636                  # Split the tuple.                  # Split the tuple.
# Line 624  Line 646 
646                      if ($genomeHash->{$targetGenomeID}) {                      if ($genomeHash->{$targetGenomeID}) {
647                          $loadIsBidirectionalBestHitOf->Put($featureID, $targetID, $targetGenomeID,                          $loadIsBidirectionalBestHitOf->Put($featureID, $targetID, $targetGenomeID,
648                                                             $score);                                                             $score);
649                            $bbhCount++;
650                      }                      }
651                  }                  }
652              }              }
653                Trace("$bbhCount BBHs found for $genomeID.") if T(3);
654          }          }
655      }      }
656      # Finish the loads.      # Finish the loads.
# Line 738  Line 762 
762                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
763                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
764                  $loadSubsystem->Put($subsysID, $curator, $notes);                  $loadSubsystem->Put($subsysID, $curator, $notes);
765                  my $class = $fig->subsystem_classification($subsysID);                  my $classList = $fig->subsystem_classification($subsysID);
766                  if ($class) {                  my @classes = @$classList;
767                    if (@classes) {
768                        for my $class (@classes) {
769                      $loadSubsystemClass->Put($subsysID, $class);                      $loadSubsystemClass->Put($subsysID, $class);
770                  }                  }
771                    }
772                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.
773                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
774                      # Connect to this role.                      # Connect to this role.
# Line 944  Line 971 
971          my %propertyKeys = ();          my %propertyKeys = ();
972          my $nextID = 1;          my $nextID = 1;
973          # Loop through the genomes.          # Loop through the genomes.
974          for my $genomeID (keys %{$genomeHash}) {          for my $genomeID (sort keys %{$genomeHash}) {
975              $loadProperty->Add("genomeIn");              $loadProperty->Add("genomeIn");
976              Trace("Generating properties for $genomeID.") if T(3);              Trace("Generating properties for $genomeID.") if T(3);
977              # Get the genome's features. The feature ID is the first field in the              # Get the genome's features. The feature ID is the first field in the
# Line 1370  Line 1397 
1397    
1398      GenomeGroups      GenomeGroups
1399    
1400  There is no direct support for genome groups in FIG, so we access the SEED  Currently, we do not use groups. We used to use them for NMPDR groups,
1401    but there is no direct support for genome groups in FIG, so we access the SEED
1402  files directly.  files directly.
1403    
1404  =over 4  =over 4
# Line 1396  Line 1424 
1424          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1425      } else {      } else {
1426          Trace("Generating group data.") if T(2);          Trace("Generating group data.") if T(2);
1427          # Loop through the genomes.          # Currently there are no groups.
         my $line;  
         for my $genomeID (keys %{$genomeHash}) {  
             Trace("Processing $genomeID.") if T(3);  
             # Open the NMPDR group file for this genome.  
             if (open(TMP, "<$FIG_Config::organisms/$genomeID/NMPDR") &&  
                 defined($line = <TMP>)) {  
                 # Clean the line ending.  
                 chomp $line;  
                 # Add the group to the table. Note that there can only be one group  
                 # per genome.  
                 $loadGenomeGroups->Put($genomeID, $line);  
             }  
             close TMP;  
         }  
1428      }      }
1429      # Finish the load.      # Finish the load.
1430      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
# Line 1506  Line 1520 
1520  The following relations are loaded by this method.  The following relations are loaded by this method.
1521    
1522      Family      Family
1523      ContainsFeature      IsFamilyForFeature
1524    
1525  The source information for these relations is taken from the C<families_for_protein>,  The source information for these relations is taken from the C<families_for_protein>,
1526  C<family_function>, and C<sz_family> methods of the B<FIG> object.  C<family_function>, and C<sz_family> methods of the B<FIG> object.
# Line 1530  Line 1544 
1544      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1545      # Create load objects for the tables we're loading.      # Create load objects for the tables we're loading.
1546      my $loadFamily = $self->_TableLoader('Family');      my $loadFamily = $self->_TableLoader('Family');
1547      my $loadContainsFeature = $self->_TableLoader('ContainsFeature');      my $loadIsFamilyForFeature = $self->_TableLoader('IsFamilyForFeature');
1548      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1549          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1550      } else {      } else {
# Line 1542  Line 1556 
1556              Trace("Processing features for $genomeID.") if T(2);              Trace("Processing features for $genomeID.") if T(2);
1557              # Loop through this genome's PEGs.              # Loop through this genome's PEGs.
1558              for my $fid ($fig->all_features($genomeID, "peg")) {              for my $fid ($fig->all_features($genomeID, "peg")) {
1559                  $loadContainsFeature->Add("features", 1);                  $loadIsFamilyForFeature->Add("features", 1);
1560                  # Get this feature's families.                  # Get this feature's families.
1561                  my @families = $fig->families_for_protein($fid);                  my @families = $fig->families_for_protein($fid);
1562                  # Loop through the families, connecting them to the feature.                  # Loop through the families, connecting them to the feature.
1563                  for my $family (@families) {                  for my $family (@families) {
1564                      $loadContainsFeature->Put($family, $fid);                      $loadIsFamilyForFeature->Put($family, $fid);
1565                      # If this is a new family, create a record for it.                      # If this is a new family, create a record for it.
1566                      if (! exists $familyHash{$family}) {                      if (! exists $familyHash{$family}) {
1567                            $familyHash{$family} = 1;
1568                          $loadFamily->Add("families", 1);                          $loadFamily->Add("families", 1);
1569                          my $size = $fig->sz_family($family);                          my $size = $fig->sz_family($family);
1570                          my $func = $fig->family_function($family);                          my $func = $fig->family_function($family);
# Line 1564  Line 1579 
1579      return $retVal;      return $retVal;
1580  }  }
1581    
1582    
1583    
1584  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1585    
1586  =head3 TableLoader  =head3 TableLoader

Legend:
Removed from v.1.61  
changed lines
  Added in v.1.69

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3