[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.62, Sun Jul 30 05:44:57 2006 UTC revision 1.68, Sun Sep 24 17:14:16 2006 UTC
# Line 120  Line 120 
120                      # an omitted access code can be defaulted to 1.                      # an omitted access code can be defaulted to 1.
121                      for my $genomeLine (@genomeList) {                      for my $genomeLine (@genomeList) {
122                          my ($genomeID, $accessCode) = split("\t", $genomeLine);                          my ($genomeID, $accessCode) = split("\t", $genomeLine);
123                          if (undef $accessCode) {                          if (! defined($accessCode)) {
124                              $accessCode = 1;                              $accessCode = 1;
125                          }                          }
126                          $genomes{$genomeID} = $accessCode;                          $genomes{$genomeID} = $accessCode;
# Line 266  Line 266 
266              my $extra = join " ", @extraData;              my $extra = join " ", @extraData;
267              # Get the full taxonomy.              # Get the full taxonomy.
268              my $taxonomy = $fig->taxonomy_of($genomeID);              my $taxonomy = $fig->taxonomy_of($genomeID);
269                # Open the NMPDR group file for this genome.
270                my $group;
271                if (open(TMP, "<$FIG_Config::organisms/$genomeID/NMPDR") &&
272                    defined($group = <TMP>)) {
273                    # Clean the line ending.
274                    chomp $group;
275                } else {
276                    # No group, so use the default.
277                    $group = $FIG_Config::otherGroup;
278                }
279                close TMP;
280              # Output the genome record.              # Output the genome record.
281              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID), $genus,              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID), $genus,
282                               $species, $extra, $taxonomy);                               $group, $species, $extra, $taxonomy);
283              # Now we loop through each of the genome's contigs.              # Now we loop through each of the genome's contigs.
284              my @contigs = $fig->all_contigs($genomeID);              my @contigs = $fig->all_contigs($genomeID);
285              for my $contigID (@contigs) {              for my $contigID (@contigs) {
# Line 505  Line 516 
516                      $oldFeatureID = $featureID;                      $oldFeatureID = $featureID;
517                      # Count this feature.                      # Count this feature.
518                      $loadFeature->Add("featureIn");                      $loadFeature->Add("featureIn");
519                        # Get the functional assignment.
520                        my $assignment = $fig->function_of($featureID);
521                      # Create the feature record.                      # Create the feature record.
522                      $loadFeature->Put($featureID, 1, $type);                      $loadFeature->Put($featureID, 1, $type, $assignment);
523                      # Link it to the parent genome.                      # Link it to the parent genome.
524                      $loadHasFeature->Put($genomeID, $featureID, $type);                      $loadHasFeature->Put($genomeID, $featureID, $type);
525                      # Create the aliases.                      # Create the aliases.
# Line 609  Line 622 
622              Trace("Processing features for genome $genomeID.") if T(3);              Trace("Processing features for genome $genomeID.") if T(3);
623              # Get the feature list for this genome.              # Get the feature list for this genome.
624              my $features = $fig->all_features_detailed($genomeID);              my $features = $fig->all_features_detailed($genomeID);
625                # Count the BBHs we find.
626                my $bbhCount = 0;
627              # Loop through the features.              # Loop through the features.
628              for my $featureData (@{$features}) {              for my $featureData (@{$features}) {
629                  # Split the tuple.                  # Split the tuple.
# Line 624  Line 639 
639                      if ($genomeHash->{$targetGenomeID}) {                      if ($genomeHash->{$targetGenomeID}) {
640                          $loadIsBidirectionalBestHitOf->Put($featureID, $targetID, $targetGenomeID,                          $loadIsBidirectionalBestHitOf->Put($featureID, $targetID, $targetGenomeID,
641                                                             $score);                                                             $score);
642                            $bbhCount++;
643                      }                      }
644                  }                  }
645              }              }
646                Trace("$bbhCount BBHs found for $genomeID.") if T(3);
647          }          }
648      }      }
649      # Finish the loads.      # Finish the loads.
# Line 738  Line 755 
755                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
756                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
757                  $loadSubsystem->Put($subsysID, $curator, $notes);                  $loadSubsystem->Put($subsysID, $curator, $notes);
758                  my $class = $fig->subsystem_classification($subsysID);                  my $classList = $fig->subsystem_classification($subsysID);
759                  if ($class) {                  my @classes = @$classList;
760                    if (@classes) {
761                        for my $class (@classes) {
762                      $loadSubsystemClass->Put($subsysID, $class);                      $loadSubsystemClass->Put($subsysID, $class);
763                  }                  }
764                    }
765                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.
766                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
767                      # Connect to this role.                      # Connect to this role.
# Line 944  Line 964 
964          my %propertyKeys = ();          my %propertyKeys = ();
965          my $nextID = 1;          my $nextID = 1;
966          # Loop through the genomes.          # Loop through the genomes.
967          for my $genomeID (keys %{$genomeHash}) {          for my $genomeID (sort keys %{$genomeHash}) {
968              $loadProperty->Add("genomeIn");              $loadProperty->Add("genomeIn");
969              Trace("Generating properties for $genomeID.") if T(3);              Trace("Generating properties for $genomeID.") if T(3);
970              # Get the genome's features. The feature ID is the first field in the              # Get the genome's features. The feature ID is the first field in the
# Line 1370  Line 1390 
1390    
1391      GenomeGroups      GenomeGroups
1392    
1393  There is no direct support for genome groups in FIG, so we access the SEED  Currently, we do not use groups. We used to use them for NMPDR groups,
1394    but there is no direct support for genome groups in FIG, so we access the SEED
1395  files directly.  files directly.
1396    
1397  =over 4  =over 4
# Line 1396  Line 1417 
1417          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1418      } else {      } else {
1419          Trace("Generating group data.") if T(2);          Trace("Generating group data.") if T(2);
1420          # Loop through the genomes.          # Currently there are no groups.
         my $line;  
         for my $genomeID (keys %{$genomeHash}) {  
             Trace("Processing $genomeID.") if T(3);  
             # Open the NMPDR group file for this genome.  
             if (open(TMP, "<$FIG_Config::organisms/$genomeID/NMPDR") &&  
                 defined($line = <TMP>)) {  
                 # Clean the line ending.  
                 chomp $line;  
                 # Add the group to the table. Note that there can only be one group  
                 # per genome.  
                 $loadGenomeGroups->Put($genomeID, $line);  
             }  
             close TMP;  
         }  
1421      }      }
1422      # Finish the load.      # Finish the load.
1423      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
# Line 1506  Line 1513 
1513  The following relations are loaded by this method.  The following relations are loaded by this method.
1514    
1515      Family      Family
1516      ContainsFeature      IsFamilyForFeature
1517    
1518  The source information for these relations is taken from the C<families_for_protein>,  The source information for these relations is taken from the C<families_for_protein>,
1519  C<family_function>, and C<sz_family> methods of the B<FIG> object.  C<family_function>, and C<sz_family> methods of the B<FIG> object.
# Line 1530  Line 1537 
1537      my $genomeHash = $self->{genomes};      my $genomeHash = $self->{genomes};
1538      # Create load objects for the tables we're loading.      # Create load objects for the tables we're loading.
1539      my $loadFamily = $self->_TableLoader('Family');      my $loadFamily = $self->_TableLoader('Family');
1540      my $loadContainsFeature = $self->_TableLoader('ContainsFeature');      my $loadIsFamilyForFeature = $self->_TableLoader('IsFamilyForFeature');
1541      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1542          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1543      } else {      } else {
# Line 1542  Line 1549 
1549              Trace("Processing features for $genomeID.") if T(2);              Trace("Processing features for $genomeID.") if T(2);
1550              # Loop through this genome's PEGs.              # Loop through this genome's PEGs.
1551              for my $fid ($fig->all_features($genomeID, "peg")) {              for my $fid ($fig->all_features($genomeID, "peg")) {
1552                  $loadContainsFeature->Add("features", 1);                  $loadIsFamilyForFeature->Add("features", 1);
1553                  # Get this feature's families.                  # Get this feature's families.
1554                  my @families = $fig->families_for_protein($fid);                  my @families = $fig->families_for_protein($fid);
1555                  # Loop through the families, connecting them to the feature.                  # Loop through the families, connecting them to the feature.
1556                  for my $family (@families) {                  for my $family (@families) {
1557                      $loadContainsFeature->Put($family, $fid);                      $loadIsFamilyForFeature->Put($family, $fid);
1558                      # If this is a new family, create a record for it.                      # If this is a new family, create a record for it.
1559                      if (! exists $familyHash{$family}) {                      if (! exists $familyHash{$family}) {
1560                          $familyHash{$family} = 1;                          $familyHash{$family} = 1;

Legend:
Removed from v.1.62  
changed lines
  Added in v.1.68

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3