[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.57, Sat Jul 15 08:09:13 2006 UTC revision 1.68, Sun Sep 24 17:14:16 2006 UTC
# Line 120  Line 120 
120                      # an omitted access code can be defaulted to 1.                      # an omitted access code can be defaulted to 1.
121                      for my $genomeLine (@genomeList) {                      for my $genomeLine (@genomeList) {
122                          my ($genomeID, $accessCode) = split("\t", $genomeLine);                          my ($genomeID, $accessCode) = split("\t", $genomeLine);
123                          if (undef $accessCode) {                          if (! defined($accessCode)) {
124                              $accessCode = 1;                              $accessCode = 1;
125                          }                          }
126                          $genomes{$genomeID} = $accessCode;                          $genomes{$genomeID} = $accessCode;
# Line 266  Line 266 
266              my $extra = join " ", @extraData;              my $extra = join " ", @extraData;
267              # Get the full taxonomy.              # Get the full taxonomy.
268              my $taxonomy = $fig->taxonomy_of($genomeID);              my $taxonomy = $fig->taxonomy_of($genomeID);
269                # Open the NMPDR group file for this genome.
270                my $group;
271                if (open(TMP, "<$FIG_Config::organisms/$genomeID/NMPDR") &&
272                    defined($group = <TMP>)) {
273                    # Clean the line ending.
274                    chomp $group;
275                } else {
276                    # No group, so use the default.
277                    $group = $FIG_Config::otherGroup;
278                }
279                close TMP;
280              # Output the genome record.              # Output the genome record.
281              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID), $genus,              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID), $genus,
282                               $species, $extra, $taxonomy);                               $group, $species, $extra, $taxonomy);
283              # Now we loop through each of the genome's contigs.              # Now we loop through each of the genome's contigs.
284              my @contigs = $fig->all_contigs($genomeID);              my @contigs = $fig->all_contigs($genomeID);
285              for my $contigID (@contigs) {              for my $contigID (@contigs) {
# Line 505  Line 516 
516                      $oldFeatureID = $featureID;                      $oldFeatureID = $featureID;
517                      # Count this feature.                      # Count this feature.
518                      $loadFeature->Add("featureIn");                      $loadFeature->Add("featureIn");
519                        # Get the functional assignment.
520                        my $assignment = $fig->function_of($featureID);
521                      # Create the feature record.                      # Create the feature record.
522                      $loadFeature->Put($featureID, 1, $type);                      $loadFeature->Put($featureID, 1, $type, $assignment);
523                      # Link it to the parent genome.                      # Link it to the parent genome.
524                      $loadHasFeature->Put($genomeID, $featureID, $type);                      $loadHasFeature->Put($genomeID, $featureID, $type);
525                      # Create the aliases.                      # Create the aliases.
# Line 609  Line 622 
622              Trace("Processing features for genome $genomeID.") if T(3);              Trace("Processing features for genome $genomeID.") if T(3);
623              # Get the feature list for this genome.              # Get the feature list for this genome.
624              my $features = $fig->all_features_detailed($genomeID);              my $features = $fig->all_features_detailed($genomeID);
625                # Count the BBHs we find.
626                my $bbhCount = 0;
627              # Loop through the features.              # Loop through the features.
628              for my $featureData (@{$features}) {              for my $featureData (@{$features}) {
629                  # Split the tuple.                  # Split the tuple.
# Line 624  Line 639 
639                      if ($genomeHash->{$targetGenomeID}) {                      if ($genomeHash->{$targetGenomeID}) {
640                          $loadIsBidirectionalBestHitOf->Put($featureID, $targetID, $targetGenomeID,                          $loadIsBidirectionalBestHitOf->Put($featureID, $targetID, $targetGenomeID,
641                                                             $score);                                                             $score);
642                            $bbhCount++;
643                      }                      }
644                  }                  }
645              }              }
646                Trace("$bbhCount BBHs found for $genomeID.") if T(3);
647          }          }
648      }      }
649      # Finish the loads.      # Finish the loads.
# Line 738  Line 755 
755                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
756                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
757                  $loadSubsystem->Put($subsysID, $curator, $notes);                  $loadSubsystem->Put($subsysID, $curator, $notes);
758                  my $class = $fig->subsystem_classification($subsysID);                  my $classList = $fig->subsystem_classification($subsysID);
759                  if ($class) {                  my @classes = @$classList;
760                    if (@classes) {
761                        for my $class (@classes) {
762                      $loadSubsystemClass->Put($subsysID, $class);                      $loadSubsystemClass->Put($subsysID, $class);
763                  }                  }
764                    }
765                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.
766                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
767                      # Connect to this role.                      # Connect to this role.
# Line 944  Line 964 
964          my %propertyKeys = ();          my %propertyKeys = ();
965          my $nextID = 1;          my $nextID = 1;
966          # Loop through the genomes.          # Loop through the genomes.
967          for my $genomeID (keys %{$genomeHash}) {          for my $genomeID (sort keys %{$genomeHash}) {
968              $loadProperty->Add("genomeIn");              $loadProperty->Add("genomeIn");
969              Trace("Generating properties for $genomeID.") if T(3);              Trace("Generating properties for $genomeID.") if T(3);
970              # Get the genome's features. The feature ID is the first field in the              # Get the genome's features. The feature ID is the first field in the
# Line 1226  Line 1246 
1246      } else {      } else {
1247          Trace("Generating external data.") if T(2);          Trace("Generating external data.") if T(2);
1248          # We loop through the files one at a time. First, the organism file.          # We loop through the files one at a time. First, the organism file.
1249          Open(\*ORGS, "<$FIG_Config::global/ext_org.table");          Open(\*ORGS, "sort +0 -1 -u -t\"\t\" $FIG_Config::global/ext_org.table |");
1250          my $orgLine;          my $orgLine;
1251          while (defined($orgLine = <ORGS>)) {          while (defined($orgLine = <ORGS>)) {
1252              # Clean the input line.              # Clean the input line.
# Line 1238  Line 1258 
1258          close ORGS;          close ORGS;
1259          # Now the function file.          # Now the function file.
1260          my $funcLine;          my $funcLine;
1261          Open(\*FUNCS, "<$FIG_Config::global/ext_func.table");          Open(\*FUNCS, "sort +0 -1 -u -t\"\t\" $FIG_Config::global/ext_func.table |");
1262          while (defined($funcLine = <FUNCS>)) {          while (defined($funcLine = <FUNCS>)) {
1263              # Clean the line ending.              # Clean the line ending.
1264              chomp $funcLine;              chomp $funcLine;
# Line 1370  Line 1390 
1390    
1391      GenomeGroups      GenomeGroups
1392    
1393  There is no direct support for genome groups in FIG, so we access the SEED  Currently, we do not use groups. We used to use them for NMPDR groups,
1394    but there is no direct support for genome groups in FIG, so we access the SEED
1395  files directly.  files directly.
1396    
1397  =over 4  =over 4
# Line 1396  Line 1417 
1417          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1418      } else {      } else {
1419          Trace("Generating group data.") if T(2);          Trace("Generating group data.") if T(2);
1420          # Loop through the genomes.          # Currently there are no groups.
         my $line;  
         for my $genomeID (keys %{$genomeHash}) {  
             Trace("Processing $genomeID.") if T(3);  
             # Open the NMPDR group file for this genome.  
             if (open(TMP, "<$FIG_Config::organisms/$genomeID/NMPDR") &&  
                 defined($line = <TMP>)) {  
                 # Clean the line ending.  
                 chomp $line;  
                 # Add the group to the table. Note that there can only be one group  
                 # per genome.  
                 $loadGenomeGroups->Put($genomeID, $line);  
             }  
             close TMP;  
         }  
1421      }      }
1422      # Finish the load.      # Finish the load.
1423      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
# Line 1459  Line 1466 
1466          # Get the database handle.          # Get the database handle.
1467          my $dbh = $fig->db_handle();          my $dbh = $fig->db_handle();
1468          # Ask for the synonyms.          # Ask for the synonyms.
1469          my $sth = $dbh->prepare_command("SELECT syn_id, maps_to FROM peg_synonyms ORDER BY syn_id");          my $sth = $dbh->prepare_command("SELECT maps_to, syn_id FROM peg_synonyms ORDER BY maps_to");
1470          my $result = $sth->execute();          my $result = $sth->execute();
1471          if (! defined($result)) {          if (! defined($result)) {
1472              Confess("Database error in Synonym load: " . $sth->errstr());              Confess("Database error in Synonym load: " . $sth->errstr());
# Line 1497  Line 1504 
1504      return $retVal;      return $retVal;
1505  }  }
1506    
1507    =head3 LoadFamilyData
1508    
1509    C<< my $stats = $spl->LoadFamilyData(); >>
1510    
1511    Load the protein families into Sprout.
1512    
1513    The following relations are loaded by this method.
1514    
1515        Family
1516        IsFamilyForFeature
1517    
1518    The source information for these relations is taken from the C<families_for_protein>,
1519    C<family_function>, and C<sz_family> methods of the B<FIG> object.
1520    
1521    =over 4
1522    
1523    =item RETURNS
1524    
1525    Returns a statistics object for the loads.
1526    
1527    =back
1528    
1529    =cut
1530    #: Return Type $%;
1531    sub LoadFamilyData {
1532        # Get this object instance (the only argument).
1533        my ($self) = @_;
1534        # Get the FIG object used to query features and families.
1535        my $fig = $self->{fig};
1536        # Get the genome hash; its keys are the genome IDs to process.
1537        my $genomeHash = $self->{genomes};
1538        # Create load objects for the two tables we're loading (Family and IsFamilyForFeature).
1539        my $loadFamily = $self->_TableLoader('Family');
1540        my $loadIsFamilyForFeature = $self->_TableLoader('IsFamilyForFeature');
1541        if ($self->{options}->{loadOnly}) {
1542            Trace("Loading from existing files.") if T(2);
1543        } else {
1544            Trace("Generating family data.") if T(2);
1545            # Track the family IDs already written, so each Family record is emitted only once.
1546            my %familyHash = ();
1547            # Loop through the genomes in sorted ID order.
1548            for my $genomeID (sort keys %{$genomeHash}) {
1549                Trace("Processing features for $genomeID.") if T(2);
1550                # Loop through this genome's PEGs (features requested with type "peg").
1551                for my $fid ($fig->all_features($genomeID, "peg")) {
1552                    $loadIsFamilyForFeature->Add("features", 1);
1553                    # Get the list of families for this feature.
1554                    my @families = $fig->families_for_protein($fid);
1555                    # Loop through the families, writing one family-to-feature link per family.
1556                    for my $family (@families) {
1557                        $loadIsFamilyForFeature->Put($family, $fid);
1558                        # If this family has not been seen yet, create a record with its size and function.
1559                        if (! exists $familyHash{$family}) {
1560                            $familyHash{$family} = 1;
1561                            $loadFamily->Add("families", 1);
1562                            my $size = $fig->sz_family($family);
1563                            my $func = $fig->family_function($family);
1564                            $loadFamily->Put($family, $size, $func);
1565                        }
1566                    }
1567                }
1568            }
1569        }
1570        # Finish the load and return whatever _FinishAll produces.
1571        my $retVal = $self->_FinishAll();
1572        return $retVal;
1573    }
1574    
1575  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1576    

Legend:
Removed from v.1.57  
changed lines
  Added in v.1.68

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3