[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.56, Fri Jul 14 01:37:07 2006 UTC revision 1.62, Sun Jul 30 05:44:57 2006 UTC
# Line 489  Line 489 
489              # Get the feature list for this genome.              # Get the feature list for this genome.
490              my $features = $fig->all_features_detailed($genomeID);              my $features = $fig->all_features_detailed($genomeID);
491              # Sort and count the list.              # Sort and count the list.
492              my @featureData = sort { $a->[0] cmp $b->[0] } @{$features};              my @featureTuples = sort { $a->[0] cmp $b->[0] } @{$features};
493              my $count = scalar @featureData;              my $count = scalar @featureTuples;
494              Trace("$count features found for genome $genomeID.") if T(3);              Trace("$count features found for genome $genomeID.") if T(3);
495              # Set up for our duplicate-feature check.              # Set up for our duplicate-feature check.
496              my $oldFeatureID = "";              my $oldFeatureID = "";
497              # Loop through the features.              # Loop through the features.
498              for my $featureData (@{$features}) {              for my $featureTuple (@featureTuples) {
499                  # Split the tuple.                  # Split the tuple.
500                  my ($featureID, $locations, undef, $type) = @{$featureData};                  my ($featureID, $locations, undef, $type) = @{$featureTuple};
501                  # Check for duplicates.                  # Check for duplicates.
502                  if ($featureID eq $oldFeatureID) {                  if ($featureID eq $oldFeatureID) {
503                      Trace("Duplicate feature $featureID found.") if T(1);                      Trace("Duplicate feature $featureID found.") if T(1);
# Line 858  Line 858 
858                      }                      }
859                  }                  }
860              }              }
861            }
862              # Now we loop through the diagrams. We need to create the diagram records              # Now we loop through the diagrams. We need to create the diagram records
863              # and link each diagram to its roles. Note that only roles which occur              # and link each diagram to its roles. Note that only roles which occur
864              # in subsystems (and therefore appear in the %ecToRoles hash) are              # in subsystems (and therefore appear in the %ecToRoles hash) are
# Line 891  Line 892 
892                  }                  }
893              }              }
894          }          }
     }  
895      # Finish the load.      # Finish the load.
896      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
897      return $retVal;      return $retVal;
# Line 1226  Line 1226 
1226      } else {      } else {
1227          Trace("Generating external data.") if T(2);          Trace("Generating external data.") if T(2);
1228          # We loop through the files one at a time. First, the organism file.          # We loop through the files one at a time. First, the organism file.
1229          Open(\*ORGS, "<$FIG_Config::global/ext_org.table");          Open(\*ORGS, "sort +0 -1 -u -t\"\t\" $FIG_Config::global/ext_org.table |");
1230          my $orgLine;          my $orgLine;
1231          while (defined($orgLine = <ORGS>)) {          while (defined($orgLine = <ORGS>)) {
1232              # Clean the input line.              # Clean the input line.
# Line 1238  Line 1238 
1238          close ORGS;          close ORGS;
1239          # Now the function file.          # Now the function file.
1240          my $funcLine;          my $funcLine;
1241          Open(\*FUNCS, "<$FIG_Config::global/ext_func.table");          Open(\*FUNCS, "sort +0 -1 -u -t\"\t\" $FIG_Config::global/ext_func.table |");
1242          while (defined($funcLine = <FUNCS>)) {          while (defined($funcLine = <FUNCS>)) {
1243              # Clean the line ending.              # Clean the line ending.
1244              chomp $funcLine;              chomp $funcLine;
# Line 1459  Line 1459 
1459          # Get the database handle.          # Get the database handle.
1460          my $dbh = $fig->db_handle();          my $dbh = $fig->db_handle();
1461          # Ask for the synonyms.          # Ask for the synonyms.
1462          my $sth = $dbh->prepare_command("SELECT syn_id, maps_to FROM peg_synonyms ORDER BY syn_id");          my $sth = $dbh->prepare_command("SELECT maps_to, syn_id FROM peg_synonyms ORDER BY maps_to");
1463          my $result = $sth->execute();          my $result = $sth->execute();
1464          if (! defined($result)) {          if (! defined($result)) {
1465              Confess("Database error in Synonym load: " . $sth->errstr());              Confess("Database error in Synonym load: " . $sth->errstr());
# Line 1497  Line 1497 
1497      return $retVal;      return $retVal;
1498  }  }
1499    
1500    =head3 LoadFamilyData
1501    
1502    C<< my $stats = $spl->LoadFamilyData(); >>
1503    
1504    Load the protein families into Sprout.
1505    
1506    The following relations are loaded by this method.
1507    
1508        Family
1509        ContainsFeature
1510    
1511    The source information for these relations is taken from the C<families_for_protein>,
1512    C<family_function>, and C<sz_family> methods of the B<FIG> object.
1513    
1514    =over 4
1515    
1516    =item RETURNS
1517    
1518    Returns a statistics object for the loads.
1519    
1520    =back
1521    
1522    =cut
1523    #: Return Type $%;
1524    sub LoadFamilyData {
1525        # Get this object instance.
1526        my ($self) = @_;
1527        # Get the FIG object.
1528        my $fig = $self->{fig};
1529        # Get the genome hash.
1530        my $genomeHash = $self->{genomes};
1531        # Create load objects for the tables we're loading.
1532        my $loadFamily = $self->_TableLoader('Family');
1533        my $loadContainsFeature = $self->_TableLoader('ContainsFeature');
1534        if ($self->{options}->{loadOnly}) {
1535            Trace("Loading from existing files.") if T(2);
1536        } else {
1537            Trace("Generating family data.") if T(2);
1538            # Create a hash for the family IDs.
1539            my %familyHash = ();
1540            # Loop through the genomes.
1541            for my $genomeID (sort keys %{$genomeHash}) {
1542                Trace("Processing features for $genomeID.") if T(2);
1543                # Loop through this genome's PEGs.
1544                for my $fid ($fig->all_features($genomeID, "peg")) {
1545                    $loadContainsFeature->Add("features", 1);
1546                    # Get this feature's families.
1547                    my @families = $fig->families_for_protein($fid);
1548                    # Loop through the families, connecting them to the feature.
1549                    for my $family (@families) {
1550                        $loadContainsFeature->Put($family, $fid);
1551                        # If this is a new family, create a record for it.
1552                        if (! exists $familyHash{$family}) {
1553                            $familyHash{$family} = 1;
1554                            $loadFamily->Add("families", 1);
1555                            my $size = $fig->sz_family($family);
1556                            my $func = $fig->family_function($family);
1557                            $loadFamily->Put($family, $size, $func);
1558                        }
1559                    }
1560                }
1561            }
1562        }
1563        # Finish the load.
1564        my $retVal = $self->_FinishAll();
1565        return $retVal;
1566    }
1567    
1568  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1569    

Legend:
Removed from v.1.56  
changed lines
  Added in v.1.62

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3