[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.40, Thu Jun 8 15:37:32 2006 UTC | revision 1.53, Mon Jul 10 21:55:06 2006 UTC
# Line 340  Line 340 
340      my $fig = $self->{fig};      my $fig = $self->{fig};
341      # Get the genome hash.      # Get the genome hash.
342      my $genomeFilter = $self->{genomes};      my $genomeFilter = $self->{genomes};
343      my $genomeCount = (keys %{$genomeFilter});      # Set up an ID counter for the PCHs.
344      my $featureCount = $genomeCount * 4000;      my $pchID = 0;
345      # Start the loads.      # Start the loads.
346      my $loadCoupling = $self->_TableLoader('Coupling');      my $loadCoupling = $self->_TableLoader('Coupling');
347      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);
# Line 375  Line 375 
375                  for my $coupleData (@couplings) {                  for my $coupleData (@couplings) {
376                      my ($peg2, $score) = @{$coupleData};                      my ($peg2, $score) = @{$coupleData};
377                      # Compute the coupling ID.                      # Compute the coupling ID.
378                      my $coupleID = Sprout::CouplingID($peg1, $peg2);                      my $coupleID = $self->{erdb}->CouplingID($peg1, $peg2);
379                      if (! exists $dupHash{$coupleID}) {                      if (! exists $dupHash{$coupleID}) {
380                          $loadCoupling->Add("couplingIn");                          $loadCoupling->Add("couplingIn");
381                          # Here we have a new coupling to store in the load files.                          # Here we have a new coupling to store in the load files.
# Line 411  Line 411 
411                              }                              }
412                          }                          }
413                          for my $evidenceID (keys %evidenceMap) {                          for my $evidenceID (keys %evidenceMap) {
414                                # Get the ID for this evidence.
415                                $pchID++;
416                              # Create the evidence record.                              # Create the evidence record.
417                              my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};                              my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};
418                              $loadPCH->Put($evidenceID, $usage);                              $loadPCH->Put($pchID, $usage);
419                              # Connect it to the coupling.                              # Connect it to the coupling.
420                              $loadIsEvidencedBy->Put($coupleID, $evidenceID);                              $loadIsEvidencedBy->Put($coupleID, $pchID);
421                              # Connect it to the features.                              # Connect it to the features.
422                              $loadUsesAsEvidence->Put($evidenceID, $peg3, 1);                              $loadUsesAsEvidence->Put($pchID, $peg3, 1);
423                              $loadUsesAsEvidence->Put($evidenceID, $peg4, 2);                              $loadUsesAsEvidence->Put($pchID, $peg4, 2);
424                          }                          }
425                      }                      }
426                  }                  }
# Line 634  Line 636 
636  The following relations are loaded by this method.  The following relations are loaded by this method.
637    
638      Subsystem      Subsystem
639        SubsystemClass
640      Role      Role
641      RoleEC      RoleEC
642      SSCell      SSCell
# Line 696  Line 699 
699      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $self->PrimaryOnly);      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $self->PrimaryOnly);
700      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $self->PrimaryOnly);      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $self->PrimaryOnly);
701      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $self->PrimaryOnly);      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $self->PrimaryOnly);
702        my $loadSubsystemClass = $self->_TableLoader('SubsystemClass', $self->PrimaryOnly);
703      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
704          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
705      } else {      } else {
# Line 721  Line 725 
725                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
726                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
727                  $loadSubsystem->Put($subsysID, $curator, $notes);                  $loadSubsystem->Put($subsysID, $curator, $notes);
728                    my $class = $fig->subsystem_classification($subsysID);
729                    if ($class) {
730                        $loadSubsystemClass->Put($subsysID, $class);
731                    }
732                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.
733                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
734                      # Connect to this role.                      # Connect to this role.
# Line 785  Line 793 
793                          if ($pegCount > 0) {                          if ($pegCount > 0) {
794                              Trace("$pegCount PEGs in $cellCount cells for $genomeID.") if T(3);                              Trace("$pegCount PEGs in $cellCount cells for $genomeID.") if T(3);
795                              $loadParticipatesIn->Put($genomeID, $subsysID, $variantCode);                              $loadParticipatesIn->Put($genomeID, $subsysID, $variantCode);
                             # Partition the PEGs found into clusters.  
                             my @clusters = $fig->compute_clusters(\@pegsFound, $sub);  
796                              # Create a hash mapping PEG IDs to cluster numbers.                              # Create a hash mapping PEG IDs to cluster numbers.
797                              # We default to -1 for all of them.                              # We default to -1 for all of them.
798                              my %clusterOf = map { $_ => -1 } @pegsFound;                              my %clusterOf = map { $_ => -1 } @pegsFound;
799                                # Partition the PEGs found into clusters.
800                                my @clusters = $fig->compute_clusters([keys %clusterOf], $sub);
801                              for (my $i = 0; $i <= $#clusters; $i++) {                              for (my $i = 0; $i <= $#clusters; $i++) {
802                                  my $subList = $clusters[$i];                                  my $subList = $clusters[$i];
803                                  for my $peg (@{$subList}) {                                  for my $peg (@{$subList}) {
# Line 1042  Line 1050 
1050                  # Get the annotation tuple.                  # Get the annotation tuple.
1051                  my ($peg, $timestamp, $user, $text) = @{$tuple};                  my ($peg, $timestamp, $user, $text) = @{$tuple};
1052                  # Here we fix up the annotation text. "\r" is removed,                  # Here we fix up the annotation text. "\r" is removed,
1053                  # and "\t" and "\n" are escaped. Note we use the "s"                  # and "\t" and "\n" are escaped. Note we use the "gs"
1054                  # modifier so that new-lines inside the text do not                  # modifier so that new-lines inside the text do not
1055                  # stop the substitution search.                  # stop the substitution search.
1056                  $text =~ s/\r//gs;                  $text =~ s/\r//gs;
# Line 1396  Line 1404 
1404      return $retVal;      return $retVal;
1405  }  }
1406    
1407    =head3 LoadSynonymData
1408    
1409    C<< my $stats = $spl->LoadSynonymData(); >>
1410    
1411    Load the synonym groups into Sprout.
1412    
1413    The following relations are loaded by this method.
1414    
1415        SynonymGroup
1416        IsSynonymGroupFor
1417    
1418    The source information for these relations is taken from the C<maps_to_id> method
1419    of the B<FIG> object. The process starts from the features, so it is possible
1420    that there will be duplicates in the SynonymGroup load file, since the relationship
1421    is one-to-many toward the features. The automatic sort on primary entity relations
1422    will fix this for us.
1423    
1424    =over 4
1425    
1426    =item RETURNS
1427    
1428    Returns a statistics object for the loads.
1429    
1430    =back
1431    
1432    =cut
1433    #: Return Type $%;
1434    sub LoadSynonymData {
1435        # Get this object instance.
1436        my ($self) = @_;
1437        # Get the FIG object, which supplies the feature lists and the synonym mapping.
1438        my $fig = $self->{fig};
1439        # Get the genome hash. Only its keys (the genome IDs) are used here.
1440        my $genomeHash = $self->{genomes};
1441        # Create load objects for the two relations we're loading.
1442        my $loadSynonymGroup = $self->_TableLoader('SynonymGroup');
1443        my $loadIsSynonymGroupFor = $self->_TableLoader('IsSynonymGroupFor');
1444        if ($self->{options}->{loadOnly}) {
1445            Trace("Loading from existing files.") if T(2);
1446        } else {
1447            Trace("Generating synonym group data.") if T(2);
1448            # Loop through the genomes in sorted ID order.
1449            for my $genomeID (sort keys %{$genomeHash}) {
1450                Trace("Processing $genomeID.") if T(3);
1451                # Get all of the features for this genome. The only method that does this is
1452                # all_features_detailed, which returns extra baggage that we discard.
1453                my $featureData = $fig->all_features_detailed($genomeID);
1454                my @fids = map { $_->[0] } @{$featureData};
1455                Trace(scalar(@fids) . " features found for genome $genomeID.") if T(3);
1456                # Loop through the feature IDs.
1457                for my $fid (@fids) {
1458                    # Get the synonym group ID for this feature.
1459                    my $synonym = $fig->maps_to_id($fid);
1460                    # Only proceed if the synonym is a real group (i.e. it differs from the feature ID).
1461                    if ($synonym ne $fid) {
1462                        $loadSynonymGroup->Put($synonym);
1463                        $loadIsSynonymGroupFor->Put($synonym, $fid);
1464                    }
1465                }
1466            }
1467        }
1468        # Finish the loads and return whatever _FinishAll hands back.
1469        my $retVal = $self->_FinishAll();
1470        return $retVal;
1471    }
1472    
1473    
1474  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1475    
1476  =head3 TableLoader  =head3 TableLoader
# Line 1463  Line 1538 
1538      my $retVal = Stats->new();      my $retVal = Stats->new();
1539      # Get the loader list.      # Get the loader list.
1540      my $loadList = $self->{loaders};      my $loadList = $self->{loaders};
1541        # Create a hash to hold the statistics objects, keyed on relation name.
1542        my %loaderHash = ();
1543      # Loop through the list, finishing the loads. Note that if the finish fails, we die      # Loop through the list, finishing the loads. Note that if the finish fails, we die
1544      # ignominiously. At some future point, we want to make the loads restartable.      # ignominiously. At some future point, we want to make the loads more restartable.
1545      while (my $loader = pop @{$loadList}) {      while (my $loader = pop @{$loadList}) {
1546          # Get the relation name.          # Get the relation name.
1547          my $relName = $loader->RelName;          my $relName = $loader->RelName;
# Line 1475  Line 1552 
1552              # Here we really need to finish.              # Here we really need to finish.
1553              Trace("Finishing $relName.") if T(2);              Trace("Finishing $relName.") if T(2);
1554              my $stats = $loader->Finish();              my $stats = $loader->Finish();
1555                $loaderHash{$relName} = $stats;
1556            }
1557        }
1558        # Now we loop through again, actually loading the tables. We want to finish before
1559        # loading so that if something goes wrong at this point, all the load files are usable
1560        # and we don't have to redo all that work.
1561        for my $relName (sort keys %loaderHash) {
1562            # Get the statistics for this relation.
1563            my $stats = $loaderHash{$relName};
1564            # Check for a database load.
1565              if ($self->{options}->{dbLoad}) {              if ($self->{options}->{dbLoad}) {
1566                  # Here we want to use the load file just created to load the database.                  # Here we want to use the load file just created to load the database.
1567                  Trace("Loading relation $relName.") if T(2);                  Trace("Loading relation $relName.") if T(2);
# Line 1485  Line 1572 
1572              $retVal->Accumulate($stats);              $retVal->Accumulate($stats);
1573              Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);              Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);
1574          }          }
     }  
1575      # Return the load statistics.      # Return the load statistics.
1576      return $retVal;      return $retVal;
1577  }  }

Legend:
Removed from v.1.40  
changed lines
  Added in v.1.53

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3