[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.40, Thu Jun 8 15:37:32 2006 UTC revision 1.54, Thu Jul 13 08:47:34 2006 UTC
# Line 340  Line 340 
340      my $fig = $self->{fig};      my $fig = $self->{fig};
341      # Get the genome hash.      # Get the genome hash.
342      my $genomeFilter = $self->{genomes};      my $genomeFilter = $self->{genomes};
343      my $genomeCount = (keys %{$genomeFilter});      # Set up an ID counter for the PCHs.
344      my $featureCount = $genomeCount * 4000;      my $pchID = 0;
345      # Start the loads.      # Start the loads.
346      my $loadCoupling = $self->_TableLoader('Coupling');      my $loadCoupling = $self->_TableLoader('Coupling');
347      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);
# Line 375  Line 375 
375                  for my $coupleData (@couplings) {                  for my $coupleData (@couplings) {
376                      my ($peg2, $score) = @{$coupleData};                      my ($peg2, $score) = @{$coupleData};
377                      # Compute the coupling ID.                      # Compute the coupling ID.
378                      my $coupleID = Sprout::CouplingID($peg1, $peg2);                      my $coupleID = $self->{erdb}->CouplingID($peg1, $peg2);
379                      if (! exists $dupHash{$coupleID}) {                      if (! exists $dupHash{$coupleID}) {
380                          $loadCoupling->Add("couplingIn");                          $loadCoupling->Add("couplingIn");
381                          # Here we have a new coupling to store in the load files.                          # Here we have a new coupling to store in the load files.
# Line 411  Line 411 
411                              }                              }
412                          }                          }
413                          for my $evidenceID (keys %evidenceMap) {                          for my $evidenceID (keys %evidenceMap) {
414                                # Get the ID for this evidence.
415                                $pchID++;
416                              # Create the evidence record.                              # Create the evidence record.
417                              my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};                              my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};
418                              $loadPCH->Put($evidenceID, $usage);                              $loadPCH->Put($pchID, $usage);
419                              # Connect it to the coupling.                              # Connect it to the coupling.
420                              $loadIsEvidencedBy->Put($coupleID, $evidenceID);                              $loadIsEvidencedBy->Put($coupleID, $pchID);
421                              # Connect it to the features.                              # Connect it to the features.
422                              $loadUsesAsEvidence->Put($evidenceID, $peg3, 1);                              $loadUsesAsEvidence->Put($pchID, $peg3, 1);
423                              $loadUsesAsEvidence->Put($evidenceID, $peg4, 2);                              $loadUsesAsEvidence->Put($pchID, $peg4, 2);
424                          }                          }
425                      }                      }
426                  }                  }
# Line 486  Line 488 
488              $loadFeature->Add("genomeIn");              $loadFeature->Add("genomeIn");
489              # Get the feature list for this genome.              # Get the feature list for this genome.
490              my $features = $fig->all_features_detailed($genomeID);              my $features = $fig->all_features_detailed($genomeID);
491                my $count = scalar @{$features};
492                Trace("$count features found for genome $genomeID.") if T(3);
493              # Loop through the features.              # Loop through the features.
494              for my $featureData (@{$features}) {              for my $featureData (@{$features}) {
495                  $loadFeature->Add("featureIn");                  $loadFeature->Add("featureIn");
# Line 634  Line 638 
638  The following relations are loaded by this method.  The following relations are loaded by this method.
639    
640      Subsystem      Subsystem
641        SubsystemClass
642      Role      Role
643      RoleEC      RoleEC
644      SSCell      SSCell
# Line 696  Line 701 
701      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $self->PrimaryOnly);      my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $self->PrimaryOnly);
702      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $self->PrimaryOnly);      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $self->PrimaryOnly);
703      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $self->PrimaryOnly);      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $self->PrimaryOnly);
704        my $loadSubsystemClass = $self->_TableLoader('SubsystemClass', $self->PrimaryOnly);
705      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
706          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
707      } else {      } else {
# Line 721  Line 727 
727                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
728                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
729                  $loadSubsystem->Put($subsysID, $curator, $notes);                  $loadSubsystem->Put($subsysID, $curator, $notes);
730                    my $class = $fig->subsystem_classification($subsysID);
731                    if ($class) {
732                        $loadSubsystemClass->Put($subsysID, $class);
733                    }
734                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.                  # Connect it to its roles. Each role is a column in the subsystem spreadsheet.
735                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
736                      # Connect to this role.                      # Connect to this role.
# Line 785  Line 795 
795                          if ($pegCount > 0) {                          if ($pegCount > 0) {
796                              Trace("$pegCount PEGs in $cellCount cells for $genomeID.") if T(3);                              Trace("$pegCount PEGs in $cellCount cells for $genomeID.") if T(3);
797                              $loadParticipatesIn->Put($genomeID, $subsysID, $variantCode);                              $loadParticipatesIn->Put($genomeID, $subsysID, $variantCode);
                             # Partition the PEGs found into clusters.  
                             my @clusters = $fig->compute_clusters(\@pegsFound, $sub);  
798                              # Create a hash mapping PEG IDs to cluster numbers.                              # Create a hash mapping PEG IDs to cluster numbers.
799                              # We default to -1 for all of them.                              # We default to -1 for all of them.
800                              my %clusterOf = map { $_ => -1 } @pegsFound;                              my %clusterOf = map { $_ => -1 } @pegsFound;
801                                # Partition the PEGs found into clusters.
802                                my @clusters = $fig->compute_clusters([keys %clusterOf], $sub);
803                              for (my $i = 0; $i <= $#clusters; $i++) {                              for (my $i = 0; $i <= $#clusters; $i++) {
804                                  my $subList = $clusters[$i];                                  my $subList = $clusters[$i];
805                                  for my $peg (@{$subList}) {                                  for my $peg (@{$subList}) {
# Line 1042  Line 1052 
1052                  # Get the annotation tuple.                  # Get the annotation tuple.
1053                  my ($peg, $timestamp, $user, $text) = @{$tuple};                  my ($peg, $timestamp, $user, $text) = @{$tuple};
1054                  # Here we fix up the annotation text. "\r" is removed,                  # Here we fix up the annotation text. "\r" is removed,
1055                  # and "\t" and "\n" are escaped. Note we use the "s"                  # and "\t" and "\n" are escaped. Note we use the "gs"
1056                  # modifier so that new-lines inside the text do not                  # modifier so that new-lines inside the text do not
1057                  # stop the substitution search.                  # stop the substitution search.
1058                  $text =~ s/\r//gs;                  $text =~ s/\r//gs;
# Line 1396  Line 1406 
1406      return $retVal;      return $retVal;
1407  }  }
1408    
=head3 LoadSynonymData

C<< my $stats = $spl->LoadSynonymData(); >>

Build the load files for the synonym-group data.

This method fills the following relations.

    SynonymGroup
    IsSynonymGroupFor

Every feature of every genome in the genome hash is passed to the B<FIG>
object's C<maps_to_id> method. When the ID that comes back differs from the
feature's own ID, it is treated as a synonym group: the group is written to
SynonymGroup and connected to the feature through IsSynonymGroupFor. Because
the scan is driven by the features, and one group can cover many features,
the same group may be written to the SynonymGroup load file more than once;
the automatic sort on primary entity relations is relied upon to remove
those duplicates.

When the C<loadOnly> option is set, no data is generated and only the
finishing step runs.

=over 4

=item RETURNS

Returns a statistics object for the loads.

=back

=cut
#: Return Type $%;
sub LoadSynonymData {
    my ($self) = @_;
    # Pull out the FIG object and the hash of genomes to process.
    my $fig = $self->{fig};
    my $genomeHash = $self->{genomes};
    # Set up one loader per relation; both are created even in load-only mode.
    my $loadSynonymGroup = $self->_TableLoader('SynonymGroup');
    my $loadIsSynonymGroupFor = $self->_TableLoader('IsSynonymGroupFor');
    if (! $self->{options}->{loadOnly}) {
        Trace("Generating synonym group data.") if T(2);
        # Walk the genomes in sorted ID order.
        for my $genomeID (sort keys %{$genomeHash}) {
            Trace("Processing $genomeID.") if T(3);
            # all_features_detailed returns one tuple per feature; only the
            # first element of each tuple (the feature ID) is needed here.
            my $featureTuples = $fig->all_features_detailed($genomeID);
            my @fids = map { $_->[0] } @{$featureTuples};
            Trace(scalar(@fids) . " features found for genome $genomeID.") if T(3);
            for my $fid (@fids) {
                # Ask FIG which ID this feature maps to.
                my $synonym = $fig->maps_to_id($fid);
                # A feature that maps to itself has no real group, so skip it.
                next if $synonym eq $fid;
                # Record the group and tie it to the feature.
                $loadSynonymGroup->Put($synonym);
                $loadIsSynonymGroupFor->Put($synonym, $fid);
            }
        }
    } else {
        Trace("Loading from existing files.") if T(2);
    }
    # Finish all the loads and hand back the resulting statistics.
    return $self->_FinishAll();
}
1474    
1475    
1476  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1477    
1478  =head3 TableLoader  =head3 TableLoader
# Line 1463  Line 1540 
1540      my $retVal = Stats->new();      my $retVal = Stats->new();
1541      # Get the loader list.      # Get the loader list.
1542      my $loadList = $self->{loaders};      my $loadList = $self->{loaders};
1543        # Create a hash to hold the statistics objects, keyed on relation name.
1544        my %loaderHash = ();
1545      # Loop through the list, finishing the loads. Note that if the finish fails, we die      # Loop through the list, finishing the loads. Note that if the finish fails, we die
1546      # ignominiously. At some future point, we want to make the loads restartable.      # ignominiously. At some future point, we want to make the loads more restartable.
1547      while (my $loader = pop @{$loadList}) {      while (my $loader = pop @{$loadList}) {
1548          # Get the relation name.          # Get the relation name.
1549          my $relName = $loader->RelName;          my $relName = $loader->RelName;
# Line 1475  Line 1554 
1554              # Here we really need to finish.              # Here we really need to finish.
1555              Trace("Finishing $relName.") if T(2);              Trace("Finishing $relName.") if T(2);
1556              my $stats = $loader->Finish();              my $stats = $loader->Finish();
1557                $loaderHash{$relName} = $stats;
1558            }
1559        }
1560        # Now we loop through again, actually loading the tables. We want to finish before
1561        # loading so that if something goes wrong at this point, all the load files are usable
1562        # and we don't have to redo all that work.
1563        for my $relName (sort keys %loaderHash) {
1564            # Get the statistics for this relation.
1565            my $stats = $loaderHash{$relName};
1566            # Check for a database load.
1567              if ($self->{options}->{dbLoad}) {              if ($self->{options}->{dbLoad}) {
1568                  # Here we want to use the load file just created to load the database.                  # Here we want to use the load file just created to load the database.
1569                  Trace("Loading relation $relName.") if T(2);                  Trace("Loading relation $relName.") if T(2);
# Line 1485  Line 1574 
1574              $retVal->Accumulate($stats);              $retVal->Accumulate($stats);
1575              Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);              Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);
1576          }          }
     }  
1577      # Return the load statistics.      # Return the load statistics.
1578      return $retVal;      return $retVal;
1579  }  }

Legend:
Removed from v.1.40  
changed lines
  Added in v.1.54

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3