[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.46, Mon Jun 26 19:13:16 2006 UTC revision 1.56, Fri Jul 14 01:37:07 2006 UTC
# Line 136  Line 136 
136      # We only need it if load-only is NOT specified.      # We only need it if load-only is NOT specified.
137      if (! $options->{loadOnly}) {      if (! $options->{loadOnly}) {
138          if (! defined $subsysFile || $subsysFile eq '') {          if (! defined $subsysFile || $subsysFile eq '') {
139              # Here we want all the NMPDR subsystems. First we get the whole list.              # Here we want all the usable subsystems. First we get the whole list.
140              my @subs = $fig->all_subsystems();              my @subs = $fig->all_subsystems();
141              # Loop through, checking for the NMPDR file.              # Loop through, checking for usability.
142              for my $sub (@subs) {              for my $sub (@subs) {
143                  if (-e "$FIG_Config::data/Subsystems/$sub/NMPDR") {                  if ($fig->usable_subsystem($sub)) {
144                      $subsystems{$sub} = 1;                      $subsystems{$sub} = 1;
145                  }                  }
146              }              }
# Line 340  Line 340 
340      my $fig = $self->{fig};      my $fig = $self->{fig};
341      # Get the genome hash.      # Get the genome hash.
342      my $genomeFilter = $self->{genomes};      my $genomeFilter = $self->{genomes};
343      my $genomeCount = (keys %{$genomeFilter});      # Set up an ID counter for the PCHs.
344      my $featureCount = $genomeCount * 4000;      my $pchID = 0;
345      # Start the loads.      # Start the loads.
346      my $loadCoupling = $self->_TableLoader('Coupling');      my $loadCoupling = $self->_TableLoader('Coupling');
347      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);
# Line 375  Line 375 
375                  for my $coupleData (@couplings) {                  for my $coupleData (@couplings) {
376                      my ($peg2, $score) = @{$coupleData};                      my ($peg2, $score) = @{$coupleData};
377                      # Compute the coupling ID.                      # Compute the coupling ID.
378                      my $coupleID = Sprout::CouplingID($peg1, $peg2);                      my $coupleID = $self->{erdb}->CouplingID($peg1, $peg2);
379                      if (! exists $dupHash{$coupleID}) {                      if (! exists $dupHash{$coupleID}) {
380                          $loadCoupling->Add("couplingIn");                          $loadCoupling->Add("couplingIn");
381                          # Here we have a new coupling to store in the load files.                          # Here we have a new coupling to store in the load files.
# Line 411  Line 411 
411                              }                              }
412                          }                          }
413                          for my $evidenceID (keys %evidenceMap) {                          for my $evidenceID (keys %evidenceMap) {
414                                # Get the ID for this evidence.
415                                $pchID++;
416                              # Create the evidence record.                              # Create the evidence record.
417                              my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};                              my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};
418                              $loadPCH->Put($evidenceID, $usage);                              $loadPCH->Put($pchID, $usage);
419                              # Connect it to the coupling.                              # Connect it to the coupling.
420                              $loadIsEvidencedBy->Put($coupleID, $evidenceID);                              $loadIsEvidencedBy->Put($coupleID, $pchID);
421                              # Connect it to the features.                              # Connect it to the features.
422                              $loadUsesAsEvidence->Put($evidenceID, $peg3, 1);                              $loadUsesAsEvidence->Put($pchID, $peg3, 1);
423                              $loadUsesAsEvidence->Put($evidenceID, $peg4, 2);                              $loadUsesAsEvidence->Put($pchID, $peg4, 2);
424                          }                          }
425                      }                      }
426                  }                  }
# Line 486  Line 488 
488              $loadFeature->Add("genomeIn");              $loadFeature->Add("genomeIn");
489              # Get the feature list for this genome.              # Get the feature list for this genome.
490              my $features = $fig->all_features_detailed($genomeID);              my $features = $fig->all_features_detailed($genomeID);
491                # Sort and count the list.
492                my @featureData = sort { $a->[0] cmp $b->[0] } @{$features};
493                my $count = scalar @featureData;
494                Trace("$count features found for genome $genomeID.") if T(3);
495                # Set up for our duplicate-feature check.
496                my $oldFeatureID = "";
497              # Loop through the features.              # Loop through the features.
498              for my $featureData (@{$features}) {              for my $featureData (@{$features}) {
                 $loadFeature->Add("featureIn");  
499                  # Split the tuple.                  # Split the tuple.
500                  my ($featureID, $locations, undef, $type) = @{$featureData};                  my ($featureID, $locations, undef, $type) = @{$featureData};
501                    # Check for duplicates.
502                    if ($featureID eq $oldFeatureID) {
503                        Trace("Duplicate feature $featureID found.") if T(1);
504                    } else {
505                        $oldFeatureID = $featureID;
506                        # Count this feature.
507                        $loadFeature->Add("featureIn");
508                  # Create the feature record.                  # Create the feature record.
509                  $loadFeature->Put($featureID, 1, $type);                  $loadFeature->Put($featureID, 1, $type);
510                  # Link it to the parent genome.                  # Link it to the parent genome.
# Line 546  Line 560 
560              }              }
561          }          }
562      }      }
563        }
564      # Finish the loads.      # Finish the loads.
565      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
566      return $retVal;      return $retVal;
# Line 1414  Line 1429 
1429      IsSynonymGroupFor      IsSynonymGroupFor
1430    
1431  The source information for these relations is taken from the C<maps_to_id> method  The source information for these relations is taken from the C<maps_to_id> method
1432  of the B<FIG> object. The process starts from the features, so it is possible  of the B<FIG> object. Unfortunately, to make this work, we need to use direct
1433  that there will be duplicates in the SynonymGroup load file, since the relationship  SQL against the FIG database.
 is one-to-many toward the features. The automatic sort on primary entity relations  
 will fix this for us.  
1434    
1435  =over 4  =over 4
1436    
# Line 1443  Line 1456 
1456          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1457      } else {      } else {
1458          Trace("Generating synonym group data.") if T(2);          Trace("Generating synonym group data.") if T(2);
1459          # Loop through the genomes.          # Get the database handle.
1460          for my $genomeID (sort keys %{$genomeHash}) {          my $dbh = $fig->db_handle();
1461              Trace("Processing $genomeID.") if T(3);          # Ask for the synonyms.
1462              # Get all of the features for this genome. The only method that does this is          my $sth = $dbh->prepare_command("SELECT syn_id, maps_to FROM peg_synonyms ORDER BY syn_id");
1463              # all_features_detailed, which returns extra baggage that we discard.          my $result = $sth->execute();
1464              my $featureData = $fig->all_features_detailed($genomeID);          if (! defined($result)) {
1465              my @fids = map { $_->[0] } @{$featureData};              Confess("Database error in Synonym load: " . $sth->errstr());
1466              Trace(scalar(@fids) . " features found for genome $genomeID.") if T(3);          } else {
1467              # Loop through the feature IDs.              # Remember the current synonym.
1468              for my $fid (@fids) {              my $current_syn = "";
1469                  # Get the group for this feature.              # Count the features.
1470                  my $synonym = $fig->maps_to_id($fid);              my $featureCount = 0;
1471                  # Only proceed if the synonym is a real group.              # Loop through the synonym/peg pairs.
1472                  if ($synonym ne $fid) {              while (my @row = $sth->fetchrow()) {
1473                      $loadSynonymGroup->Put($synonym);                  # Get the synonym ID and feature ID.
1474                      $loadIsSynonymGroupFor->Put($synonym, $fid);                  my ($syn_id, $peg) = @row;
1475                    # Ensure it's for one of our genomes.
1476                    my $genomeID = FIG::genome_of($peg);
1477                    if (exists $genomeHash->{$genomeID}) {
1478                        # Verify the synonym.
1479                        if ($syn_id ne $current_syn) {
1480                            # It's new, so put it in the group table.
1481                            $loadSynonymGroup->Put($syn_id);
1482                            $current_syn = $syn_id;
1483                        }
1484                        # Connect the synonym to the peg.
1485                        $loadIsSynonymGroupFor->Put($syn_id, $peg);
1486                        # Count this feature.
1487                        $featureCount++;
1488                        if ($featureCount % 1000 == 0) {
1489                            Trace("$featureCount features processed.") if T(3);
1490                        }
1491                  }                  }
1492              }              }
1493          }          }
# Line 1536  Line 1565 
1565      my $retVal = Stats->new();      my $retVal = Stats->new();
1566      # Get the loader list.      # Get the loader list.
1567      my $loadList = $self->{loaders};      my $loadList = $self->{loaders};
1568        # Create a hash to hold the statistics objects, keyed on relation name.
1569        my %loaderHash = ();
1570      # Loop through the list, finishing the loads. Note that if the finish fails, we die      # Loop through the list, finishing the loads. Note that if the finish fails, we die
1571      # ignominiously. At some future point, we want to make the loads restartable.      # ignominiously. At some future point, we want to make the loads more restartable.
1572      while (my $loader = pop @{$loadList}) {      while (my $loader = pop @{$loadList}) {
1573          # Get the relation name.          # Get the relation name.
1574          my $relName = $loader->RelName;          my $relName = $loader->RelName;
# Line 1548  Line 1579 
1579              # Here we really need to finish.              # Here we really need to finish.
1580              Trace("Finishing $relName.") if T(2);              Trace("Finishing $relName.") if T(2);
1581              my $stats = $loader->Finish();              my $stats = $loader->Finish();
1582                $loaderHash{$relName} = $stats;
1583            }
1584        }
1585        # Now we loop through again, actually loading the tables. We want to finish before
1586        # loading so that if something goes wrong at this point, all the load files are usable
1587        # and we don't have to redo all that work.
1588        for my $relName (sort keys %loaderHash) {
1589            # Get the statistics for this relation.
1590            my $stats = $loaderHash{$relName};
1591            # Check for a database load.
1592              if ($self->{options}->{dbLoad}) {              if ($self->{options}->{dbLoad}) {
1593                  # Here we want to use the load file just created to load the database.                  # Here we want to use the load file just created to load the database.
1594                  Trace("Loading relation $relName.") if T(2);                  Trace("Loading relation $relName.") if T(2);
# Line 1558  Line 1599 
1599              $retVal->Accumulate($stats);              $retVal->Accumulate($stats);
1600              Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);              Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);
1601          }          }
     }  
1602      # Return the load statistics.      # Return the load statistics.
1603      return $retVal;      return $retVal;
1604  }  }

Legend:
Removed from v.1.46  
changed lines
  Added in v.1.56

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3