[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.48, Fri Jul 7 00:24:16 2006 UTC revision 1.56, Fri Jul 14 01:37:07 2006 UTC
# Line 136  Line 136 
136      # We only need it if load-only is NOT specified.      # We only need it if load-only is NOT specified.
137      if (! $options->{loadOnly}) {      if (! $options->{loadOnly}) {
138          if (! defined $subsysFile || $subsysFile eq '') {          if (! defined $subsysFile || $subsysFile eq '') {
139              # Here we want all the NMPDR subsystems. First we get the whole list.              # Here we want all the usable subsystems. First we get the whole list.
140              my @subs = $fig->all_subsystems();              my @subs = $fig->all_subsystems();
141              # Loop through, checking for the NMPDR file.              # Loop through, checking for usability.
142              for my $sub (@subs) {              for my $sub (@subs) {
143                  if (-e "$FIG_Config::data/Subsystems/$sub/NMPDR") {                  if ($fig->usable_subsystem($sub)) {
144                      $subsystems{$sub} = 1;                      $subsystems{$sub} = 1;
145                  }                  }
146              }              }
# Line 340  Line 340 
340      my $fig = $self->{fig};      my $fig = $self->{fig};
341      # Get the genome hash.      # Get the genome hash.
342      my $genomeFilter = $self->{genomes};      my $genomeFilter = $self->{genomes};
343      my $genomeCount = (keys %{$genomeFilter});      # Set up an ID counter for the PCHs.
344      my $featureCount = $genomeCount * 4000;      my $pchID = 0;
345      # Start the loads.      # Start the loads.
346      my $loadCoupling = $self->_TableLoader('Coupling');      my $loadCoupling = $self->_TableLoader('Coupling');
347      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);      my $loadIsEvidencedBy = $self->_TableLoader('IsEvidencedBy', $self->PrimaryOnly);
# Line 411  Line 411 
411                              }                              }
412                          }                          }
413                          for my $evidenceID (keys %evidenceMap) {                          for my $evidenceID (keys %evidenceMap) {
414                                # Get the ID for this evidence.
415                                $pchID++;
416                              # Create the evidence record.                              # Create the evidence record.
417                              my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};                              my ($peg3, $peg4, $usage) = @{$evidenceMap{$evidenceID}};
418                              $loadPCH->Put($evidenceID, $usage);                              $loadPCH->Put($pchID, $usage);
419                              # Connect it to the coupling.                              # Connect it to the coupling.
420                              $loadIsEvidencedBy->Put($coupleID, $evidenceID);                              $loadIsEvidencedBy->Put($coupleID, $pchID);
421                              # Connect it to the features.                              # Connect it to the features.
422                              $loadUsesAsEvidence->Put($evidenceID, $peg3, 1);                              $loadUsesAsEvidence->Put($pchID, $peg3, 1);
423                              $loadUsesAsEvidence->Put($evidenceID, $peg4, 2);                              $loadUsesAsEvidence->Put($pchID, $peg4, 2);
424                          }                          }
425                      }                      }
426                  }                  }
# Line 486  Line 488 
488              $loadFeature->Add("genomeIn");              $loadFeature->Add("genomeIn");
489              # Get the feature list for this genome.              # Get the feature list for this genome.
490              my $features = $fig->all_features_detailed($genomeID);              my $features = $fig->all_features_detailed($genomeID);
491                # Sort and count the list.
492                my @featureData = sort { $a->[0] cmp $b->[0] } @{$features};
493                my $count = scalar @featureData;
494                Trace("$count features found for genome $genomeID.") if T(3);
495                # Set up for our duplicate-feature check.
496                my $oldFeatureID = "";
497              # Loop through the features.              # Loop through the features.
498              for my $featureData (@{$features}) {              for my $featureData (@{$features}) {
                 $loadFeature->Add("featureIn");  
499                  # Split the tuple.                  # Split the tuple.
500                  my ($featureID, $locations, undef, $type) = @{$featureData};                  my ($featureID, $locations, undef, $type) = @{$featureData};
501                    # Check for duplicates.
502                    if ($featureID eq $oldFeatureID) {
503                        Trace("Duplicate feature $featureID found.") if T(1);
504                    } else {
505                        $oldFeatureID = $featureID;
506                        # Count this feature.
507                        $loadFeature->Add("featureIn");
508                  # Create the feature record.                  # Create the feature record.
509                  $loadFeature->Put($featureID, 1, $type);                  $loadFeature->Put($featureID, 1, $type);
510                  # Link it to the parent genome.                  # Link it to the parent genome.
# Line 546  Line 560 
560              }              }
561          }          }
562      }      }
563        }
564      # Finish the loads.      # Finish the loads.
565      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
566      return $retVal;      return $retVal;
# Line 1414  Line 1429 
1429      IsSynonymGroupFor      IsSynonymGroupFor
1430    
1431  The source information for these relations is taken from the C<maps_to_id> method  The source information for these relations is taken from the C<maps_to_id> method
1432  of the B<FIG> object. The process starts from the features, so it is possible  of the B<FIG> object. Unfortunately, to make this work, we need to use direct
1433  that there will be duplicates in the SynonymGroup load file, since the relationship  SQL against the FIG database.
 is one-to-many toward the features. The automatic sort on primary entity relations  
 will fix this for us.  
1434    
1435  =over 4  =over 4
1436    
# Line 1443  Line 1456 
1456          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1457      } else {      } else {
1458          Trace("Generating synonym group data.") if T(2);          Trace("Generating synonym group data.") if T(2);
1459          # Loop through the genomes.          # Get the database handle.
1460          for my $genomeID (sort keys %{$genomeHash}) {          my $dbh = $fig->db_handle();
1461              Trace("Processing $genomeID.") if T(3);          # Ask for the synonyms.
1462              # Get all of the features for this genome. The only method that does this is          my $sth = $dbh->prepare_command("SELECT syn_id, maps_to FROM peg_synonyms ORDER BY syn_id");
1463              # all_features_detailed, which returns extra baggage that we discard.          my $result = $sth->execute();
1464              my $featureData = $fig->all_features_detailed($genomeID);          if (! defined($result)) {
1465              my @fids = map { $_->[0] } @{$featureData};              Confess("Database error in Synonym load: " . $sth->errstr());
1466              Trace(scalar(@fids) . " features found for genome $genomeID.") if T(3);          } else {
1467              # Loop through the feature IDs.              # Remember the current synonym.
1468              for my $fid (@fids) {              my $current_syn = "";
1469                  # Get the group for this feature.              # Count the features.
1470                  my $synonym = $fig->maps_to_id($fid);              my $featureCount = 0;
1471                  # Only proceed if the synonym is a real group.              # Loop through the synonym/peg pairs.
1472                  if ($synonym ne $fid) {              while (my @row = $sth->fetchrow()) {
1473                      $loadSynonymGroup->Put($synonym);                  # Get the synonym ID and feature ID.
1474                      $loadIsSynonymGroupFor->Put($synonym, $fid);                  my ($syn_id, $peg) = @row;
1475                    # Ensure it's for one of our genomes.
1476                    my $genomeID = FIG::genome_of($peg);
1477                    if (exists $genomeHash->{$genomeID}) {
1478                        # Verify the synonym.
1479                        if ($syn_id ne $current_syn) {
1480                            # It's new, so put it in the group table.
1481                            $loadSynonymGroup->Put($syn_id);
1482                            $current_syn = $syn_id;
1483                        }
1484                        # Connect the synonym to the peg.
1485                        $loadIsSynonymGroupFor->Put($syn_id, $peg);
1486                        # Count this feature.
1487                        $featureCount++;
1488                        if ($featureCount % 1000 == 0) {
1489                            Trace("$featureCount features processed.") if T(3);
1490                        }
1491                  }                  }
1492              }              }
1493          }          }
# Line 1570  Line 1599 
1599          $retVal->Accumulate($stats);          $retVal->Accumulate($stats);
1600          Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);          Trace("Statistics for $relName:\n" . $stats->Show()) if T(2);
1601          }          }
     }  
1602      # Return the load statistics.      # Return the load statistics.
1603      return $retVal;      return $retVal;
1604  }  }

Legend:
Removed from v.1.48  
changed lines
  Added in v.1.56

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3