375 |
for my $coupleData (@couplings) { |
for my $coupleData (@couplings) { |
376 |
my ($peg2, $score) = @{$coupleData}; |
my ($peg2, $score) = @{$coupleData}; |
377 |
# Compute the coupling ID. |
# Compute the coupling ID. |
378 |
my $coupleID = Sprout::CouplingID($peg1, $peg2); |
my $coupleID = $self->{erdb}->CouplingID($peg1, $peg2); |
379 |
if (! exists $dupHash{$coupleID}) { |
if (! exists $dupHash{$coupleID}) { |
380 |
$loadCoupling->Add("couplingIn"); |
$loadCoupling->Add("couplingIn"); |
381 |
# Here we have a new coupling to store in the load files. |
# Here we have a new coupling to store in the load files. |
634 |
The following relations are loaded by this method. |
The following relations are loaded by this method. |
635 |
|
|
636 |
Subsystem |
Subsystem |
637 |
|
SubsystemClass |
638 |
Role |
Role |
639 |
RoleEC |
RoleEC |
640 |
SSCell |
SSCell |
697 |
my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $self->PrimaryOnly); |
my $loadConsistsOfGenomes = $self->_TableLoader('ConsistsOfGenomes', $self->PrimaryOnly); |
698 |
my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $self->PrimaryOnly); |
my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset', $self->PrimaryOnly); |
699 |
my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $self->PrimaryOnly); |
my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset', $self->PrimaryOnly); |
700 |
|
my $loadSubsystemClass = $self->_TableLoader('SubsystemClass', $self->PrimaryOnly); |
701 |
if ($self->{options}->{loadOnly}) { |
if ($self->{options}->{loadOnly}) { |
702 |
Trace("Loading from existing files.") if T(2); |
Trace("Loading from existing files.") if T(2); |
703 |
} else { |
} else { |
723 |
my $curator = $sub->get_curator(); |
my $curator = $sub->get_curator(); |
724 |
my $notes = $sub->get_notes(); |
my $notes = $sub->get_notes(); |
725 |
$loadSubsystem->Put($subsysID, $curator, $notes); |
$loadSubsystem->Put($subsysID, $curator, $notes); |
726 |
|
my $class = $fig->subsystem_classification($subsysID); |
727 |
|
if ($class) { |
728 |
|
$loadSubsystemClass->Put($subsysID, $class); |
729 |
|
} |
730 |
# Connect it to its roles. Each role is a column in the subsystem spreadsheet. |
# Connect it to its roles. Each role is a column in the subsystem spreadsheet. |
731 |
for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) { |
for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) { |
732 |
# Connect to this role. |
# Connect to this role. |
791 |
if ($pegCount > 0) { |
if ($pegCount > 0) { |
792 |
Trace("$pegCount PEGs in $cellCount cells for $genomeID.") if T(3); |
Trace("$pegCount PEGs in $cellCount cells for $genomeID.") if T(3); |
793 |
$loadParticipatesIn->Put($genomeID, $subsysID, $variantCode); |
$loadParticipatesIn->Put($genomeID, $subsysID, $variantCode); |
|
# Partition the PEGs found into clusters. |
|
|
my @clusters = $fig->compute_clusters(\@pegsFound, $sub); |
|
794 |
# Create a hash mapping PEG IDs to cluster numbers. |
# Create a hash mapping PEG IDs to cluster numbers. |
795 |
# We default to -1 for all of them. |
# We default to -1 for all of them. |
796 |
my %clusterOf = map { $_ => -1 } @pegsFound; |
my %clusterOf = map { $_ => -1 } @pegsFound; |
797 |
|
# Partition the PEGs found into clusters. |
798 |
|
my @clusters = $fig->compute_clusters([keys %clusterOf], $sub); |
799 |
for (my $i = 0; $i <= $#clusters; $i++) { |
for (my $i = 0; $i <= $#clusters; $i++) { |
800 |
my $subList = $clusters[$i]; |
my $subList = $clusters[$i]; |
801 |
for my $peg (@{$subList}) { |
for my $peg (@{$subList}) { |
1048 |
# Get the annotation tuple. |
# Get the annotation tuple. |
1049 |
my ($peg, $timestamp, $user, $text) = @{$tuple}; |
my ($peg, $timestamp, $user, $text) = @{$tuple}; |
1050 |
# Here we fix up the annotation text. "\r" is removed, |
# Here we fix up the annotation text. "\r" is removed, |
1051 |
# and "\t" and "\n" are escaped. Note we use the "s" |
# and "\t" and "\n" are escaped. Note we use the "gs" |
1052 |
# modifier so that new-lines inside the text do not |
# modifier so that new-lines inside the text do not |
1053 |
# stop the substitution search. |
# stop the substitution search. |
1054 |
$text =~ s/\r//gs; |
$text =~ s/\r//gs; |
1402 |
return $retVal; |
return $retVal; |
1403 |
} |
} |
1404 |
|
|
1405 |
|
=head3 LoadSynonymData |
1406 |
|
|
1407 |
|
C<< my $stats = $spl->LoadSynonymData(); >> |
1408 |
|
|
1409 |
|
Load the synonym groups into Sprout. |
1410 |
|
|
1411 |
|
The following relations are loaded by this method. |
1412 |
|
|
1413 |
|
SynonymGroup |
1414 |
|
IsSynonymGroupFor |
1415 |
|
|
1416 |
|
The source information for these relations is taken from the C<maps_to_id> method |
1417 |
|
of the B<FIG> object. The process starts from the features, so it is possible |
1418 |
|
that there will be duplicates in the SynonymGroup load file, since the relationship |
1419 |
|
is one-to-many toward the features. The automatic sort on primary entity relations |
1420 |
|
will fix this for us. |
1421 |
|
|
1422 |
|
=over 4 |
1423 |
|
|
1424 |
|
=item RETURNS |
1425 |
|
|
1426 |
|
Returns a statistics object for the loads. |
1427 |
|
|
1428 |
|
=back |
1429 |
|
|
1430 |
|
=cut |
1431 |
|
#: Return Type $%; |
1432 |
|
sub LoadSynonymData {
    my ($self) = @_;
    # Pull the data source and the genome list out of this loader object.
    my $fig = $self->{fig};
    my $genomeHash = $self->{genomes};
    # One load object per relation produced by this method.
    my $groupLoader = $self->_TableLoader('SynonymGroup');
    my $memberLoader = $self->_TableLoader('IsSynonymGroupFor');
    if (! $self->{options}->{loadOnly}) {
        Trace("Generating synonym group data.") if T(2);
        # Genomes are visited in sorted ID order.
        for my $genomeID (sort keys %{$genomeHash}) {
            Trace("Processing $genomeID.") if T(3);
            # all_features_detailed returns one tuple per feature; only the
            # first element of each tuple (the feature ID) is used here.
            my $featureTuples = $fig->all_features_detailed($genomeID);
            my @featureIDs = map { $_->[0] } @{$featureTuples};
            Trace(scalar(@featureIDs) . " features found for genome $genomeID.") if T(3);
            for my $fid (@featureIDs) {
                # Ask the FIG object which ID this feature maps to.
                my $synonym = $fig->maps_to_id($fid);
                # A feature that maps to itself yields no group record.
                next if $synonym eq $fid;
                # The same group may be written once per member feature.
                $groupLoader->Put($synonym);
                $memberLoader->Put($synonym, $fid);
            }
        }
    } else {
        # Load-only mode: no data is generated in this method.
        Trace("Loading from existing files.") if T(2);
    }
    # Close out the loaders and hand back whatever _FinishAll reports.
    # The scalar assignment keeps the call in scalar context.
    my $stats = $self->_FinishAll();
    return $stats;
}
1470 |
|
|
1471 |
|
|
1472 |
=head2 Internal Utility Methods |
=head2 Internal Utility Methods |
1473 |
|
|
1474 |
=head3 TableLoader |
=head3 TableLoader |