163 |
Confess("Invalid subsystem parameter in SproutLoad constructor."); |
Confess("Invalid subsystem parameter in SproutLoad constructor."); |
164 |
} |
} |
165 |
} |
} |
166 |
|
# Go through the subsys hash again, creating the keyword list for each subsystem. |
167 |
|
for my $subsystem (keys %subsystems) { |
168 |
|
my $name = $subsystem; |
169 |
|
$name =~ s/_/ /g; |
170 |
|
my $classes = $fig->subsystem_classification($subsystem); |
171 |
|
my @classList = map { " $_" } @{$classes}; |
172 |
|
$name .= join("", @classList); |
173 |
|
$subsystems{$subsystem} = $name; |
174 |
|
} |
175 |
} |
} |
176 |
# Get the data directory from the Sprout object. |
# Get the data directory from the Sprout object. |
177 |
my ($directory) = $sprout->LoadInfo(); |
my ($directory) = $sprout->LoadInfo(); |
484 |
sub LoadFeatureData { |
sub LoadFeatureData { |
485 |
# Get this object instance. |
# Get this object instance. |
486 |
my ($self) = @_; |
my ($self) = @_; |
487 |
# Get the FIG object. |
# Get the FIG and Sprout objects. |
488 |
my $fig = $self->{fig}; |
my $fig = $self->{fig}; |
489 |
|
my $sprout = $self->{sprout}; |
490 |
# Get the table of genome IDs. |
# Get the table of genome IDs. |
491 |
my $genomeHash = $self->{genomes}; |
my $genomeHash = $self->{genomes}; |
492 |
# Create load objects for each of the tables we're loading. |
# Create load objects for each of the tables we're loading. |
498 |
my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream'); |
my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream'); |
499 |
my $loadHasFeature = $self->_TableLoader('HasFeature', $self->PrimaryOnly); |
my $loadHasFeature = $self->_TableLoader('HasFeature', $self->PrimaryOnly); |
500 |
my $loadHasRoleInSubsystem = $self->_TableLoader('HasRoleInSubsystem', $self->PrimaryOnly); |
my $loadHasRoleInSubsystem = $self->_TableLoader('HasRoleInSubsystem', $self->PrimaryOnly); |
501 |
|
# Get the subsystem hash. |
502 |
|
my $subHash = $self->{subsystems}; |
503 |
# Get the maximum sequence size. We need this later for splitting up the |
# Get the maximum sequence size. We need this later for splitting up the |
504 |
# locations. |
# locations. |
505 |
my $chunkSize = $self->{sprout}->MaxSegment(); |
my $chunkSize = $self->{sprout}->MaxSegment(); |
532 |
$loadFeature->Add("featureIn"); |
$loadFeature->Add("featureIn"); |
533 |
# Get the functional assignment. |
# Get the functional assignment. |
534 |
my $assignment = $fig->function_of($featureID); |
my $assignment = $fig->function_of($featureID); |
535 |
# Create the feature record. |
# Begin building the keywords. |
536 |
$loadFeature->Put($featureID, 1, $type, $assignment); |
my $keywords = "$assignment $genomeID"; |
537 |
# Link it to the parent genome. |
# Link this feature to the parent genome. |
538 |
$loadHasFeature->Put($genomeID, $featureID, $type); |
$loadHasFeature->Put($genomeID, $featureID, $type); |
539 |
# Create the aliases. |
# Create the aliases. |
540 |
for my $alias ($fig->feature_aliases($featureID)) { |
for my $alias ($fig->feature_aliases($featureID)) { |
541 |
$loadFeatureAlias->Put($featureID, $alias); |
$loadFeatureAlias->Put($featureID, $alias); |
542 |
|
$keywords .= " $alias"; |
543 |
} |
} |
544 |
# Get the links. |
# Get the links. |
545 |
my @links = $fig->fid_links($featureID); |
my @links = $fig->fid_links($featureID); |
560 |
} |
} |
561 |
} |
} |
562 |
# Now we need to find the subsystems this feature participates in. |
# Now we need to find the subsystems this feature participates in. |
563 |
|
# We also add the subsystems to the keyword list. Before we do that, |
564 |
|
# we must convert underscores to spaces and tack on the classifications. |
565 |
my @subsystems = $fig->peg_to_subsystems($featureID); |
my @subsystems = $fig->peg_to_subsystems($featureID); |
566 |
for my $subsystem (@subsystems) { |
for my $subsystem (@subsystems) { |
567 |
|
# Only proceed if we like this subsystem. |
568 |
|
if (exists $subHash->{$subsystem}) { |
569 |
|
# Store the has-role link. |
570 |
$loadHasRoleInSubsystem->Put($featureID, $subsystem, $genomeID, $type); |
$loadHasRoleInSubsystem->Put($featureID, $subsystem, $genomeID, $type); |
571 |
|
# Save the subsystem's keyword data. |
572 |
|
my $subKeywords = $subHash->{$subsystem}; |
573 |
|
$keywords .= " $subKeywords"; |
574 |
|
} |
575 |
|
} |
576 |
|
# The final task is to add virulence and essentiality attributes. |
577 |
|
if ($fig->virulent($featureID)) { |
578 |
|
$keywords .= " virulent"; |
579 |
|
} |
580 |
|
if ($fig->essential($featureID)) { |
581 |
|
$keywords .= " essential"; |
582 |
} |
} |
583 |
|
# Clean the keyword list. |
584 |
|
my $cleanWords = $sprout->CleanKeywords($keywords); |
585 |
|
# Create the feature record. |
586 |
|
$loadFeature->Put($featureID, 1, $type, $assignment, $cleanWords); |
587 |
# This part is the roughest. We need to relate the features to contig |
# This part is the roughest. We need to relate the features to contig |
588 |
# locations, and the locations must be split so that none of them exceed |
# locations, and the locations must be split so that none of them exceed |
589 |
# the maximum segment size. This simplifies the genes_in_region processing |
# the maximum segment size. This simplifies the genes_in_region processing |
795 |
my $curator = $sub->get_curator(); |
my $curator = $sub->get_curator(); |
796 |
my $notes = $sub->get_notes(); |
my $notes = $sub->get_notes(); |
797 |
$loadSubsystem->Put($subsysID, $curator, $notes); |
$loadSubsystem->Put($subsysID, $curator, $notes); |
798 |
|
# Now for the classification string. This comes back as a list |
799 |
|
# reference and we convert it to a space-delimited string. |
800 |
my $classList = $fig->subsystem_classification($subsysID); |
my $classList = $fig->subsystem_classification($subsysID); |
801 |
my @classes = @$classList; |
my $classString = join(" ", grep { $_ } @$classList); |
802 |
if (@classes) { |
$loadSubsystemClass->Put($subsysID, $classString); |
|
for my $class (@classes) { |
|
|
$loadSubsystemClass->Put($subsysID, $class); |
|
|
} |
|
|
} |
|
803 |
# Connect it to its roles. Each role is a column in the subsystem spreadsheet. |
# Connect it to its roles. Each role is a column in the subsystem spreadsheet. |
804 |
for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) { |
for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) { |
805 |
# Connect to this role. |
# Connect to this role. |
1016 |
# Get all attributes for this feature. We do this one feature at a time |
# Get all attributes for this feature. We do this one feature at a time |
1017 |
# to ensure we do not get any genome attributes. |
# to ensure we do not get any genome attributes. |
1018 |
my @attributeList = $fig->get_attributes($fid, '', '', ''); |
my @attributeList = $fig->get_attributes($fid, '', '', ''); |
1019 |
|
# Add essentiality and virulence attributes. |
1020 |
|
if ($fig->essential($fid)) { |
1021 |
|
push @attributeList, [$fid, 'essential', 1, '']; |
1022 |
|
} |
1023 |
|
if ($fig->virulent($fid)) { |
1024 |
|
push @attributeList, [$fid, 'virulent', 1, '']; |
1025 |
|
} |
1026 |
if (scalar @attributeList) { |
if (scalar @attributeList) { |
1027 |
$featureCount++; |
$featureCount++; |
1028 |
} |
} |