30 |
$stats->Accumulate($spl->LoadFeatureData()); |
$stats->Accumulate($spl->LoadFeatureData()); |
31 |
print $stats->Show(); |
print $stats->Show(); |
32 |
|
|
|
This module makes use of the internal Sprout property C<_erdb>. |
|
|
|
|
33 |
It is worth noting that the FIG object does not need to be a real one. Any object |
It is worth noting that the FIG object does not need to be a real one. Any object |
34 |
that implements the FIG methods for data retrieval could be used. So, for example, |
that implements the FIG methods for data retrieval could be used. So, for example, |
35 |
this object could be used to copy data from one Sprout database to another, or |
this object could be used to copy data from one Sprout database to another, or |
78 |
=item subsysFile |
=item subsysFile |
79 |
|
|
80 |
Either the name of the file containing the list of trusted subsystems or a reference |
Either the name of the file containing the list of trusted subsystems or a reference |
81 |
to a list of subsystem names. If nothing is specified, all known subsystems will be |
to a list of subsystem names. If nothing is specified, all NMPDR subsystems will be |
82 |
considered trusted. Only subsystem data related to the trusted subsystems is loaded. |
considered trusted. (A subsystem is considered NMPDR if it has a file named C<NMPDR> |
83 |
|
in its data directory.) Only subsystem data related to the trusted subsystems is loaded. |
84 |
|
|
85 |
=item options |
=item options |
86 |
|
|
93 |
sub new { |
sub new { |
94 |
# Get the parameters. |
# Get the parameters. |
95 |
my ($class, $sprout, $fig, $genomeFile, $subsysFile, $options) = @_; |
my ($class, $sprout, $fig, $genomeFile, $subsysFile, $options) = @_; |
96 |
# Load the list of genomes into a hash. |
# Create the genome hash. |
97 |
my %genomes; |
my %genomes = (); |
98 |
|
# We only need it if load-only is NOT specified. |
99 |
|
if (! $options->{loadOnly}) { |
100 |
if (! defined($genomeFile) || $genomeFile eq '') { |
if (! defined($genomeFile) || $genomeFile eq '') { |
101 |
# Here we want all the complete genomes and an access code of 1. |
# Here we want all the complete genomes and an access code of 1. |
102 |
my @genomeList = $fig->genomes(1); |
my @genomeList = $fig->genomes(1); |
130 |
Confess("Invalid genome parameter ($type) in SproutLoad constructor."); |
Confess("Invalid genome parameter ($type) in SproutLoad constructor."); |
131 |
} |
} |
132 |
} |
} |
133 |
|
} |
134 |
# Load the list of trusted subsystems. |
# Load the list of trusted subsystems. |
135 |
my %subsystems = (); |
my %subsystems = (); |
136 |
|
# We only need it if load-only is NOT specified. |
137 |
|
if (! $options->{loadOnly}) { |
138 |
if (! defined $subsysFile || $subsysFile eq '') { |
if (! defined $subsysFile || $subsysFile eq '') { |
139 |
# Here we want all the subsystems. |
# Here we want all the NMPDR subsystems. First we get the whole list. |
140 |
%subsystems = map { $_ => 1 } $fig->all_subsystems(); |
my @subs = $fig->all_subsystems(); |
141 |
|
# Loop through, checking for the NMPDR file. |
142 |
|
for my $sub (@subs) { |
143 |
|
if (-e "$FIG_Config::data/Subsystems/$sub/NMPDR") { |
144 |
|
$subsystems{$sub} = 1; |
145 |
|
} |
146 |
|
} |
147 |
} else { |
} else { |
148 |
my $type = ref $subsysFile; |
my $type = ref $subsysFile; |
149 |
if ($type eq 'ARRAY') { |
if ($type eq 'ARRAY') { |
163 |
Confess("Invalid subsystem parameter in SproutLoad constructor."); |
Confess("Invalid subsystem parameter in SproutLoad constructor."); |
164 |
} |
} |
165 |
} |
} |
166 |
|
} |
167 |
# Get the data directory from the Sprout object. |
# Get the data directory from the Sprout object. |
168 |
my ($directory) = $sprout->LoadInfo(); |
my ($directory) = $sprout->LoadInfo(); |
169 |
# Create the Sprout load object. |
# Create the Sprout load object. |
173 |
subsystems => \%subsystems, |
subsystems => \%subsystems, |
174 |
sprout => $sprout, |
sprout => $sprout, |
175 |
loadDirectory => $directory, |
loadDirectory => $directory, |
176 |
erdb => $sprout->{_erdb}, |
erdb => $sprout, |
177 |
loaders => [], |
loaders => [], |
178 |
options => $options |
options => $options |
179 |
}; |
}; |
261 |
$loadGenome->Add("genomeIn"); |
$loadGenome->Add("genomeIn"); |
262 |
# The access code comes in via the genome hash. |
# The access code comes in via the genome hash. |
263 |
my $accessCode = $genomeHash->{$genomeID}; |
my $accessCode = $genomeHash->{$genomeID}; |
264 |
# Get the genus, species, and strain from the scientific name. Note that we append |
# Get the genus, species, and strain from the scientific name. |
|
# the genome ID to the strain. In some cases this is the totality of the strain name. |
|
265 |
my ($genus, $species, @extraData) = split / /, $self->{fig}->genus_species($genomeID); |
my ($genus, $species, @extraData) = split / /, $self->{fig}->genus_species($genomeID); |
266 |
my $extra = join " ", @extraData, "[$genomeID]"; |
my $extra = join " ", @extraData; |
267 |
# Get the full taxonomy. |
# Get the full taxonomy. |
268 |
my $taxonomy = $fig->taxonomy_of($genomeID); |
my $taxonomy = $fig->taxonomy_of($genomeID); |
269 |
# Output the genome record. |
# Output the genome record. |
446 |
FeatureTranslation |
FeatureTranslation |
447 |
FeatureUpstream |
FeatureUpstream |
448 |
IsLocatedIn |
IsLocatedIn |
449 |
|
HasFeature |
450 |
|
|
451 |
=over 4 |
=over 4 |
452 |
|
|
472 |
my $loadFeatureLink = $self->_TableLoader('FeatureLink'); |
my $loadFeatureLink = $self->_TableLoader('FeatureLink'); |
473 |
my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation'); |
my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation'); |
474 |
my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream'); |
my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream'); |
475 |
|
my $loadHasFeature = $self->_TableLoader('HasFeature'); |
476 |
# Get the maximum sequence size. We need this later for splitting up the |
# Get the maximum sequence size. We need this later for splitting up the |
477 |
# locations. |
# locations. |
478 |
my $chunkSize = $self->{sprout}->MaxSegment(); |
my $chunkSize = $self->{sprout}->MaxSegment(); |
493 |
my ($featureID, $locations, undef, $type) = @{$featureData}; |
my ($featureID, $locations, undef, $type) = @{$featureData}; |
494 |
# Create the feature record. |
# Create the feature record. |
495 |
$loadFeature->Put($featureID, 1, $type); |
$loadFeature->Put($featureID, 1, $type); |
496 |
|
# Link it to the parent genome. |
497 |
|
$loadHasFeature->Put($genomeID, $featureID, $type); |
498 |
# Create the aliases. |
# Create the aliases. |
499 |
for my $alias ($fig->feature_aliases($featureID)) { |
for my $alias ($fig->feature_aliases($featureID)) { |
500 |
$loadFeatureAlias->Put($featureID, $alias); |
$loadFeatureAlias->Put($featureID, $alias); |
711 |
my ($genomeID, $roleID); |
my ($genomeID, $roleID); |
712 |
my %roleData = (); |
my %roleData = (); |
713 |
for my $subsysID (@subsysIDs) { |
for my $subsysID (@subsysIDs) { |
|
Trace("Creating subsystem $subsysID.") if T(3); |
|
|
$loadSubsystem->Add("subsystemIn"); |
|
714 |
# Get the subsystem object. |
# Get the subsystem object. |
715 |
my $sub = $fig->get_subsystem($subsysID); |
my $sub = $fig->get_subsystem($subsysID); |
716 |
|
# Only proceed if the subsystem has a spreadsheet. |
717 |
|
if (! $sub->{empty_ss}) { |
718 |
|
Trace("Creating subsystem $subsysID.") if T(3); |
719 |
|
$loadSubsystem->Add("subsystemIn"); |
720 |
# Create the subsystem record. |
# Create the subsystem record. |
721 |
my $curator = $sub->get_curator(); |
my $curator = $sub->get_curator(); |
722 |
my $notes = $sub->get_notes(); |
my $notes = $sub->get_notes(); |
764 |
# part of the spreadsheet cell ID. |
# part of the spreadsheet cell ID. |
765 |
for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) { |
for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) { |
766 |
# Get the features in the spreadsheet cell for this genome and role. |
# Get the features in the spreadsheet cell for this genome and role. |
767 |
my @pegs = $sub->get_pegs_from_cell($row, $col); |
my @pegs = grep { !$fig->is_deleted_fid($_) } $sub->get_pegs_from_cell($row, $col); |
768 |
# Only proceed if features exist. |
# Only proceed if features exist. |
769 |
if (@pegs > 0) { |
if (@pegs > 0) { |
770 |
# Create the spreadsheet cell. |
# Create the spreadsheet cell. |
817 |
# Connect the subset to the subsystem. |
# Connect the subset to the subsystem. |
818 |
$loadHasRoleSubset->Put($subsysID, $actualID); |
$loadHasRoleSubset->Put($subsysID, $actualID); |
819 |
# Connect the subset to its roles. |
# Connect the subset to its roles. |
820 |
my @roles = $sub->get_subset($subsetID); |
my @roles = $sub->get_subsetC_roles($subsetID); |
821 |
for my $roleID (@roles) { |
for my $roleID (@roles) { |
822 |
$loadConsistsOfRoles->Put($actualID, $roleID); |
$loadConsistsOfRoles->Put($actualID, $roleID); |
823 |
} |
} |
870 |
} |
} |
871 |
} |
} |
872 |
} |
} |
873 |
|
} |
874 |
# Finish the load. |
# Finish the load. |
875 |
my $retVal = $self->_FinishAll(); |
my $retVal = $self->_FinishAll(); |
876 |
return $retVal; |
return $retVal; |
1032 |
# Loop through the genomes. |
# Loop through the genomes. |
1033 |
for my $genomeID (sort keys %{$genomeHash}) { |
for my $genomeID (sort keys %{$genomeHash}) { |
1034 |
Trace("Processing $genomeID.") if T(3); |
Trace("Processing $genomeID.") if T(3); |
|
# Get the genome's PEGs. |
|
|
my @pegs = $fig->pegs_of($genomeID); |
|
|
for my $peg (@pegs) { |
|
|
Trace("Processing $peg.") if T(4); |
|
1035 |
# Create a hash of timestamps. We use this to prevent duplicate time stamps |
# Create a hash of timestamps. We use this to prevent duplicate time stamps |
1036 |
# from showing up for a single PEG's annotations. |
# from showing up for a single PEG's annotations. |
1037 |
my %seenTimestamps = (); |
my %seenTimestamps = (); |
1038 |
# Loop through the annotations. |
# Get the genome's annotations. |
1039 |
for my $tuple ($fig->feature_annotations($peg, "raw")) { |
my @annotations = $fig->read_all_annotations($genomeID); |
1040 |
my ($fid, $timestamp, $user, $text) = @{$tuple}; |
Trace("Processing annotations.") if T(2); |
1041 |
|
for my $tuple (@annotations) { |
1042 |
|
# Get the annotation tuple. |
1043 |
|
my ($peg, $timestamp, $user, $text) = @{$tuple}; |
1044 |
# Here we fix up the annotation text. "\r" is removed, |
# Here we fix up the annotation text. "\r" is removed, |
1045 |
# and "\t" and "\n" are escaped. Note we use the "s" |
# and "\t" and "\n" are escaped. Note we use the "s" |
1046 |
# modifier so that new-lines inside the text do not |
# modifier so that new-lines inside the text do not |
1055 |
# Here it's a number. We need to ensure the one we use to form |
# Here it's a number. We need to ensure the one we use to form |
1056 |
# the key is unique. |
# the key is unique. |
1057 |
my $keyStamp = $timestamp; |
my $keyStamp = $timestamp; |
1058 |
while ($seenTimestamps{$keyStamp}) { |
while ($seenTimestamps{"$peg:$keyStamp"}) { |
1059 |
$keyStamp++; |
$keyStamp++; |
1060 |
} |
} |
|
$seenTimestamps{$keyStamp} = 1; |
|
1061 |
my $annotationID = "$peg:$keyStamp"; |
my $annotationID = "$peg:$keyStamp"; |
1062 |
|
$seenTimestamps{$annotationID} = 1; |
1063 |
# Ensure the user exists. |
# Ensure the user exists. |
1064 |
if (! $users{$user}) { |
if (! $users{$user}) { |
1065 |
$loadSproutUser->Put($user, "SEED user"); |
$loadSproutUser->Put($user, "SEED user"); |
1077 |
} |
} |
1078 |
} |
} |
1079 |
} |
} |
|
} |
|
1080 |
# Finish the load. |
# Finish the load. |
1081 |
my $retVal = $self->_FinishAll(); |
my $retVal = $self->_FinishAll(); |
1082 |
return $retVal; |
return $retVal; |