74 |
Map of roles to EC numbers for the Hope reactions. This object is not loaded |
Map of roles to EC numbers for the Hope reactions. This object is not loaded |
75 |
until it is needed. |
until it is needed. |
76 |
|
|
77 |
|
=item rows |
78 |
|
|
79 |
|
Map of spreadsheet rows, keyed by genome ID. Each row is a list of cells. Each |
80 |
|
cell is a list of feature IDs. |
81 |
|
|
82 |
|
=item featureData |
83 |
|
|
84 |
|
Hash mapping feature IDs to assigned functions. |
85 |
|
|
86 |
=back |
=back |
87 |
|
|
88 |
=cut |
=cut |
197 |
reactionHash => undef, |
reactionHash => undef, |
198 |
# Version number. |
# Version number. |
199 |
version => $version, |
version => $version, |
200 |
|
# Row hash, initially undefined. |
201 |
|
rows => undef, |
202 |
|
# Map of feature IDs to functional assignments |
203 |
|
featureData => {}, |
204 |
}; |
}; |
205 |
# Bless and return it. |
# Bless and return it. |
206 |
bless $retVal, $class; |
bless $retVal, $class; |
269 |
my ($self, $rowIndex) = @_; |
my ($self, $rowIndex) = @_; |
270 |
# Get the genome ID for the specified row's genome. |
# Get the genome ID for the specified row's genome. |
271 |
my $genomeID = $self->{genomes}->[$rowIndex]->[0]; |
my $genomeID = $self->{genomes}->[$rowIndex]->[0]; |
272 |
# Read the row from the database. We won't get exactly what we want. Instead, we'll |
# Get the row hash. |
273 |
# get a list of triplets, each consisting of a role name, a feature ID, and a cluster |
my $rowHash = $self->_get_spreadsheet(); |
274 |
# number. We need to convert this into a list of lists and stash the clustering information |
# Declare the return variable. |
275 |
# in the color hash. |
my @retVal; |
276 |
my @rowData = $self->{sprout}->GetAll([qw(Subsystem HasSSCell IsGenomeOf IsRoleOf ContainsFeature)], |
# If this genome does not exist for the subsystem, all the cells are empty. |
277 |
"Subsystem(id) = ? AND IsGenomeOf(from-link) = ?", |
if (! exists $rowHash->{$genomeID}) { |
278 |
[$self->{name}, $genomeID], |
@retVal = map { [] } @{$self->{roles}}; |
279 |
[qw(IsRoleOf(from-link) ContainsFeature(to-link) |
} else { |
280 |
ContainsFeature(cluster-number))]); |
# Here we just return the row. |
281 |
# Now we do the conversion. We must first create an array of empty lists, one per |
push @retVal, @{$rowHash->{$genomeID}}; |
|
# row index. |
|
|
my @retVal = map { [] } @{$self->{roles}}; |
|
|
# Get the hash for converting role IDs to role indexes. |
|
|
my $roleHash = $self->{roleHash}; |
|
|
# Now we stash all the feature IDs in the appropriate columns of the row list. |
|
|
for my $rowDatum (@rowData) { |
|
|
# Get the role ID, the peg ID, and the cluster number. |
|
|
my ($role, $peg, $cluster) = @{$rowDatum}; |
|
|
# Put the peg in the role's peg list. |
|
|
push @{$retVal[$roleHash->{$role}]}, $peg; |
|
|
# Put the cluster number in the color hash. |
|
|
$self->{colorHash}->{$peg} = $cluster; |
|
282 |
} |
} |
283 |
# Return the result. |
# Return the result. |
284 |
return \@retVal; |
return \@retVal; |
308 |
sub get_roles_for_genome {
    # Return the roles that have at least one feature in the specified
    # genome's spreadsheet row.
    #
    #   genome_id   ID of the genome whose roles are desired.
    #   RETURN      A list of entries taken from $self->{roles}, in column
    #               order, one for each nonempty cell. The list is empty if
    #               the genome has no row in this subsystem.
    #
    # Get the parameters.
    my ($self, $genome_id) = @_;
    # Get the subsystem's spreadsheet. It is built on first use and cached.
    my $rowHash = $self->_get_spreadsheet();
    # Declare the return variable.
    my @retVal;
    # Only proceed if this genome exists for this subsystem.
    if (exists $rowHash->{$genome_id}) {
        # Get the role list.
        my $roles = $self->{roles};
        # Get the row's cell list.
        my $row = $rowHash->{$genome_id};
        # Loop through the cells. We'll save the role for each nonempty cell.
        for my $i (0 .. $#{$roles}) {
            my $cell = $row->[$i];
            # A row shorter than the role list yields an undefined cell;
            # treat it as empty instead of dereferencing it.
            if (defined $cell && scalar @{$cell}) {
                push @retVal, $roles->[$i];
            }
        }
    }
    # Return the result.
    return @retVal;
}
334 |
|
|
335 |
=head3 get_abbr_for_role |
=head3 get_abbr_for_role |
894 |
Return the cluster number for the specified PEG, or C<-1> if the |
Return the cluster number for the specified PEG, or C<-1> if the |
895 |
cluster number for the PEG is unknown or it is not clustered. |
cluster number for the PEG is unknown or it is not clustered. |
896 |
|
|
|
The cluster number is read into the color hash by the |
|
|
L</get_pegs_from_cell> method. If the incoming PEG IDs do not |
|
|
come from the most recent cell retrievals, the information returned |
|
|
will be invalid. This is a serious design flaw which needs to be |
|
|
fixed soon. |
|
|
|
|
897 |
=over 4 |
=over 4 |
898 |
|
|
899 |
=item pegID |
=item pegID |
913 |
my ($self, $pegID) = @_; |
my ($self, $pegID) = @_; |
914 |
# Declare the return variable. |
# Declare the return variable. |
915 |
my $retVal = -1; |
my $retVal = -1; |
916 |
|
# Insure we have a color hash. |
917 |
|
$self->_get_spreadsheet(); |
918 |
# Check for a cluster number in the color hash. |
# Check for a cluster number in the color hash. |
919 |
if (exists $self->{colorHash}->{$pegID}) { |
if (exists $self->{colorHash}->{$pegID}) { |
920 |
$retVal = $self->{colorHash}->{$pegID}; |
$retVal = $self->{colorHash}->{$pegID}; |
923 |
return $retVal; |
return $retVal; |
924 |
} |
} |
925 |
|
|
926 |
|
|
927 |
=head3 get_pegs_from_cell |
=head3 get_pegs_from_cell |
928 |
|
|
929 |
my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr); |
my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr); |
975 |
$genomeID = $genomeList->[$rowstr]->[0]; |
$genomeID = $genomeList->[$rowstr]->[0]; |
976 |
} |
} |
977 |
} |
} |
978 |
# Construct the spreadsheet cell ID from the information we have. |
# Get the spreadsheet. |
979 |
my $cellID = $sprout->DigestKey($self->{name} . ":$genomeID:$colIdx"); |
my $rowHash = $self->_get_spreadsheet(); |
980 |
# Get the list of PEG IDs and cluster numbers for the indicated cell. |
# Declare the return variable. |
981 |
my @pegList = $sprout->GetAll(['ContainsFeature'], 'ContainsFeature(from-link) = ?', |
my @retVal; |
982 |
[$cellID], ['ContainsFeature(to-link)', |
# Only proceed if this genome is in this subsystem. |
983 |
'ContainsFeature(cluster-number)']); |
if (exists $rowHash->{$genomeID}) { |
984 |
# Copy the pegs into the return list, and save the cluster numbers in the color hash. |
# Push the cell's contents into the return list. |
985 |
my @retVal = (); |
push @retVal, @{$rowHash->{$genomeID}->[$colIdx]}; |
|
for my $pegEntry (@pegList) { |
|
|
my ($peg, $cluster) = @{$pegEntry}; |
|
|
$self->{colorHash}->{$peg} = $cluster; |
|
|
push @retVal, $peg; |
|
986 |
} |
} |
987 |
# Return the list. If the spreadsheet cell was empty or non-existent, we'll end |
# Return the list. If the spreadsheet cell was empty or non-existent, we'll end |
988 |
# up returning an empty list. |
# up returning an empty list. |
1283 |
return ($type, $fh); |
return ($type, $fh); |
1284 |
} |
} |
1285 |
|
|
1286 |
|
=head3 get_hope_scenario_names |
1287 |
|
|
1288 |
|
my @names = $sub->get_hope_scenario_names(); |
1289 |
|
|
1290 |
|
Return a list of the names for the scenarios associated with this |
1291 |
|
subsystem. |
1292 |
|
|
1293 |
|
=cut |
1294 |
|
|
1295 |
|
sub get_hope_scenario_names {
    # Return the list of scenario names attached to this subsystem.
    #
    # Only the subsystem object itself is required.
    my ($self) = @_;
    # The query is filtered on this subsystem's name.
    my $subsystemName = $self->{name};
    # Pull the target end of every HasScenario link leaving this subsystem.
    # The filter clause also asks for the results ordered by that target end.
    my @names = $self->{sprout}->GetFlat("HasScenario",
                                         "HasScenario(from-link) = ? ORDER BY HasScenario(to-link)",
                                         [$subsystemName], 'to-link');
    # Hand back the names.
    return @names;
}
1306 |
|
|
1307 |
|
=head3 get_hope_input_compounds |
1308 |
|
|
1309 |
|
my @compounds = $sub->get_hope_input_compounds($name); |
1310 |
|
|
1311 |
|
Return a list of the input compounds for the named hope scenario. |
1312 |
|
|
1313 |
|
=over 4 |
1314 |
|
|
1315 |
|
=item name |
1316 |
|
|
1317 |
|
Name of a Hope scenario attached to this subsystem. |
1318 |
|
|
1319 |
|
=item RETURN |
1320 |
|
|
1321 |
|
Returns a list of compound IDs. |
1322 |
|
|
1323 |
|
=back |
1324 |
|
|
1325 |
|
=cut |
1326 |
|
|
1327 |
|
sub get_hope_input_compounds {
    # Return the input compounds for the named Hope scenario.
    #
    #   name     Name of a Hope scenario attached to this subsystem.
    #   RETURN   A list of the from-link values of the IsInputFor links
    #            that point at the scenario.
    my ($self, $name) = @_;
    my $sprout = $self->{sprout};
    # Every IsInputFor link whose target is the scenario contributes its
    # source end to the result.
    my @compounds = $sprout->GetFlat("IsInputFor", "IsInputFor(to-link) = ?",
                                     [$name], "IsInputFor(from-link)");
    return @compounds;
}
1336 |
|
|
1337 |
|
=head3 get_hope_output_compounds |
1338 |
|
|
1339 |
|
my ($main, $aux) = $sub->get_hope_output_compounds($name); |
1340 |
|
|
1341 |
|
Return a list of the output compounds for the named hope scenario. |
1342 |
|
|
1343 |
|
=over 4 |
1344 |
|
|
1345 |
|
=item name |
1346 |
|
|
1347 |
|
Name of the relevant scenario. |
1348 |
|
|
1349 |
|
=item RETURN |
1350 |
|
|
1351 |
|
Returns two lists of compound IDs: one for the main outputs and one for the |
1352 |
|
auxiliary outputs. |
1353 |
|
|
1354 |
|
=back |
1355 |
|
|
1356 |
|
=cut |
1357 |
|
|
1358 |
|
sub get_hope_output_compounds {
    # Return the output compounds for the named Hope scenario, split into
    # main and auxiliary outputs.
    #
    #   name     Name of the relevant scenario.
    #   RETURN   Exactly two list references: the first holds the compounds
    #            whose auxiliary flag is false, the second those whose
    #            auxiliary flag is true.
    #
    # Get the parameters.
    my ($self, $name) = @_;
    # Ask for the compounds.
    my $sprout = $self->{sprout};
    my @pairs = $sprout->GetAll("IsOutputOf", "IsOutputOf(to-link) = ?",
                                [$name], "from-link auxiliary");
    # We now have a list of pairs in the form [name, aux-flag]. We put each
    # name in the list indicated by its aux-flag.
    my @retVal = ([], []);
    for my $pair (@pairs) {
        my ($compound, $auxFlag) = @{$pair};
        # Normalize the flag to 0 or 1 before using it as an index. An
        # undefined flag would otherwise produce a warning, and any true
        # value other than 1 would create a third list, breaking the
        # two-list contract.
        my $listIdx = ($auxFlag ? 1 : 0);
        push @{$retVal[$listIdx]}, $compound;
    }
    # Return the result.
    return @retVal;
}
1374 |
|
|
1375 |
|
=head3 get_hope_map_ids |
1376 |
|
|
1377 |
|
my @mapIDs = $sub->get_hope_map_ids($name); |
1378 |
|
|
1379 |
|
Return a list of the ID numbers for the diagrams associated with the named |
1380 |
|
scenario. |
1381 |
|
|
1382 |
|
=over 4 |
1383 |
|
|
1384 |
|
=item name |
1385 |
|
|
1386 |
|
Name of the relevant scenario. |
1387 |
|
|
1388 |
|
=item RETURN |
1389 |
|
|
1390 |
|
Returns a list of the ID numbers for the KEGG diagrams associated with this |
1391 |
|
scenario. These are different from the diagram IDs, all of which begin with |
1392 |
|
the string "map". This recognizes a design incompatibility between SEED and |
1393 |
|
Sprout. |
1394 |
|
|
1395 |
|
=back |
1396 |
|
|
1397 |
|
=cut |
1398 |
|
|
1399 |
|
sub get_hope_map_ids {
    # Return the numeric IDs of the diagrams associated with the named
    # scenario.
    #
    #   name     Name of the relevant scenario.
    #   RETURN   A list containing the first run of digits found in each
    #            diagram ID. Diagram IDs without any digits contribute
    #            nothing to the list.
    my ($self, $name) = @_;
    my $sprout = $self->{sprout};
    # Fetch the diagram IDs linked to the scenario.
    my @diagramIDs = $sprout->GetFlat('IsOnDiagram', "IsOnDiagram(from-link) = ?",
                                      [$name], 'to-link');
    # Strip each ID down to its numeric part.
    my @numbers;
    for my $diagramID (@diagramIDs) {
        if ($diagramID =~ /(\d+)/) {
            push @numbers, $1;
        }
    }
    return @numbers;
}
1409 |
|
|
1410 |
|
=head3 all_functions |
1411 |
|
|
1412 |
|
my $pegRoles = $sub->all_functions(); |
1413 |
|
|
1414 |
|
Return a hash of all the features in the subsystem. The hash maps each |
1415 |
|
feature ID to its functional assignment. |
1416 |
|
|
1417 |
|
=cut |
1418 |
|
|
1419 |
|
sub all_functions {
    # Return a reference to the hash that maps each feature ID in the
    # subsystem to its functional assignment.
    my ($self) = @_;
    # Building the spreadsheet fills in the feature data hash as a side
    # effect, so make sure that has happened. The spreadsheet itself is
    # not needed here.
    $self->_get_spreadsheet();
    # The caller gets the object's own hash, not a copy.
    my $featureHash = $self->{featureData};
    return $featureHash;
}
1427 |
|
|
1428 |
|
=head2 Internal Utility Methods |
1429 |
|
|
1430 |
|
=head3 _get_spreadsheet |
1431 |
|
|
1432 |
|
my $hash = $sub->_get_spreadsheet(); |
1433 |
|
|
1434 |
|
Return a reference to a hash mapping each of the subsystem's genomes to |
1435 |
|
their spreadsheet rows. Each row is a list of cells, and each cell is a |
1436 |
|
list of feature IDs. This method also creates the color hash that maps PEGs |
1437 |
|
to cluster numbers. |
1438 |
|
|
1439 |
|
=cut |
1440 |
|
|
1441 |
|
sub _get_spreadsheet {
    # Return a reference to a hash mapping genome IDs to spreadsheet rows,
    # building and caching it on the first call. Each row is a list of
    # cells (one per entry in $self->{roles}) and each cell is a list of
    # feature IDs. Building the spreadsheet also fills $self->{colorHash}
    # (feature ID to cluster number) and $self->{featureData} (feature ID
    # to assignment).
    my ($self) = @_;
    # If the spreadsheet was built by an earlier call, reuse it.
    my $sheet = $self->{rows};
    return $sheet if defined $sheet;
    # Otherwise start from an empty hash and fill it from the database.
    $sheet = {};
    # One query result comes back per feature in each of the subsystem's
    # cells.
    my $cursor = $self->{sprout}->Get("HasSSCell SSCell ContainsFeature Feature",
                                      "HasSSCell(from-link) = ?",
                                      [$self->{name}]);
    while (my $record = $cursor->Fetch()) {
        # Pull out the feature ID, its cluster number, the column of the
        # cell it lives in, and its assignment.
        my $featureID  = $record->PrimaryValue('ContainsFeature(to-link)');
        my $cluster    = $record->PrimaryValue('ContainsFeature(cluster-number)');
        my $column     = $record->PrimaryValue('SSCell(column-number)');
        my $assignment = $record->PrimaryValue('Feature(assignment)');
        # The row is identified by the genome derived from the feature ID.
        my $genomeID = FIG::genome_of($featureID);
        # The first time a genome is seen, give it a row of empty cells,
        # one per role.
        $sheet->{$genomeID} ||= [ map { [] } @{$self->{roles}} ];
        # File the feature in its cell.
        push @{$sheet->{$genomeID}->[$column]}, $featureID;
        # Remember the cluster number and the assignment for this feature.
        $self->{colorHash}->{$featureID} = $cluster;
        $self->{featureData}->{$featureID} = $assignment;
    }
    # Cache the finished spreadsheet for later calls.
    $self->{rows} = $sheet;
    return $sheet;
}
1484 |
|
|
1485 |
|
=head3 get_col |
1486 |
|
|
1487 |
|
my $cellArray = $sub->get_col($idx); |
1488 |
|
|
1489 |
|
Return an array of the cells in the specified column of the subsystem |
1490 |
|
spreadsheet. Each cell is a reference to a list of the features for the |
1491 |
|
corresponding row in the specified column. |
1492 |
|
|
1493 |
|
=over 4 |
1494 |
|
|
1495 |
|
=item idx |
1496 |
|
|
1497 |
|
Index of the desired column. |
1498 |
|
|
1499 |
|
=item RETURN |
1500 |
|
|
1501 |
|
Returns a reference to a list containing the spreadsheet column's cells, in |
1502 |
|
row order. |
1503 |
|
|
1504 |
|
=back |
1505 |
|
|
1506 |
|
=cut |
1507 |
|
|
1508 |
|
sub get_col {
    # Return the cells in the specified column of the subsystem
    # spreadsheet, one per entry in $self->{genomes} and in that order.
    #
    #   idx      Index of the desired column.
    #   RETURN   A reference to a list of cells. Each cell is a reference
    #            to a list of feature IDs. A genome with no row in the
    #            spreadsheet, or no cell at this index, gets an empty list.
    #
    # Get the parameters.
    my ($self, $idx) = @_;
    # Declare the return variable.
    my @retVal;
    # Get the subsystem spreadsheet.
    my $sheet = $self->_get_spreadsheet();
    # Loop through the row list.
    for my $rowPair (@{$self->{genomes}}) {
        # Get the genome for this row. Each row pair is [genomeID, variantCode].
        my ($genomeID) = @{$rowPair};
        # Get the genome's row in the spreadsheet. The spreadsheet only has
        # rows for genomes that have at least one feature, so this may be
        # undefined.
        my $rowList = $sheet->{$genomeID};
        my $cell = (defined $rowList ? $rowList->[$idx] : undef);
        # Push this column's cell into the output list, substituting an
        # empty cell so the caller can always dereference the entry.
        push @retVal, (defined $cell ? $cell : []);
    }
    # Return the result.
    return \@retVal;
}
1527 |
|
|
1528 |
1; |
1; |