[Bio] / Sprout / SproutSubsys.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.20, Wed Oct 15 11:46:57 2008 UTC revision 1.21, Mon Jan 19 21:44:01 2009 UTC
# Line 74  Line 74 
74  Map of roles to EC numbers for the Hope reactions. This object is not loaded  Map of roles to EC numbers for the Hope reactions. This object is not loaded
75  until it is needed.  until it is needed.
76    
77    =item rows
78    
79    Map of spreadsheet rows, keyed by genome ID. Each row is a list of cells. Each
80    cell is a list of feature IDs.
81    
82    =item featureData
83    
84    Hash mapping feature IDs to assigned functions.
85    
86  =back  =back
87    
88  =cut  =cut
# Line 188  Line 197 
197                      reactionHash => undef,                      reactionHash => undef,
198                      # Version number.                      # Version number.
199                      version => $version,                      version => $version,
200                        # Row hash, initially undefined.
201                        rows => undef,
202                        # Map of feature IDs to functional assignments
203                        featureData => {},
204                  };                  };
205          # Bless and return it.          # Bless and return it.
206          bless $retVal, $class;          bless $retVal, $class;
# Line 256  Line 269 
269      my ($self, $rowIndex) = @_;      my ($self, $rowIndex) = @_;
270      # Get the genome ID for the specified row's genome.      # Get the genome ID for the specified row's genome.
271      my $genomeID = $self->{genomes}->[$rowIndex]->[0];      my $genomeID = $self->{genomes}->[$rowIndex]->[0];
272      # Read the row from the database. We won't get exactly what we want. Instead, we'll      # Get the row hash.
273      # get a list of triplets, each consisting of a role name, a feature ID, and a cluster      my $rowHash = $self->_get_spreadsheet();
274      # number. We need to convert this into a list of lists and stash the clustering information      # Declare the return variable.
275      # in the color hash.      my @retVal;
276      my @rowData = $self->{sprout}->GetAll([qw(Subsystem HasSSCell IsGenomeOf IsRoleOf ContainsFeature)],      # If this genome does not exist for the subsystem, all the cells are empty.
277                                            "Subsystem(id) = ? AND IsGenomeOf(from-link) = ?",      if (! exists $rowHash->{$genomeID}) {
278                                            [$self->{name}, $genomeID],          @retVal = map { [] } @{$self->{roles}};
279                                            [qw(IsRoleOf(from-link) ContainsFeature(to-link)      } else {
280                                                ContainsFeature(cluster-number))]);          # Here we just return the row.
281      # Now we do the conversion. We must first create an array of empty lists, one per          push @retVal, @{$rowHash->{$genomeID}};
     # row index.  
     my @retVal = map { [] } @{$self->{roles}};  
     # Get the hash for converting role IDs to role indexes.  
     my $roleHash = $self->{roleHash};  
     # Now we stash all the feature IDs in the appropriate columns of the row list.  
     for my $rowDatum (@rowData) {  
         # Get the role ID, the peg ID, and the cluster number.  
         my ($role, $peg, $cluster) = @{$rowDatum};  
         # Put the peg in the role's peg list.  
         push @{$retVal[$roleHash->{$role}]}, $peg;  
         # Put the cluster number in the color hash.  
         $self->{colorHash}->{$peg} = $cluster;  
282      }      }
283      # Return the result.      # Return the result.
284      return \@retVal;      return \@retVal;
# Line 307  Line 308 
308  sub get_roles_for_genome {  sub get_roles_for_genome {
309      # Get the parameters.      # Get the parameters.
310      my ($self, $genome_id) = @_;      my ($self, $genome_id) = @_;
311      # This next statement gets all of the nonempty cells for the genome's row and memorizes      # Get the subsystem's spreadsheet.
312      # the roles by rolling them into a hash. The query connects four relationship tables on      my $rowHash = $self->_get_spreadsheet();
313      # a single common key-- the spreadsheet cell ID. The IsGenomeOf table insures the cell is for the      # Declare the return variable.
314      # correct genome. The HasSSCell table insures that it belongs to the correct subsystem. The      my @retVal;
315      ContainsFeature table insures that it contains at least one feature. Finally, IsRoleOf tells      # Only proceed if this genome exists for this subsystem.
316      # us the cell's role. If a cell has more than one feature, the result list from the query will return      if (exists $rowHash->{$genome_id}) {
317      # one instance of the role for every distinct feature. The hash collapses the duplicates automatically.          # Get the role list.
318      my %retVal = map { $_ => 1 } $self->{sprout}->GetFlat([qw(ContainsFeature HasSSCell IsGenomeOf IsRoleOf)],          my $roles = $self->{roles};
319                                                            "HasSSCell(from-link) = ? AND IsGenomeOf(from-link) = ?",          # Get the row's cell list.
320                                                            [$self->{name}, $genome_id], 'IsRoleOf(from-link)');          my $row = $rowHash->{$genome_id};
321            # Loop through the cells. We'll save the role name for each
322            # nonempty cell.
323            my $cols = scalar @$roles;
324            for (my $i = 0; $i < $cols; $i++) {
325                my $cell = $row->[$i];
326                if (scalar @$cell) {
327                    push @retVal, $roles->[$i];
328                }
329            }
330        }
331      # Return the result.      # Return the result.
332      return keys %retVal;      return @retVal;
333  }  }
334    
335  =head3 get_abbr_for_role  =head3 get_abbr_for_role
# Line 883  Line 894 
894  Return the cluster number for the specified PEG, or C<-1> if the  Return the cluster number for the specified PEG, or C<-1> if the
895  cluster number for the PEG is unknown or it is not clustered.  cluster number for the PEG is unknown or it is not clustered.
896    
 The cluster number is read into the color hash by the  
 L</get_pegs_from_cell> method. If the incoming PEG IDs do not  
 come from the most recent cell retrievals, the information returned  
 will be invalid. This is a serious design flaw which needs to be  
 fixed soon.  
   
897  =over 4  =over 4
898    
899  =item pegID  =item pegID
# Line 908  Line 913 
913      my ($self, $pegID) = @_;      my ($self, $pegID) = @_;
914      # Declare the return variable.      # Declare the return variable.
915      my $retVal = -1;      my $retVal = -1;
916        # Insure we have a color hash.
917        $self->_get_spreadsheet();
918      # Check for a cluster number in the color hash.      # Check for a cluster number in the color hash.
919      if (exists $self->{colorHash}->{$pegID}) {      if (exists $self->{colorHash}->{$pegID}) {
920          $retVal = $self->{colorHash}->{$pegID};          $retVal = $self->{colorHash}->{$pegID};
# Line 916  Line 923 
923      return $retVal;      return $retVal;
924  }  }
925    
926    
927  =head3 get_pegs_from_cell  =head3 get_pegs_from_cell
928    
929      my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr);      my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr);
# Line 967  Line 975 
975              $genomeID = $genomeList->[$rowstr]->[0];              $genomeID = $genomeList->[$rowstr]->[0];
976          }          }
977      }      }
978      # Construct the spreadsheet cell ID from the information we have.      # Get the spreadsheet.
979      my $cellID = $sprout->DigestKey($self->{name} . ":$genomeID:$colIdx");      my $rowHash = $self->_get_spreadsheet();
980      # Get the list of PEG IDs and cluster numbers for the indicated cell.      # Declare the return variable.
981      my @pegList = $sprout->GetAll(['ContainsFeature'], 'ContainsFeature(from-link) = ?',      my @retVal;
982                                    [$cellID], ['ContainsFeature(to-link)',      # Only proceed if this genome is in this subsystem.
983                                                'ContainsFeature(cluster-number)']);      if (exists $rowHash->{$genomeID}) {
984      # Copy the pegs into the return list, and save the cluster numbers in the color hash.          # Push the cell's contents into the return list.
985      my @retVal = ();          push @retVal, @{$rowHash->{$genomeID}->[$colIdx]};
     for my $pegEntry (@pegList) {  
         my ($peg, $cluster) = @{$pegEntry};  
         $self->{colorHash}->{$peg} = $cluster;  
         push @retVal, $peg;  
986      }      }
987      # Return the list. If the spreadsheet cell was empty or non-existent, we'll end      # Return the list. If the spreadsheet cell was empty or non-existent, we'll end
988      # up returning an empty list.      # up returning an empty list.
# Line 1279  Line 1283 
1283      return ($type, $fh);      return ($type, $fh);
1284  }  }
1285    
1286    =head3 get_hope_scenario_names
1287    
1288        my @names = $sub->get_hope_scenario_names();
1289    
1290    Return a list of the names for the scenarios associated with this
1291    subsystem.
1292    
1293    =cut
1294    
1295    sub get_hope_scenario_names {
1296        # Get the parameters.
1297        my ($self) = @_;
1298        # Get the names from the database.
1299        my $sprout = $self->{sprout};
1300        my @retVal = $sprout->GetFlat("HasScenario",
1301                                      "HasScenario(from-link) = ? ORDER BY HasScenario(to-link)",
1302                                      [$self->{name}], 'to-link');
1303        # Return the result.
1304        return @retVal;
1305    }
1306    
1307    =head3 get_hope_input_compounds
1308    
1309        my @compounds = $sub->get_hope_input_compounds($name);
1310    
1311    Return a list of the input compounds for the named hope scenario.
1312    
1313    =over 4
1314    
1315    =item name
1316    
1317    Name of a Hope scenario attached to this subsystem.
1318    
1319    =item RETURN
1320    
1321    Returns a list of compound IDs.
1322    
1323    =back
1324    
1325    =cut
1326    
1327    sub get_hope_input_compounds {
1328        # Get the parameters.
1329        my ($self, $name) = @_;
1330        # Ask for the compounds.
1331        my @retVal = $self->{sprout}->GetFlat("IsInputFor", "IsInputFor(to-link) = ?",
1332                                              [$name], "IsInputFor(from-link)");
1333        # Return the result.
1334        return @retVal;
1335    }
1336    
1337    =head3 get_hope_output_compounds
1338    
1339        my ($main, $aux) = $sub->get_hope_output_compounds($name);
1340    
1341    Return the main and auxiliary output compounds for the named Hope scenario.
1342    
1343    =over 4
1344    
1345    =item name
1346    
1347    Name of the relevant scenario.
1348    
1349    =item RETURN
1350    
1351    Returns two lists of compound IDs: one for the main outputs and one for the
1352    auxiliary outputs.
1353    
1354    =back
1355    
1356    =cut
1357    
1358    sub get_hope_output_compounds {
1359        # Get the parameters.
1360        my ($self, $name) = @_;
1361        # Ask for the compounds.
1362        my $sprout = $self->{sprout};
1363        my @pairs = $sprout->GetAll("IsOutputOf", "IsOutputOf(to-link) = ?",
1364                                    [$name], "from-link auxiliary");
1365        # We now have a list of pairs in the form [name, aux-flag]. We put each
1366        # name in the list indicated by its aux-flag.
1367        my @retVal = ([], []);
1368        for my $pair (@pairs) {
1369            push @{$retVal[$pair->[1]]}, $pair->[0];
1370        }
1371        # Return the result.
1372        return @retVal;
1373    }
1374    
1375    =head3 get_hope_map_ids
1376    
1377        my @mapIDs = $sub->get_hope_map_ids($name);
1378    
1379    Return a list of the ID numbers for the diagrams associated with the named
1380    scenario.
1381    
1382    =over 4
1383    
1384    =item name
1385    
1386    Name of the relevant scenario.
1387    
1388    =item RETURN
1389    
1390    Returns a list of the ID numbers for the KEGG diagrams associated with this
1391    scenario. These are different from the diagram IDs, all of which begin with
1392    the string "map". This recognizes a design incompatibility between SEED and
1393    Sprout.
1394    
1395    =back
1396    
1397    =cut
1398    
1399    sub get_hope_map_ids {
1400        # Get the parameters.
1401        my ($self, $name) = @_;
1402        # Get the map IDs.
1403        my @diagrams = $self->{sprout}->GetFlat('IsOnDiagram', "IsOnDiagram(from-link) = ?",
1404                                                [$name], 'to-link');
1405        # Modify and return the result.
1406        my @retVal = map { /(\d+)/ } @diagrams;
1407        return @retVal;
1408    }
1409    
1410    =head3 all_functions
1411    
1412        my $pegRoles = $sub->all_functions();
1413    
1414    Return a reference to a hash of all the features in the subsystem. The hash
1415    maps each feature ID to its functional assignment.
1416    
1417    =cut
1418    
1419    sub all_functions {
1420        # Get the parameters.
1421        my ($self) = @_;
1422        # Insure we have a spreadsheet.
1423        $self->_get_spreadsheet();
1424        # Return the feature hash.
1425        return $self->{featureData};
1426    }
1427    
1428    =head2 Internal Utility Methods
1429    
1430    =head3 _get_spreadsheet
1431    
1432        my $hash = $sub->_get_spreadsheet();
1433    
1434    Return a reference to a hash mapping each of the subsystem's genomes to
1435    their spreadsheet rows. Each row is a list of cells, and each cell is a
1436    list of feature IDs. This method also fills the color hash (PEG to cluster
1437    number) and the feature data hash (feature ID to assigned function).
1438    
1439    =cut
1440    
1441    sub _get_spreadsheet {
1442        # Get the parameters.
1443        my ($self) = @_;
1444        # Do we already have a spreadsheet?
1445        my $retVal = $self->{rows};
1446        if (! defined $retVal) {
1447            # We don't, so we have to create one. Start with an empty hash.
1448            $retVal = {};
1449            # Ask for all the subsystem's cells and their features.
1450            my $query = $self->{sprout}->Get("HasSSCell SSCell ContainsFeature Feature",
1451                                             "HasSSCell(from-link) = ?",
1452                                             [$self->{name}]);
1453            # Loop through the features.
1454            while (my $feature = $query->Fetch()) {
1455                # Get the column number, the feature ID, and the cluster number.
1456                my $featureID = $feature->PrimaryValue('ContainsFeature(to-link)');
1457                my $cluster = $feature->PrimaryValue('ContainsFeature(cluster-number)');
1458                my $column = $feature->PrimaryValue('SSCell(column-number)');
1459                my $role = $feature->PrimaryValue('Feature(assignment)');
1460                # Compute the genome.
1461                my $genomeID = FIG::genome_of($featureID);
1462                # If we don't have this genome in the hash, create it.
1463                if (! exists $retVal->{$genomeID}) {
1464                    # The initial value is a list of empty lists. Features
1465                    # are then pushed into each individual list.
1466                    my @row = map { [] } @{$self->{roles}};
1467                    # Put this list of null lists in the hash.
1468                    $retVal->{$genomeID} = \@row;
1469                }
1470                # Get this row. We know now that it exists.
1471                my $row = $retVal->{$genomeID};
1472                # Add this feature to the appropriate cell in the row.
1473                push @{$row->[$column]}, $featureID;
1474                # Put it in the color hash and the feature data hash.
1475                $self->{colorHash}->{$featureID} = $cluster;
1476                $self->{featureData}->{$featureID} = $role;
1477            }
1478            # Save the row hash.
1479            $self->{rows} = $retVal;
1480        }
1481        # Return the result.
1482        return $retVal;
1483    }
1484    
1485    =head3 get_col
1486    
1487        my $cellArray = $sub->get_col($idx);
1488    
1489    Return an array of the cells in the specified column of the subsystem
1490    spreadsheet. Each cell is a reference to a list of the features for the
1491    corresponding row in the specified column.
1492    
1493    =over 4
1494    
1495    =item idx
1496    
1497    Index of the desired column.
1498    
1499    =item RETURN
1500    
1501    Returns a reference to a list containing the spreadsheet column's cells, in
1502    row order.
1503    
1504    =back
1505    
1506    =cut
1507    
1508    sub get_col {
1509        # Get the parameters.
1510        my ($self, $idx) = @_;
1511        # Declare the return variable.
1512        my @retVal;
1513        # Get the subsystem spreadsheet.
1514        my $sheet = $self->_get_spreadsheet();
1515        # Loop through the row list.
1516        for my $rowPair (@{$self->{genomes}}) {
1517            # Get the genome for this row. Each row pair is [genomeID, variantCode].
1518            my ($genomeID) = @$rowPair;
1519            # Get the genome's row in the spreadsheet.
1520            my $rowList = $sheet->{$genomeID};
1521            # Push this column's cell into the output list.
1522            push @retVal, $rowList->[$idx];
1523        }
1524        # Return the result.
1525        return \@retVal;
1526    }
1527    
1528  1;  1;

Legend:
Removed from v.1.20  
changed lines
  Added in v.1.21

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3