#!/usr/bin/perl -w package SproutSubsys; use strict; use Tracer; use PageBuilder; use FIG; use Sprout; =head1 Sprout Subsystem Object =head2 Introduction This object emulates the capabilities of the FIG-style C object, but uses Sprout methods to retrieve the data. This object can be dropped in place of the UnvSubsys object to create subsystem displays for the Sprout rather than the SEED. The structure created by the constructor contains the following data members. =over 4 =item name Name of the subsystem. This is needed for any further database accesses required. =item curator Name of the subsystem's official curator. =item notes General notes about the subsystem. =item sprout Sprout object for accessing the database. This is a genuine Sprout object, not an SFXlate object. =item genomeHash Map of genome IDs to row indices. =item genomes List of [genomeID, variantCode] tuples in row order. =item roleHash Map of role IDs and abbreviations to column indices. In other words, plugging either a full-blown role ID or its abbreviation into this hash will return the role's column index. =item roles List of [roleID, abbreviation] tuples in column order. =item reactionHash Map of role IDs to a list of the reactions catalyzed by the role. =item colorHash Map of PEG IDs to cluster numbers. This is used to create color maps for display of a subsystem's PEGs. =back =cut #: Constructor SproutSubsys->new(); =head2 Public Methods =head3 new C<< my $sub = Subsystem->new($subName, $sprout); >> Load the subsystem. =over 4 =item subName Name of the desired subsystem. =item sprout Sprout or SFXlate object for accessing the Sprout data store. =back =cut sub new { # Get the parameters. my ($class, $subName, $sprout) = @_; # Insure we have a Sprout object. if (ref $sprout eq 'SFXlate') { $sprout = $sprout->{sprout}; } # Declare the return value. my $retVal; # Get the subsystem's data fields. my ($curator, $notes) = $sprout->GetEntityValues('Subsystem', $subName, ['Subsystem(curator)', 'Subsystem(notes)']); # Only proceed if we found the subsystem. if (defined $curator) { # Get the genome IDs and variant codes for the rows. The list returned # by GetAll will be a list of 2-tuples, each consisting of a genome ID # and a subsystem variant code. my @genomes = $sprout->GetAll(['ParticipatesIn'], 'ParticipatesIn(to-link) = ? ORDER BY ParticipatesIn(variant-code), ParticipatesIn(from-link)', [$subName], ['ParticipatesIn(from-link)', 'ParticipatesIn(variant-code)']); # Create the genome ID directory. This is a hash that maps a genome ID to its # row index. my $idx = 0; my %genomeHash = map { $_->[0] => $idx++ } @genomes; # Get the role IDs and abbreviations. The list returned by GetAll will be # a list of 2-tuples, each consisting of a role ID and abbreviation. The # 2-tuples will be ordered by the spreadsheet column number. my @roles = $sprout->GetAll(['OccursInSubsystem', 'Role'], 'OccursInSubsystem(to-link) = ? ORDER BY OccursInSubsystem(column-number)', [$subName], ['OccursInSubsystem(from-link)', 'Role(abbr)']); # Now we need to create the role ID directory and the reaction hash. # The role ID directory maps role IDs and their abbreviations to column numbers. # The reaction hash maps a role ID to a list of the IDs for the reactions it # catalyzes. my %roleHash = (); my %reactionHash = (); for ($idx = 0; $idx <= $#roles; $idx++) { # Get the role ID and abbreviation for this column's role. my ($roleID, $abbr) = @{$roles[$idx]}; # Put them both in the role directory. $roleHash{$roleID} = $idx; $roleHash{$abbr} = $idx; # Get this role's reactions. my @reactions = $sprout->GetFlat(['Catalyzes'], 'Catalyzes(from-link) = ?', [$roleID], 'Catalyzes(to-link)'); # Put them in the reaction hash. if (@reactions > 0) { $reactionHash{$roleID} = \@reactions; } } # Create the subsystem object. $retVal = { # Name of the subsystem. This is needed for any further database # accesses required. name => $subName, # Name of the subsystem's official curator. curator => $curator, # General notes about the subsystem. notes => $notes, # Sprout object for accessing the database. sprout => $sprout, # Map of genome IDs to row indices. genomeHash => \%genomeHash, # List of [genomeID, variantCode] tuples in row order. genomes => \@genomes, # Map of role IDs and abbreviations to column indices. roleHash => \%roleHash, # List of [roleID, abbreviation] tuples in column order. roles => \@roles, # Map of PEG IDs to cluster numbers. colorHash => {}, # Map of role IDs to reactions. reactionHash => \%reactionHash, }; # Bless and return it. bless $retVal, $class; } return $retVal; } =head3 get_genomes C<< my @genomeList = $sub->get_genomes(); >> Return a list of the genome IDs for this subsystem. Each genome corresponds to a row in the subsystem spreadsheet. Indexing into this list returns the ID of the genome in the specified row. =cut sub get_genomes { # Get the parameters. my ($self) = @_; # Return a list of the genome IDs. The "genomes" member contains a 2-tuple # with the genome ID followed by the variant code. We only return the # genome IDs. my @retVal = map { $_->[0] } @{$self->{genomes}}; return @retVal; } =head3 get_variant_code C<< my $code = $sub->get_variant_code($gidx); >> Return the variant code for the specified genome. Each subsystem has multiple variants which involve slightly different chemical reactions, and each variant has an associated variant code. When a genome is connected to the spreadsheet, the subsystem variant used by the genome must be specified. =over 4 =item gidx Row index for the genome whose variant code is desired. =item RETURN Returns the variant code for the specified genome. =back =cut sub get_variant_code { # Get the parameters. my ($self, $gidx) = @_; # Extract the variant code for the specified row index. It is the second # element of the tuple from the "genomes" member. my $retVal = $self->{genomes}->{$gidx}->[1]; return $retVal; } =head3 get_curator C<< my $userName = $sub->get_curator(); >> Return the name of this subsystem's official curator. =cut sub get_curator { # Get the parameters. my ($self) = @_; # Return the curator member. return $self->{curator}; } =head3 get_notes C<< my $text = $sub->get_notes(); >> Return the descriptive notes for this subsystem. =cut sub get_notes { # Get the parameters. my ($self) = @_; # Return the notes member. return $self->{notes}; } =head3 get_roles C<< my @roles = $sub->get_roles(); >> Return a list of the subsystem's roles. Each role corresponds to a column in the subsystem spreadsheet. The list entry at a specified position in the list will contain the ID of that column's role. =cut sub get_roles { # Get the parameters. my ($self) = @_; # Return the list of role IDs. The role IDs are stored as the first # element of each 2-tuple in the "roles" member. my @retVal = map { $_->[0] } @{$self->{roles}}; return @retVal; } =head3 get_reactions C<< my $reactHash = $sub->get_reactions(); >> Return a reference to a hash that maps each role ID to a list of the reactions catalyzed by the role. =cut sub get_reactions { # Get the parameters. my ($self) = @_; # Return the reaction hash member. return $self->{reactionHash}; } =head3 get_subset_namesC C<< my @subsetNames = $sub->get_subset_namesC(); >> Return a list of the names for all the column (role) subsets. Given a subset name, you can use the L method to get the roles in the subset. =cut sub get_subset_namesC { # Get the parameters. my ($self) = @_; # Get the sprout object and use it to retrieve the subset names. my $sprout = $self->{sprout}; my @subsets = $sprout->GetFlat(['HasRoleSubset'], 'HasRoleSubset(from-link) = ?', [$self->{name}], 'HasRoleSubset(to-link)'); # The sprout subset names are prefixed by the subsystem name. We need to pull the # prefix off before we return the results. The prefixing character is a colon (:), # so we search for the last colon to get ourselves the true subset name. my @retVal = map { $_ =~ /:([^:]+)$/; $1 } @subsets; return @retVal; } =head3 get_role_abbr C<< my $abbr = $sub->get_role_abbr($ridx); >> Return the abbreviation for the role in the specified column. The abbreviation is a shortened identifier that is not necessarily unique, but is more likely to fit in a column heading. =over 4 =item ridx Column index for the role whose abbreviation is desired. =item RETURN Returns an abbreviated name for the role corresponding to the indexed column. =back =cut sub get_role_abbr { # Get the parameters. my ($self, $ridx) = @_; # Return the role abbreviation. The abbreviation is the second element # in the 2-tuple for the specified column in the "roles" member. my $retVal = $self->{roles}->[$ridx]->[1]; return $retVal; } =head3 get_role_index C<< my $idx = $sub->get_role_index($role); >> Return the column index for the role with the specified ID. =over 4 =item role ID (full name) or abbreviation of the role whose column index is desired. =item RETURN Returns the column index for the role with the specified name or abbreviation. =back =cut sub get_role_index { # Get the parameters. my ($self, $role) = @_; # The role index is directly available from the "roleHash" member. my $retVal = $self->{roleHash}->{$role}; return $retVal; } =head3 get_subsetC_roles C<< my @roles = $sub->get_subsetC_roles($subname); >> Return the names of the roles contained in the specified role (column) subset. =over 4 =item subname Name of the role subset whose roles are desired. =item RETURN Returns a list of the role names for the columns in the named subset. =back =cut sub get_subsetC_roles { # Get the parameters. my ($self, $subname) = @_; # Get the sprout object. We need it to be able to get the subset data. my $sprout = $self->{sprout}; # Convert the subset name to Sprout format. In Sprout, the subset name is # prefixed by the subsystem name in order to get a unique subset ID. my $subsetID = $self->{name} . ":$subname"; # Get a list of the role names for this subset. my @roleNames = $sprout->GetFlat(['ConsistsOfRoles'], 'ConsistsOfRoles(from-link) = ?', [$subsetID], 'ConsistsOfRoles(to-link)'); # Sort them by column number. We get the column number from the role hash. my $roleHash = $self->{roleHash}; my @retVal = sort { $roleHash->{$a} <=> $roleHash->{$b} } @roleNames; # Return the sorted list. return @retVal; } =head3 get_genome_index C<< my $idx = $sub->get_genome_index($genome); >> Return the row index for the genome with the specified ID. =over 4 =item genome ID of the genome whose row index is desired. =item RETURN Returns the row index for the genome with the specified ID, or an undefined value if the genome does not participate in the subsystem. =back =cut sub get_genome_index { # Get the parameters. my ($self, $genome) = @_; # Get the genome row index from the "genomeHash" member. my $retVal = $self->{genomeHash}->{$genome}; return $retVal; } =head3 get_cluster_number C<< my $number = $sub->get_cluster_number($pegID); >> Return the cluster number for the specified PEG, or C<-1> if the cluster number for the PEG is unknown or it is not clustered. The cluster number is read into the color hash by the L method. If the incoming PEG IDs do not come from the most recent cell retrievals, the information returned will be invalid. This is a serious design flaw which needs to be fixed soon. =over 4 =item pegID ID of the PEG whose cluster number is desired. TODO: items =back =cut #: Return Type $; sub get_cluster_number { # Get the parameters. my ($self, $pegID) = @_; # Declare the return variable. my $retVal = -1; # Check for a cluster number in the color hash. if (exists $self->{colorHash}->{$pegID}) { $retVal = $self->{colorHash}->{$pegID}; } # Return the result. return $retVal; } =head3 get_pegs_from_cell C<< my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr); >> Return a list of the peg IDs for the features in the specified spreadsheet cell. =over 4 =item rowstr Genome row, specified either as a row index or a genome ID. =item colstr Role column, specified either as a column index, a role name, or a role abbreviation. =item RETURN Returns a list of PEG IDs. The PEGs in the list belong to the genome in the specified row and perform the role in the specified column. If the indicated row and column does not exist, returns an empty list. =back =cut sub get_pegs_from_cell { # Get the parameters. my ($self, $rowstr, $colstr) = @_; # Get the sprout object for accessing the database. my $sprout = $self->{sprout}; # We need to convert the incoming row and column identifiers. We need a # numeric column index and a character genome ID to create the ID for the # subsystem spreadsheet cell. First, the column index: note that our version # of "get_role_index" conveniently works for both abbreviations and full role IDs. my $colIdx = ($colstr =~ /^(\d+)$/ ? $colstr : $self->get_role_index($colstr)); # Next the genome ID. In this case, we convert any number we find to a string. # This requires a little care to avoid a run-time error if the row number is # out of range. my $genomeID = $rowstr; if ($rowstr =~ /^(\d+)$/) { # Here we need to convert the row number to an ID. Insure the number is in # range. Note that if we do have a row number out of range, the genome ID # will be invalid, and our attempt to read from the database will return an # empty list. my $genomeList = $self->{genomes}; if ($rowstr >= 0 && $rowstr < @{$genomeList}) { $genomeID = $genomeList->[$rowstr]->[0]; } } # Construct the spreadsheet cell ID from the information we have. my $cellID = $self->{name} . ":$genomeID:$colIdx"; # Get the list of PEG IDs and cluster numbers for the indicated cell. my @pegList = $sprout->GetAll(['ContainsFeature'], 'ContainsFeature(from-link) = ?', [$cellID], ['ContainsFeature(to-link)', 'ContainsFeature(cluster-number)']); # Copy the pegs into the return list, and save the cluster numbers in the color hash. my @retVal = (); for my $pegEntry (@pegList) { my ($peg, $cluster) = @{$pegEntry}; $self->{colorHash}->{$peg} = $cluster; push @retVal, $peg; } # Return the list. If the spreadsheet cell was empty or non-existent, we'll end # up returning an empty list. return @retVal; } 1;