[Bio] / Sprout / SproutSubsys.pm Repository:
ViewVC logotype

View of /Sprout/SproutSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Mon Nov 28 22:52:11 2005 UTC (14 years ago) by parrello
Branch: MAIN
Changes since 1.3: +62 -2 lines
Fixed some comments.
Added "get_diagrams".

#!/usr/bin/perl -w

package SproutSubsys;

    use strict;
    use Tracer;
    use PageBuilder;
    use FIG;
    use Sprout;

=head1 Sprout Subsystem Object

=head2 Introduction

This object emulates the capabilities of the FIG-style C<Subsystem> object, but
uses Sprout methods to retrieve the data. This object can be dropped in place of
the UnvSubsys object to create subsystem displays for the Sprout rather than the
SEED.

The structure created by the constructor contains the following data members.

=over 4

=item name

Name of the subsystem. This is needed for any further database accesses required.

=item curator

Name of the subsystem's official curator.

=item notes

General notes about the subsystem.

=item sprout

Sprout object for accessing the database. This is a genuine Sprout object, not
an SFXlate object.

=item genomeHash

Map of genome IDs to row indices.

=item genomes

List of [genomeID, variantCode] tuples in row order.

=item roleHash

Map of role IDs and abbreviations to column indices. In other words, plugging
either a full-blown role ID or its abbreviation into this hash will return
the role's column index.

=item roles

List of [roleID, abbreviation] tuples in column order.

=item reactionHash

Map of role IDs to a list of the reactions catalyzed by the role.

=item colorHash

Map of PEG IDs to cluster numbers. This is used to create color maps for
display of a subsystem's PEGs.

=back

=cut

#: Constructor SproutSubsys->new();

=head2 Public Methods

=head3 new

C<< my $sub = Subsystem->new($subName, $sprout); >>

Load the subsystem.

=over 4

=item subName

Name of the desired subsystem.

=item sprout

Sprout or SFXlate object for accessing the Sprout data store.

=back

=cut

sub new {
    # Get the parameters.
    my ($class, $subName, $sprout) = @_;
    # Insure we have a Sprout object.
    if (ref $sprout eq 'SFXlate') {
        $sprout = $sprout->{sprout};
    }
    # Declare the return value.
    my $retVal;
    # Get the subsystem's data fields.
    my ($curator, $notes) = $sprout->GetEntityValues('Subsystem', $subName, ['Subsystem(curator)',
                                                                             'Subsystem(notes)']);
    # Only proceed if we found the subsystem.
    if (defined $curator) {
        # Get the genome IDs and variant codes for the rows. The list returned
        # by GetAll will be a list of 2-tuples, each consisting of a genome ID
        # and a subsystem variant code.
        my @genomes = $sprout->GetAll(['ParticipatesIn'],
                                      'ParticipatesIn(to-link) = ? ORDER BY ParticipatesIn(variant-code), ParticipatesIn(from-link)',
                                      [$subName], ['ParticipatesIn(from-link)',
                                                   'ParticipatesIn(variant-code)']);
        # Create the genome ID directory. This is a hash that maps a genome ID to its
        # row index.
        my $idx = 0;
        my %genomeHash = map { $_->[0] => $idx++ } @genomes;
        # Get the role IDs and abbreviations. The list returned by GetAll will be
        # a list of 2-tuples, each consisting of a role ID and abbreviation. The
        # 2-tuples will be ordered by the spreadsheet column number.
        my @roles = $sprout->GetAll(['OccursInSubsystem', 'Role'],
                                    'OccursInSubsystem(to-link) = ? ORDER BY OccursInSubsystem(column-number)',
                                    [$subName], ['OccursInSubsystem(from-link)', 'Role(abbr)']);
        # Now we need to create the role ID directory and the reaction hash.
        # The role ID directory maps role IDs and their abbreviations to column numbers.
        # The reaction hash maps a role ID to a list of the IDs for the reactions it
        # catalyzes.
        my %roleHash = ();
        my %reactionHash = ();
        for ($idx = 0; $idx <= $#roles; $idx++) {
            # Get the role ID and abbreviation for this column's role.
            my ($roleID, $abbr) = @{$roles[$idx]};
            # Put them both in the role directory.
            $roleHash{$roleID} = $idx;
            $roleHash{$abbr} = $idx;
            # Get this role's reactions.
            my @reactions = $sprout->GetFlat(['Catalyzes'], 'Catalyzes(from-link) = ?',
                                             [$roleID], 'Catalyzes(to-link)');
            # Put them in the reaction hash.
            if (@reactions > 0) {
                $reactionHash{$roleID} = \@reactions;
            }
        }
        # Create the subsystem object.
        $retVal = {
                    # Name of the subsystem. This is needed for any further database
                    # accesses required.
                    name => $subName,
                    # Name of the subsystem's official curator.
                    curator => $curator,
                    # General notes about the subsystem.
                    notes => $notes,
                    # Sprout object for accessing the database.
                    sprout => $sprout,
                    # Map of genome IDs to row indices.
                    genomeHash => \%genomeHash,
                    # List of [genomeID, variantCode] tuples in row order.
                    genomes => \@genomes,
                    # Map of role IDs and abbreviations to column indices.
                    roleHash => \%roleHash,
                    # List of [roleID, abbreviation] tuples in column order.
                    roles => \@roles,
                    # Map of PEG IDs to cluster numbers.
                    colorHash => {},
                    # Map of role IDs to reactions.
                    reactionHash => \%reactionHash,
                };
        # Bless and return it.
        bless $retVal, $class;
    }
    return $retVal;
}

=head3 get_genomes

C<< my @genomeList = $sub->get_genomes(); >>

Return a list of the genome IDs for this subsystem. Each genome corresponds to a row
in the subsystem spreadsheet. Indexing into this list returns the ID of the genome
in the specified row.

=cut

sub get_genomes {
    # Get the parameters.
    my ($self) = @_;
    # Return a list of the genome IDs. The "genomes" member contains a 2-tuple
    # with the genome ID followed by the variant code. We only return the
    # genome IDs.
    my @retVal = map { $_->[0] } @{$self->{genomes}};
    return @retVal;
}

=head3 get_variant_code

C<< my $code = $sub->get_variant_code($gidx); >>

Return the variant code for the specified genome. Each subsystem has multiple
variants which involve slightly different chemical reactions, and each variant
has an associated variant code. When a genome is connected to the spreadsheet,
the subsystem variant used by the genome must be specified.

=over 4

=item gidx

Row index for the genome whose variant code is desired.

=item RETURN

Returns the variant code for the specified genome.

=back

=cut

sub get_variant_code {
    # Get the parameters.
    my ($self, $gidx) = @_;
    # Extract the variant code for the specified row index. It is the second
    # element of the tuple from the "genomes" member.
    my $retVal = $self->{genomes}->{$gidx}->[1];
    return $retVal;
}

=head3 get_curator

C<< my $userName = $sub->get_curator(); >>

Return the name of this subsystem's official curator.

=cut

sub get_curator {
    # Get the parameters.
    my ($self) = @_;
    # Return the curator member.
    return $self->{curator};
}

=head3 get_notes

C<< my $text = $sub->get_notes(); >>

Return the descriptive notes for this subsystem.

=cut

sub get_notes {
    # Get the parameters.
    my ($self) = @_;
    # Return the notes member.
    return $self->{notes};
}

=head3 get_roles

C<< my @roles = $sub->get_roles(); >>

Return a list of the subsystem's roles. Each role corresponds to a column
in the subsystem spreadsheet. The list entry at a specified position in
the list will contain the ID of that column's role.

=cut

sub get_roles {
    # Get the parameters.
    my ($self) = @_;
    # Return the list of role IDs. The role IDs are stored as the first
    # element of each 2-tuple in the "roles" member.
    my @retVal = map { $_->[0] } @{$self->{roles}};
    return @retVal;
}

=head3 get_reactions

C<< my $reactHash = $sub->get_reactions(); >>

Return a reference to a hash that maps each role ID to a list of the reactions
catalyzed by the role.

=cut

sub get_reactions {
    # Get the parameters.
    my ($self) = @_;
    # Return the reaction hash member.
    return $self->{reactionHash};
}

=head3 get_subset_namesC

C<< my @subsetNames = $sub->get_subset_namesC(); >>

Return a list of the names for all the column (role) subsets. Given a subset
name, you can use the L</get_subsetC_roles> method to get the roles in the
subset.

=cut

sub get_subset_namesC {
    # Get the parameters.
    my ($self) = @_;
    # Get the sprout object and use it to retrieve the subset names.
    my $sprout = $self->{sprout};
    my @subsets = $sprout->GetFlat(['HasRoleSubset'], 'HasRoleSubset(from-link) = ?',
                                   [$self->{name}], 'HasRoleSubset(to-link)');
    # The sprout subset names are prefixed by the subsystem name. We need to pull the
    # prefix off before we return the results. The prefixing character is a colon (:),
    # so we search for the last colon to get ourselves the true subset name.
    my @retVal = map { $_ =~ /:([^:]+)$/; $1 } @subsets;
    return @retVal;
}

=head3 get_role_abbr

C<< my $abbr = $sub->get_role_abbr($ridx); >>

Return the abbreviation for the role in the specified column. The abbreviation
is a shortened identifier that is not necessarily unique, but is more likely to
fit in a column heading.

=over 4

=item ridx

Column index for the role whose abbreviation is desired.

=item RETURN

Returns an abbreviated name for the role corresponding to the indexed column.

=back

=cut

sub get_role_abbr {
    # Get the parameters.
    my ($self, $ridx) = @_;
    # Return the role abbreviation. The abbreviation is the second element
    # in the 2-tuple for the specified column in the "roles" member.
    my $retVal = $self->{roles}->[$ridx]->[1];
    return $retVal;
}

=head3 get_role_index

C<< my $idx = $sub->get_role_index($role); >>

Return the column index for the role with the specified ID.

=over 4

=item role

ID (full name) or abbreviation of the role whose column index is desired.

=item RETURN

Returns the column index for the role with the specified name or abbreviation.

=back

=cut

sub get_role_index {
    # Get the parameters.
    my ($self, $role) = @_;
    # The role index is directly available from the "roleHash" member.
    my $retVal = $self->{roleHash}->{$role};
    return $retVal;
}

=head3 get_subsetC_roles

C<< my @roles = $sub->get_subsetC_roles($subname); >>

Return the names of the roles contained in the specified role (column) subset.

=over 4

=item subname

Name of the role subset whose roles are desired.

=item RETURN

Returns a list of the role names for the columns in the named subset.

=back

=cut

sub get_subsetC_roles {
    # Get the parameters.
    my ($self, $subname) = @_;
    # Get the sprout object. We need it to be able to get the subset data.
    my $sprout = $self->{sprout};
    # Convert the subset name to Sprout format. In Sprout, the subset name is
    # prefixed by the subsystem name in order to get a unique subset ID.
    my $subsetID = $self->{name} . ":$subname";
    # Get a list of the role names for this subset.
    my @roleNames = $sprout->GetFlat(['ConsistsOfRoles'], 'ConsistsOfRoles(from-link) = ?',
                                  [$subsetID], 'ConsistsOfRoles(to-link)');
    # Sort them by column number. We get the column number from the role hash.
    my $roleHash = $self->{roleHash};
    my @retVal = sort { $roleHash->{$a} <=> $roleHash->{$b} } @roleNames;
    # Return the sorted list.
    return @retVal;
}

=head3 get_genome_index

C<< my $idx = $sub->get_genome_index($genome); >>

Return the row index for the genome with the specified ID.

=over 4

=item genome

ID of the genome whose row index is desired.

=item RETURN

Returns the row index for the genome with the specified ID, or an undefined
value if the genome does not participate in the subsystem.

=back

=cut

sub get_genome_index {
    # Get the parameters.
    my ($self, $genome) = @_;
    # Get the genome row index from the "genomeHash" member.
    my $retVal = $self->{genomeHash}->{$genome};
    return $retVal;
}

=head3 get_cluster_number

C<< my $number = $sub->get_cluster_number($pegID); >>

Return the cluster number for the specified PEG, or C<-1> if the
cluster number for the PEG is unknown or it is not clustered.

The cluster number is read into the color hash by the
L</get_pegs_from_cell> method. If the incoming PEG IDs do not
come from the most recent cell retrievals, the information returned
will be invalid. This is a serious design flaw which needs to be
fixed soon.

=over 4

=item pegID

ID of the PEG whose cluster number is desired.

=item RETURN

Returns the appropriate cluster number.

=back

=cut
#: Return Type $;
sub get_cluster_number {
    # Get the parameters.
    my ($self, $pegID) = @_;
    # Declare the return variable.
    my $retVal = -1;
    # Check for a cluster number in the color hash.
    if (exists $self->{colorHash}->{$pegID}) {
        $retVal = $self->{colorHash}->{$pegID};
    }
    # Return the result.
    return $retVal;
}

=head3 get_pegs_from_cell

C<< my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr); >>

Return a list of the peg IDs for the features in the specified spreadsheet cell.

=over 4

=item rowstr

Genome row, specified either as a row index or a genome ID.

=item colstr

Role column, specified either as a column index, a role name, or a role
abbreviation.

=item RETURN

Returns a list of PEG IDs. The PEGs in the list belong to the genome in the
specified row and perform the role in the specified column. If the indicated
row and column does not exist, returns an empty list.

=back

=cut

sub get_pegs_from_cell {
    # Get the parameters.
    my ($self, $rowstr, $colstr) = @_;
    # Get the sprout object for accessing the database.
    my $sprout = $self->{sprout};
    # We need to convert the incoming row and column identifiers. We need a
    # numeric column index and a character genome ID to create the ID for the
    # subsystem spreadsheet cell. First, the column index: note that our version
    # of "get_role_index" conveniently works for both abbreviations and full role IDs.
    my $colIdx = ($colstr =~ /^(\d+)$/ ? $colstr : $self->get_role_index($colstr));
    # Next the genome ID. In this case, we convert any number we find to a string.
    # This requires a little care to avoid a run-time error if the row number is
    # out of range.
    my $genomeID = $rowstr;
    if ($rowstr =~ /^(\d+)$/) {
        # Here we need to convert the row number to an ID. Insure the number is in
        # range. Note that if we do have a row number out of range, the genome ID
        # will be invalid, and our attempt to read from the database will return an
        # empty list.
        my $genomeList = $self->{genomes};
        if ($rowstr >= 0 && $rowstr < @{$genomeList}) {
            $genomeID = $genomeList->[$rowstr]->[0];
        }
    }
    # Construct the spreadsheet cell ID from the information we have.
    my $cellID = $self->{name} . ":$genomeID:$colIdx";
    # Get the list of PEG IDs and cluster numbers for the indicated cell.
    my @pegList = $sprout->GetAll(['ContainsFeature'], 'ContainsFeature(from-link) = ?',
                                  [$cellID], ['ContainsFeature(to-link)',
                                              'ContainsFeature(cluster-number)']);
    # Copy the pegs into the return list, and save the cluster numbers in the color hash.
    my @retVal = ();
    for my $pegEntry (@pegList) {
        my ($peg, $cluster) = @{$pegEntry};
        $self->{colorHash}->{$peg} = $cluster;
        push @retVal, $peg;
    }
    # Return the list. If the spreadsheet cell was empty or non-existent, we'll end
    # up returning an empty list.
    return @retVal;
}

=head3 get_diagrams

C<< my @list = $sub->get_diagrams(); >>

Return a list of the diagrams associated with this subsystem. Each diagram
is represented in the return list as a 4-tuple C<[diagram_id, diagram_name,
page_link, img_link]> where

=over 4

=item diagram_id

ID code for this diagram.

=item diagram_name

Displayable name of the diagram.

=item page_link

URL of an HTML page containing information about the diagram.

=item img_link

URL of an HTML page containing an image for the diagram.

=back

Note that the URLs are in fact for CGI scripts with parameters that point them
to the correct place. Though Sprout has diagram information in it, it has
no relationship to the diagrams displayed in SEED, so the work is done entirely
on the SEED side.

=cut

sub get_diagrams {
    # Get the parameters.
    my ($self) = @_;
    # Find the subsystem directory.
    my $subDir = Subsystem::get_dir_from_name($self->{name});
    # Get the diagram IDs.
    my @diagramIDs = Subsystem::GetDiagramIDs($subDir);
    # Create the return variable.
    my @retVal = ();
    # Loop through the diagram IDs.
    for my $diagramID (@diagramIDs) {
        # Get the diagram name.
        my $name = Subsystem::GetDiagramName($diagramID);
        # If a name was found, get the URLs.
        if ($name) {
            my ($link, $imgLink) = Subsystem::ComputeDiagramURLs($self->{name},
                                                                 $diagramID);
            push @retVal, [$diagramID, $name, $link, $imgLink];
        }
    }
}


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3