[Bio] / FigKernelPackages / SS.pm Repository:
ViewVC logotype

View of /FigKernelPackages/SS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Tue Nov 3 21:20:07 2009 UTC (10 years, 5 months ago) by parrello
Branch: MAIN
Changes since 1.4: +24 -0 lines
Error handling restructure.

#!/usr/bin/perl -w
use strict;

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
package SS;

    use strict;
    use ERDB;
    use Tracer;
    use SeedUtils;
    use ServerThing;

=head1 Subsystem Server Function Object

This file contains the functions and utilities used by the Subsystem Server
(B<subsystem_server_sapling.cgi>). The L</Primary Methods> represent function
calls direct to the server. These all have a signature similar to the following.

    my $document = $ssObject->function_name($args);

where C<$ssObject> is an object created by this module, 
C<$args> is a parameter structure, and C<function_name> is the Subsystem
Server function name. The output is a structure, generally a hash reference, but
sometimes a string or a list reference.

=head2 Special Methods

=head3 new

    my $ssObject = SS->new();

Create a new Subsystem Server function object. The server function object
contains a pointer to a [[SaplingPm]] object, and is used to invoke the
server functions.

=cut

sub new {
    my ($class) = @_;
    # Create the sapling object.
    my $sap = ERDB::GetDatabase('Sapling');
    # Create the server object.
    my $retVal = { db => $sap };
    # Bless and return it.
    bless $retVal, $class;
    return $retVal;
}


=head2 Primary Methods

=head3 methods

    my $document = $coObject->methods();

Return a list of the methods allowed on this object.

=cut

use constant METHODS => [qw(is_in_subsystem
                            is_in_subsystem_with
                            all_subsystems
                            subsystem_spreadsheet
                            pegs_in_subsystems
                            pegs_implementing_roles
                            metabolic_reconstruction
                        )];

sub methods {
    # Get the parameters.
    my ($self) = @_;
    # Return the result.
    return METHODS;
}

=head3 is_in_subsystem

    my $document = $ssObject->is_in_subsystem($args);

Return the subsystem and role for each specified feature.

=over 4

=item args

Reference to either (1) a hash with a key of C<-ids> whose value is a list
of FIG feature IDs or (2) a list of FIG feature IDs.

=item RETURN

Returns a reference to a list of 3-tuples. Each 3-tuple consists of a subsystem
ID, a role ID, and the ID of a feature from the input list.

=back

=cut

sub is_in_subsystem {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the sapling database.
    my $sapling = $self->{db};
    # Declare the return variable.
    my $retVal;
    # Convert a list to a hash.
    if (ref $args ne 'HASH') {
        $args = { -ids => $args };
    }
    # Get the fig IDs from the parameters.
    my $ids = ServerThing::GetIdList(-ids => $args);
    foreach my $fid (@$ids) {
        my @resultRows = $sapling->GetAll("Subsystem Includes Role IsRoleOf MachineRole Contains Feature", 
                                          'Feature(id) = ?', [$fid], 
                                          [qw(Subsystem(id) Role(id) Feature(id))]);
        push (@$retVal, \@resultRows);
    }
    # Return the result.
    return $retVal;
}

=head3 is_in_subsystem_with

    my $document = $ssObject->is_in_subsystem_with($args);

Given a list of features, return the other features in the same subsystem
with it. For each other feature returned, its role, functional
assignment, subsystem variant, and subsystem ID will be returned as well.

=over 4

=item args

Reference to either (1) a hash with a key of C<-ids> whose value is a list
of FIG feature IDs or (2) a list of FIG feature IDs.

=item RETURN

Returns a reference to a list of lists. Each sub-list contains 6-tuples relating
to a single incoming feature ID. Each 6-tuple consists of a subsystem
ID, a variant ID, the incoming feature ID, the other feature ID, the other feature's
functional assignment, and the other feature's role in the subsystem.

=back

=cut

sub is_in_subsystem_with {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the sapling database.
    my $sapling = $self->{db};
    # Declare the return variable.
    my $retVal;
    # Convert a list to a hash.
    if (ref $args ne 'HASH') {
        $args = { -ids => $args };
    }
    # Get the fig IDs from the parameters.
    my $ids = ServerThing::GetIdList(-ids => $args);
    foreach my $fid (@$ids) {
        my @resultRows = $sapling->GetAll("Feature IsContainedIn MachineRole IsRoleFor MolecularMachine Implements Variant IsDescribedBy Subsystem AND MolecularMachine IsMachineOf MachineRole2 Contains Feature2 AND MachineRole2 HasRole Role", 
                                          'Feature(id) = ? ', 
                                          [$fid], [qw(Subsystem(id) 
                                                      Variant(id)
                                                      Feature(id)
                                                      Feature2(id)
                                                      Feature2(function)
                                                      Role(id))]);
        push (@$retVal, \@resultRows);
    }
    # Return the result.
    return $retVal;
}

=head3 all_subsystems

    my $document = $ssObject->all_subsystems($args);

Return a list of all subsystems in the system. For each subsystem, this
method will return the ID, curator, and roles.

=over 4

=item args

This function has no parameters.

=item RETURN

Returns a list of 3-tuples. Each 3-tuple will consist of a subsystem ID, a curator
name, and a role ID.

=back

=cut

sub all_subsystems {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the spaling database.
    my $sapling = $self->{db};
    # Read the subsystem data from the database.
    my @retVal = $sapling->GetAll("Subsystem Includes Role", 
                                  'ORDER BY Subsystem(id)', [],
                                  [qw(Subsystem(id) Subsystem(curator) 
                                      Role(id))]);
    # Return the result.
    return \@retVal;
}

=head3 subsystem_spreadsheet

    my $document = $ssObject->subsystem_spreadsheet($args);

This method takes a list of subsystem IDs, and for each one returns a
list of the features in the subsystem. For each feature, it will include
the feature's functional assignment, the subsystem name and variant
(spreadsheet row), and its role (spreadsheet column).

=over 4

=item args

Reference to a hash that either maps C<-ids> to a list of subsystem IDs, or a
reference to a list of subsystem IDs.

=item RETURN

Returns a list of 5-tuples. Each tuple contains a subsystem ID, a variant ID, a
feature ID, the feature's functional assignment, and the feature's role in the
subsystem.

=back

=cut

sub subsystem_spreadsheet {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the sapling database.
    my $sapling = $self->{db};
    # Declare the return variable.
    my $retVal;
    # Convert a list to a hash.
    if (ref $args ne 'HASH') {
        $args = { -ids => $args };
    }
    # Get the list of subsystem IDs.
    my $ids = ServerThing::GetIdList(-ids => $args);
    # Loop through the subsystem IDs.
    foreach my $subsysName (@$ids) {
        # Normalize the subsystem ID.
        my $subsysID = $sapling->SubsystemID($subsysName);
        # Get the subsystem's spreadsheet data.
        my @resultRows = $sapling->GetAll("Subsystem Describes Variant IsImplementedBy MolecularMachine IsMachineOf MachineRole Contains Feature AND MachineRole HasRole Role", 
                                          'Subsystem(id) = ? ORDER BY Feature(id)', 
                                          [$subsysID], [qw(Subsystem(id)
                                                           Variant(id)
                                                           Feature(id)
                                                           Feature(function)
                                                           Role(id))]);
        push (@$retVal, \@resultRows);
    }
    # Return the result.
    return $retVal;
}

=head3 pegs_in_subsystems

    my $document = $ssObject->pegs_in_subsystems($args);

This method takes a list of genomes and a list of subsystems and returns
a list of the roles represented in each genome/subsystem pair.

=over 4

=item args

Either (1) a reference to a hash with the keys C<-genomes> and C<-subsystems>,
where C<-genomes> maps to a list of genome IDs and C<-subsystems> maps to a
list of subsystem IDs, or (2) a reference to a list of lists, where the first
is a list of genome IDs and the second is a list of subsystem IDs.

=item RETURN

Returns a list of 2-tuples. Each tuple consists of a subsystem ID and a second
2-tuple that contains a role ID and a reference to a list of the feature IDs for
that role.

=back

=cut

sub pegs_in_subsystems {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the sapling database.
    my $sapling = $self->{db};
    # Get the sapling subsystem object.
    require SaplingSubsys;
    # Declare the return variable.
    my $retVal;
    # Convert a list to a hash.
    if (ref $args ne 'HASH') {
        $args = { -genomes => $args->[0], -subsystems => $args->[1] };
    }
    # Get the list of genome IDs.
    my $genomes = ServerThing::GetIdList(-genomes => $args);
    # Get the list of subsystem IDs.
    my $subs = ServerThing::GetIdList(-subsystems => $args);
    # Loop through the subsystems.
    foreach my $sub (@{$subs}) {
        # Normalize the subsystem ID.
        my $subID = $sapling->SubsystemID($sub);
        # Get the subsystem spreadsheet in memory.
        my $ss = SaplingSubsys->new($subID, $sapling);
        # Loop through the genomes.
        foreach my $g (@{$genomes}) {
            my @roles = $ss->get_roles_for_genome($g, 1);
            foreach my $role (@roles) {
                push (@$retVal, [$sub, $role]); 
            }
        }
    }
    # Return the result.
    return $retVal;
}

# Synonym for "pegs_in_subsystems" provided for backward compatability.
sub pegs_in_subsystem {
    return pegs_in_subsystems(@_);
}

=head3 pegs_implementing_roles

    my $document = $ssObject->pegs_implementing_roles($args);

Given a subsystem and a list of roles, return a list of the subsystem's
features for each role.

=over 4

=item args

Reference to either (1) a hash that maps C<-subsystem> to a subsystem ID and
C<-roles> to a list of roles or (2) a 2-tuple containing a subsystem ID followed
by a reference to a list of roles in that subsystem.

=item RETURN

Returns a list of 2-tuples. Each tuple consists of a role and a reference to a
list of the features in that role.

=back

=cut

sub pegs_implementing_roles {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the sapling database.
    my $sapling = $self->{db};
    # Get the sapling subsystem object.
    require SaplingSubsys;
    # Declare the return variable.
    my $retVal;
    # Convert a list to a hash.
    if (ref $args ne 'HASH') {
        $args = { -subsystem => $args->[0], -roles => $args->[1] };
    }
    # Get the subsystem ID.
    my $subsystem = $args->{-subsystem};
    # If there is no subsystem ID, it's an error.
    if (! defined $subsystem) {
        Confess("Subsystem ID not specified.");
    } else {
        # Normalize the subsystem ID.
        my $subsystemID = $sapling->SubsystemID($subsystem);
        # Get the list of roles.
        my $roles = ServerThing::GetIdList(-roles => $args);
        my $ss = SaplingSubsys->new($subsystemID, $sapling);
        foreach my $role (@$roles) {
            my @pegs = $ss->pegs_for_role($role);
            push (@$retVal, [$role, \@pegs]); 
        }
    }
    # Return the result.
    return $retVal;
}


=head3 metabolic_reconstruction

    my $document = $ssObject->metabolic_reconstruction($args);

This method will find for each subsystem, the subsystem variant that contains a
maximal subset of the roles in an incoming list, and output the ID of the
variant and a list of the roles in it.

=over 4

=item args

Reference to (1) a list of role descriptors or (2) a hash mapping the key C<-roles>
to a list of role descriptors. A role descriptor is a 2-tuple consisting of the
role ID followed by an arbitrary ID of the caller's choosing.

=item RETURN

Returns a list of tuples, each containing a variant ID, a role ID, and optionally a
caller-provided ID for the role.

=back

=cut

sub metabolic_reconstruction {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the sapling database.
    my $sapling = $self->{db};
    # Declare the return variable.
    my $retVal = [];
    # Convert a list to a hash.
    if (ref $args ne 'HASH') {
        $args = { -roles => $args };
    }
    # This counter will be used to generate user IDs for roles without them.
    my $next = 1000;
    # Get the list of roles.
    my $id_roles = ServerThing::GetIdList(-roles => $args);
    my @id_roles1 = map { (ref $_ ? $_ : [$_, "FR" . ++$next]) } @$id_roles;

    my @id_roles = ();
    foreach my $tuple (@id_roles1)
    {
	my($function,$id) = @$tuple;
	foreach my $role (split(/(; )|( [\]\@] )/,$function))
	{
	    push(@id_roles,[$role,$id]);
	}
    }

    my %big;
    my $id_display = 1;
    map {push(@{$big{$_->[0]}}, $_->[1])} @id_roles;
    my @resultRows = $sapling->GetAll("Subsystem Includes Role", 
                            'ORDER BY Subsystem(id), Includes(sequence)', [], 
                            [qw(Subsystem(id) Role(id) Includes(abbreviation))]);
    my %ss_roles;
    foreach my $row (@resultRows) {
        my ($sub, $role, $abbr) = @$row;
        $ss_roles{$sub}->{$role} = $abbr;
    }
    foreach my $sub (keys %ss_roles) {
        my $roles = $ss_roles{$sub};

        my @abbr = map{$roles->{$_}} grep { $big{$_}} keys %$roles;
        my $set =  join(" ",  @abbr);
        if (@abbr > 0) {
            my ($variant, $size) = $self->get_max_subset($sub, $set);
            if ($variant) {
                foreach my $role (keys %$roles) {
                    if ($id_display) {
                        foreach my $id (@{$big{$role}}) {
                            push (@$retVal, [$variant, $role, $id]);
                        }
                    } else {
                        push (@$retVal, [$variant, $role]);
                    }
                }
            }
        }
    }
    # Return the result.
    return $retVal;
}

=head2 Internal Utility Methods

=head3 get_max_subset

    my ($max_variant, $max_size) = $ssObject->get_max_subset($sub, $setA);

Given a subsystem ID and a role rule, return the ID of the variant for
the subsystem that matches the most roles in the rule and the number of
roles matched.

=over 4

=item sub

Name (ID) of the subsystem whose variants are to be examined.

=item setA

A space-delimited list of role abbreviations, lexically ordered. This provides
a unique specification of the roles in the set.

=item RETURN

Returns a 2-element list consisting of the ID of the variant found and the number
of roles matched.

=back

=cut

sub get_max_subset {
    my ($self, $sub, $setA) = @_;
    my $sapling = $self->{db};
    my $max_size = 0;
    my $max_set;
    my $max_variant;
    my %set_hash;
    my $qh = $sapling->Get("Subsystem Describes Variant", 'Subsystem(id) = ? AND Variant(type) = ?', [$sub, 'normal']);
    while (my $resultRow = $qh->Fetch()) {
        my @variantRoleRule = $resultRow->Value('Variant(role-rule)');
        my ($variantCode) = $resultRow->Value('Variant(code)');
        my $variantId = $sub.":".$variantCode;
        foreach my $setB (@variantRoleRule) {
                    my $size = is_A_a_superset_of_B($setA, $setB);
                    if ($size  && $size > $max_size) {
                            $max_size = $size;
                            $max_set = $setB;
                            $max_variant = $variantId;
                    }
        }
    }
    #if ($max_size) {
            #print STDERR "Success $max_variant, $max_set\n";
    #}
    return($max_variant, $max_size);
}


=head3 is_A_a_superset_of_B

    my $size = SS::is_A_a_superset_of_B($a, $b);

This method takes as input two role rules, and returns 0 if the first
role rule is NOT a superset of the second; otherwise, it returns the size
of the second rule. A role rule is a space-delimited list of role
abbreviations in lexical order. This provides a unique identifier for a
set of roles in a subsystem.

=over 4

=item a

First role rule.

=item b

Second role rule.

=item RETURN

Returns 0 if the first rule is NOT a superset of the second and the size of the
second rule if it is. As a result, if the first rule IS a superset, this method
will evaluate to TRUE, and to FALSE otherwise.

=back

=cut

sub is_A_a_superset_of_B {
    my ($a, $b) = @_;
    my @a = split(" ", $a);
    my @b = split(" ", $b);
    if (@b > @a) {
            return(0);
    }
    my %given;
    map { $given{$_} = 1} @a;
    map { if (! $given{$_}) {return 0}} split(" ", $b);
    my $l = scalar(@b);
    return scalar(@b);
}


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3