[Bio] / Sprout / SubsystemSaplingLoader.pm Repository:
ViewVC logotype

View of /Sprout/SubsystemSaplingLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Mon Jan 19 21:43:05 2009 UTC (10 years, 9 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2009_02_05
Sapling support.

#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package SubsystemSaplingLoader;

    use strict;
    use Tracer;
    use ERDB;
    use base 'BaseSaplingLoader';

=head1 Sapling Subsystem Load Group Class

=head2 Introduction

The Subsystem Load Group includes all of the major subsystem-related tables.

=head3 new

    my $sl = SubsystemSaplingLoader->new($erdb, $options, @tables);

Construct a new SubsystemSaplingLoader object.

=over 4

=item erdb

[[SaplingPm]] object for the database being loaded.

=item options

Reference to a hash of command-line options.

=item tables

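List of tables in this load group. The constructor does not read this
parameter; it always uses its own built-in list of subsystem tables.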

=back

=cut

sub new {
    # The class name comes first, followed by the database object and the
    # command-line option hash. Any further arguments are not used.
    my $class = shift;
    my ($erdb, $options) = @_;
    # These are the tables that make up the subsystem load group. They are
    # passed to the base class in sorted order.
    my @groupTables = sort qw(
        Describes Includes IsClassFor IsContainedIn IsImplementedBy
        IsMachineOf IsRoleOf IsSuperclassOf MachineRole MolecularMachine
        Role Subsystem SubsystemClass Uses Variant
    );
    # The base-class constructor builds the loader object itself; all we
    # contribute is the table list.
    my $loader = BaseSaplingLoader::new($class, $erdb, $options, @groupTables);
    return $loader;
}

=head2 Public Methods

=head3 Generate

    $sl->Generate();

Generate the data for the subsystem-related files.

=cut

sub Generate {
    my $self = shift;
    # Fetch the database object and the source object up front; both kinds
    # of section need them.
    my $erdb = $self->db();
    my $fig = $self->source();
    # The global section gets the subsystem framework.
    if ($self->global()) {
        return $self->GenerateSubsystems($fig, $erdb);
    }
    # Otherwise the section ID is used as a genome ID, and we generate the
    # subsystem data for that genome.
    my $genomeID = $self->section();
    return $self->GenerateSubsystemData($fig, $erdb, $genomeID);
}

=head3 GenerateSubsystems

    $sl->GenerateSubsystems($fig, $erdb);

Generate the subsystems, variants, and roles for this database. This
method concerns itself primarily with the genome-independent part of the
subsystem framework. This includes the following tables:

    Subsystem
    Describes
    Variant
    Includes
    Role
    IsClassFor
    SubsystemClass
    IsSuperclassOf

=over 4

=item fig

Source object from which the subsystem data will be extracted.

=item erdb

Database object for the Sapling database.

=back

=cut

sub GenerateSubsystems {
    # Get the parameters.
    my ($self, $fig, $erdb) = @_;
    # Get the subsystem hash for this Sapling instance. Its key list will be
    # the list of subsystems to put in the database.
    my $subHash = $erdb->SubsystemHash();
    # Loop through the subsystems.
    for my $subsystem (keys %$subHash) {
        # Compute this subsystem's ID. This value is only used for tracing.
        my $subsystemID = $erdb->SubsystemID($subsystem);
        Trace("Processing subsystem $subsystemID ($subsystem).") if T(3);
        # Get the FIG subsystem object, plus the name and ID under which the
        # subsystem will be stored.
        my $ssData = $fig->get_subsystem($subsystem);
        my ($subsystemName, $subID) = $self->AnalyzeSubsystemName($subsystem);
        # Get the subsystem properties and write the subsystem record.
        my $curator = $ssData->get_curator();
        my $description = $ssData->get_description();
        my $notes = $ssData->get_notes();
        my $version = $ssData->get_version();
        $self->PutE(Subsystem => $subID, curator => $curator,
                    description => $description, notes => $notes,
                    name => $subsystemName, version => $version);
        # Get this subsystem's roles.
        my @roles = $ssData->get_roles();
        # This will track the column number for the role. Columns are
        # numbered from 0 in the order the roles are returned.
        my $col = 0;
        # Loop through the roles.
        for my $role (@roles) {
            # Compute this role's ID and type. A role is flagged as
            # hypothetical if its name contains that word in any case.
            my ($roleName, $roleID) = $self->AnalyzeSubsystemName($role);
            my $hypothetical = ($role =~ /hypothetical/i ? 1 : 0);
            # Create its entity.
            $self->PutE(Role => $roleID, hypothetical => $hypothetical,
                        name => $roleName);
            # Connect it to the subsystem.
            $self->PutR(Includes => $subID, $roleID,
                        abbreviation => $ssData->get_abbr_for_role($role),
                        sequence => $col++);
        }
        # Put the subsystem in its classes. We work on a private copy of the
        # classification list, because the loop below consumes it and the
        # array handed back by the subsystem object may be the object's own.
        my @classes = @{ $ssData->get_classification() || [] };
        # The last entry in the list is the class that directly holds the
        # subsystem.
        my $class = pop @classes;
        if (defined $class) {
            # Create the class record.
            $self->CreateClass($class);
            # Connect it to the subsystem.
            $self->PutR(IsClassFor => $class, $subID);
            # Move up the hierarchy. Each earlier entry is the superclass of
            # the one after it. Note the walk stops at the first entry that
            # is empty or otherwise false.
            while (my $newClass = pop @classes) {
                $self->CreateClass($newClass);
                $self->PutR(IsSuperclassOf => $newClass, $class);
                $class = $newClass;
            }
        }
        # Next come the variants. Variant data is sparse in the SEED. We
        # start by getting all the known variant codes, each with an empty
        # comment.
        my %variants = map { $self->Starless($_) => '' } $ssData->get_variant_codes();
        # -1 and 0 are always present.
        $variants{'0'} = 'Subsystem functionality is incomplete.';
        $variants{'-1'} = 'Subsystem is not functional.';
        # Now get notes from any variants that have them. These override the
        # comments set up above, including the ones for 0 and -1.
        my $variantHash = $ssData->get_variants();
        for my $variant (keys %$variantHash) {
            $variants{$variant} = $variantHash->{$variant};
        }
        # Create the variants.
        for my $variant (keys %variants) {
            # The variant key is the subsystem ID plus the variant code.
            my $variantID = "$subID:$variant";
            # At this time, the role rule is not available, so we only have
            # the comment.
            $self->PutE(Variant => $variantID, comment => $variants{$variant},
                        role_rule => '');
            # Link the subsystem to the variant.
            $self->PutR(Describes => $subID, $variantID);
        }
    }
}

=head3 GenerateSubsystemData

    $sl->GenerateSubsystemData($fig, $erdb, $genomeID);

Generate the molecular machines and subsystem spreadsheet cells for this
database. This method concerns itself primarily with the genome-dependent
part of the subsystem framework. This includes the following tables.

    IsImplementedBy
    MolecularMachine
    IsMachineOf
    MachineRole
    Uses
    IsContainedIn
    IsRoleOf

=over 4

=item fig

Source object from which the subsystem data will be extracted.

=item erdb

Database object for the Sapling database.

=item genomeID

ID of the relevant genome.

=back

=cut

sub GenerateSubsystemData {
    # Get the parameters.
    my ($self, $fig, $erdb, $genomeID) = @_;
    # Get the subsystem hash for this Sapling instance. Its key list will be
    # the list of subsystems being put in the database.
    my $subHash = $erdb->SubsystemHash();
    # Get the list of subsystems for this genome. The "1" indicates we want
    # all of them, including the ones for 0 and -1 variants. Note we grep
    # against the subsystem hash to exclude subsystems that are not flagged
    # for Sapling.
    my @subNames = grep { exists $subHash->{$_} }
                        $fig->subsystems_for_genome($genomeID, 1);
    # Loop through the named subsystems. Each one corresponds to a molecular
    # machine.
    for my $subName (@subNames) {
        $self->Track(MolecularMachines => $subName, 100);
        # Compute the subsystem ID.
        my (undef, $subID) = $self->AnalyzeSubsystemName($subName);
        # Get the subsystem object.
        my $ssData = $fig->get_subsystem($subName);
        # Create the molecular machine. To do that, we need the variant code
        # for this genome.
        my $gidx = $ssData->get_genome_index($genomeID);
        my $raw_variant_code = $ssData->get_variant_code($gidx);
        # Check for a leading asterisk. This means the variant assignment is not
        # curated.
        my $curated = ($raw_variant_code =~ /^\s*\*/ ? 0 : 1);
        # Clear any waste from the variant code.
        my $variant_code = $self->Starless($raw_variant_code);
        # Compute its type. The variant code is a string and need not be a
        # number, so we classify it by its leading numeric part instead of
        # comparing the whole string numerically: zero is incomplete and a
        # negative value is vacant. A code with no leading number (such as
        # one that starts with a letter) is neither of those, so it counts
        # as normal. A missing or blank code is still treated like 0.
        my $variant_type;
        if (defined $variant_code &&
            $variant_code =~ /^\s*([-+]?(?:\d+(?:\.\d*)?|\.\d+))/) {
            my $number = $1;
            $variant_type = ($number == 0 ? 'incomplete' :
                             $number < 0 ? 'vacant' : 'normal');
        } elsif (defined $variant_code && $variant_code =~ /\S/) {
            $variant_type = 'normal';
        } else {
            $variant_type = 'incomplete';
        }
        # Create the variant and machine IDs. The machine ID is the variant
        # ID qualified by the genome.
        my $variantID = "$subID:$variant_code";
        my $machineID = "$variantID:$genomeID";
        # Create the molecular machine and connect it to the genome and
        # subsystem.
        $self->PutE(MolecularMachine => $machineID, type => $variant_type,
                    curated => $curated);
        $self->PutR(IsImplementedBy => $variantID, $machineID);
        $self->PutR(Uses => $genomeID, $machineID);
        # Now we loop through the subsystem's roles, creating the MachineRoles.
        # Molecular machines function as spreadsheet rows; machine roles are
        # spreadsheet cells.
        my @roles = $ssData->get_roles();
        for my $role (@roles) {
            # Get this role's abbreviation. The role index is kept because it
            # is also needed to find the features in the cell.
            my $ridx = $ssData->get_role_index($role);
            my $abbr = $ssData->get_role_abbr($ridx);
            # Compute the role's ID.
            my (undef, $roleID) = $self->AnalyzeSubsystemName($role);
            # Create the machine-role ID from the machine ID and the role
            # abbreviation.
            my $machineRoleID = "$machineID:$abbr";
            # Create the machine-role and connect it to the role and the
            # machine.
            $self->PutE(MachineRole => $machineRoleID);
            $self->PutR(IsMachineOf => $machineID, $machineRoleID);
            $self->PutR(IsRoleOf => $roleID, $machineRoleID);
            # Now get a list of the features in this cell.
            my @pegs = $ssData->get_pegs_from_cell($genomeID, $ridx);
            # Connect them to the cell.
            for my $peg (@pegs) {
                $self->PutR(IsContainedIn => $peg, $machineRoleID);
            }
        }
    }
}

=head3 CreateClass

    $sl->CreateClass($className);

Create a SubsystemClass record with the specified class name.

=over 4

=item className

Name of the subsystem classification to create.

=back

=cut

sub CreateClass {
    my $self = shift;
    my ($className) = @_;
    # The class name is passed as the record's ID, and no other fields are
    # supplied.
    return $self->PutE(SubsystemClass => $className);
}


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3