[Bio] / Sprout / LoadComplexData.pl Repository:
ViewVC logotype

View of /Sprout/LoadComplexData.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Sep 21 21:05:15 2011 UTC (7 years, 5 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2014_0912, rast_rel_2014_0729, mgrast_release_3_1_2, rast_rel_2011_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_10262011, HEAD
Changes to support complexes.

#!/usr/bin/perl -w

# -*- perl -*-
#
# Copyright (c) 2003-2011 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

=head1 LoadComplexData

This script loads the data for model complexes into the Sapling. The data
is found in four tab-delimited files in a single directory. The directory name 
is specified as a positional parameter.

=over 4

=item complex

This file contains two columns. The first is a complex ID and the second is
a complex name. Most complexes do not have a name.

=item complex2role

This file contains two columns. The first is a complex ID and the second is a
role name. This file specifies the IsTriggeredBy relationship between complexes
and roles.

=item exemplars.for.roles

This file contains two columns. The first is a FIG feature ID and the second is
a role name. This file specifies which features are exemplars of a particular
role.

=item reaction2complex

This file contains two columns. The first is a reaction ID and the second is a
complex ID. This file specifies which reactions belong to each complex. If the
reaction ID is missing from a record in this file, the record is ignored.

=back

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directed to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item dbname

Name of the Sapling database to use. This option is generally only useful for debugging.

=item dbhost

SQL host for the Sapling database to use. This option is generally only useful for debugging.

=item dbport

Database port to use for the Sapling database. This option is generally only useful for debugging.

=back

=cut

use strict;
use Tracer;
use Stats;
use Sapling;

# Parse the command line. The option hash comes back first, followed by the
# positional parameters; the first positional parameter must be the input
# directory.
my ($options, @parameters) = StandardSetup([qw(ERDB Sapling)],
    {dbname => [$FIG_Config::saplingDB, "name of the Sapling database to use"],
     dbhost => ["", "host containing the Sapling database"],
     dbport => ["", "port for connecting to the Sapling database"],
    },
    "", @ARGV);

# Create the statistics object.
my $stats = Stats->new();
# The whole load runs inside an eval so that the statistics gathered so far are
# still displayed when the load fails part-way through.
eval {
    # This will be our input file handle.
    my $ih;
    # Get the input file directory.
    my $inDirectory = $parameters[0];
    if (! $inDirectory) {
        Confess("No input directory specified.");
    } elsif (! -d $inDirectory) {
        Confess("Input directory $inDirectory not found.");
    } else {
        # Connect to the Sapling.
        my $sap = Sapling->new(dbName => $options->{dbname}, dbhost => $options->{dbhost},
            port => $options->{dbport});
        # First we create the complexes. We need to clear the complex table and then
        # load it from the "complex" file.
        CleanTable($sap, "Complex", $stats);
        CleanTable($sap, "ComplexName", $stats);
        $ih = Open(undef, "<$inDirectory/complex");
        # Loop through the complex file, adding the complexes.
        while (! eof $ih) {
            # Get the complex ID and name.
            my ($complexID, $name) = Tracer::GetLine($ih);
            $stats->Add(linesRead => 1);
            # The name is optional, so we only pass a name field when one is present.
            my @nameParms;
            if ($name) {
                push @nameParms, name => [$name];
            }
            $sap->InsertObject('Complex', id => $complexID, @nameParms);
            $stats->Add(Complex => 1);
        }
        # Now we connect the complexes to the reactions.
        CleanTable($sap, "IsSetOf", $stats);
        $ih = Open(undef, "<$inDirectory/reaction2complex");
        # Loop through the reaction2complex file. Note that we have to worry
        # about lines with missing reaction IDs: they are counted and skipped.
        while (! eof $ih) {
            my ($reaction, $complex) = Tracer::GetLine($ih);
            $stats->Add(linesRead => 1);
            if (! $reaction) {
                $stats->Add(noReaction => 1);
            } else {
                # The file lists the reaction first, but the relationship runs
                # from the complex to the reaction.
                $sap->InsertObject('IsSetOf', from_link => $complex, 
                    to_link => $reaction, optional => 0);
                $stats->Add(IsSetOf => 1);
            }
        }
        # The next piece of the puzzle is to connect complexes to roles.
        CleanTable($sap, "IsTriggeredBy", $stats);
        $ih = Open(undef, "<$inDirectory/complex2role");
        # Loop through the complex2role file.
        while (! eof $ih) {
            my ($complex, $role) = Tracer::GetLine($ih);
            $stats->Add(linesRead => 1);
            $sap->InsertObject('IsTriggeredBy', from_link => $complex, to_link => $role);
            $stats->Add(IsTriggeredBy => 1);
        }
        # Finally, we need to connect roles to exemplar features.
        CleanTable($sap, "IsExemplarOf", $stats);
        $ih = Open(undef, "<$inDirectory/exemplars.for.roles");
        # Loop through the exemplars.for.roles file.
        while (! eof $ih) {
            my ($fid, $role) = Tracer::GetLine($ih);
            $stats->Add(linesRead => 1);
            $sap->InsertObject('IsExemplarOf', from_link => $fid, to_link => $role);
            $stats->Add(IsExemplarOf => 1);
        }
    }
};
# Save the error text right away: $@ is a global that any later eval (including
# one inside a called function) can overwrite.
my $loadError = $@;
if ($loadError) {
    Trace("ERROR: $loadError") if T(0);
}
Trace("Statistics for run:\n" . $stats->Show());
# Report a failed load through the exit status as well, so that calling scripts
# do not mistake a partially-loaded database for a successful run.
if ($loadError) {
    exit(1);
}

## Make sure the specified table is present in the database and is empty.
##
## Parameters:
##   $sap        database object; must provide VerifyTable and TruncateTable
##   $tableName  name of the table to prepare
##   $stats      statistics object; its Add method is used to count the outcome
##
## Exactly one counter is incremented per call: "newTables" when VerifyTable
## returns a true value, otherwise "clearedTables" (after truncating the table).
sub CleanTable {
    my ($sap, $tableName, $stats) = @_;
    # Assume the table had to be created until we learn otherwise.
    my $counter = 'newTables';
    # NOTE(review): a true result from VerifyTable is taken to mean "the table
    # was missing and has just been created" -- confirm against the ERDB code.
    if (! $sap->VerifyTable($tableName)) {
        # The table was already there, so it may hold old data. Empty it.
        $sap->TruncateTable($tableName);
        $counter = 'clearedTables';
    }
    return $stats->Add($counter => 1);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3