[Bio] / Sprout / ERDBTypeProteinData.pm Repository:
ViewVC logotype

View of /Sprout/ERDBTypeProteinData.pm

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.1 - (download) (as text) (annotate)
Mon Nov 1 15:17:52 2010 UTC (9 years, 7 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_04082011, mgrast_dev_06072011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011
Protein-data type.

#!/usr/bin/perl -w

# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
# This file is part of the SEED Toolkit.
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.

package ERDBTypeProteinData;

    use strict;
    use Tracer;
    use ERDB;
    use base qw(ERDBType);

=head1 ERDB Protein FASTA Data Type Definition

=head2 Introduction

This object represents the data type for a list of protein FASTA sequences. Each
sequence consists of a triple of data items: an ID, a comment, and the actual sequence.
An individual data value contains a list of these triples. In the database, the
entire structure is encoded as an escaped string. The individual pieces of a triple
are tab-separated, and the triples themselves are separated by new-lines.

=head3 new

    my $et = ERDBTypeProteinData->new();

Construct a new ERDBTypeProteinData descriptor.

=cut

sub new {
    # The only argument is the class name (the invocant of the ->new call).
    my ($class) = @_;
    # No per-instance state is stored here, so the descriptor is simply an
    # empty hash blessed into the requested class.
    return bless {}, $class;
}

=head2 Virtual Methods

=head3 averageLength

    my $value = $et->averageLength();

Return the average length of a data item of this field type when it is stored in the
database. This value is used to compute the expected size of a database table.

=cut

sub averageLength {
    # Fixed size estimate reported for one stored value of this type.
    return 100000;
}

=head3 prettySortValue

    my $value = $et->prettySortValue();

Number indicating where fields of this type should go in relation to other
fields. The value should be somewhere between C<1> and C<5>. A value outside
that range will make terrible things happen.

=cut

sub prettySortValue {
    # Declared without a prototype: this is invoked as a method (and so
    # receives the invocant), and Perl ignores prototypes on method calls.
    # 5 is the upper end of the documented 1..5 range.
    return 5;
}

=head3 validate

    my $okFlag = $et->validate($value);

Return an error message if the specified value is invalid for this field type.

The parameters are as follows.

=over 4

=item value

Value of this type, for validation.

=item RETURN

Returns an empty string if the specified field is valid, and an error message
otherwise.

=back

=cut

sub validate {
    # $value is the candidate field value to check.
    my ($self, $value) = @_;
    # An empty string means "valid"; it is only replaced when a problem
    # is found.
    my $retVal = "";
    # The value must be an array reference. Only the outer container is
    # checked here; the individual elements are not inspected.
    if (ref($value) ne 'ARRAY') {
        $retVal = "Protein data set is not a list reference.";
    }
    # Return the determination.
    return $retVal;
}

=head3 encode

    my $string = $et->encode($value, $mode);

Encode a value of this field type for storage in the database (or in a database load
file).

The parameters are as follows.

=over 4

=item value

Value of this type, for encoding.

=item mode

TRUE if the value is being encoded for placement in a load file, FALSE if it
is being encoded for use as an SQL statement parameter. In most cases, the
encoding is the same for both modes.

=back

=cut

sub encode {
    # $value is a reference to a list of array references (one per
    # sequence). A third "mode" argument may be passed by callers but is
    # not consulted: the encoding here is the same in both modes.
    my ($self, $value) = @_;
    # Join the pieces of each entry with tabs, then join the entries with
    # new-lines.
    my $flat = join("\n", map { join("\t", @$_) } @$value);
    # Pass the flattened text through Tracer::Escape before returning it.
    return Tracer::Escape($flat);
}

=head3 decode

    my $value = $et->decode($string);

Decode a string from the database into a value of this field type.

The parameters are as follows.

=over 4

=item string

String from the database to be decoded.

=item RETURN

Returns a value of the desired type.

=back

=cut

sub decode {
    # $string is the stored text for the field.
    my ($self, $string) = @_;
    # Undo the escaping, then break the text into one line per entry.
    my @lines = split /\n/, Tracer::UnEscape($string);
    # Split each line on tabs. The -1 limit keeps trailing empty fields, so
    # an entry whose last pieces are empty strings decodes with the same
    # number of elements it was encoded with (a plain split would silently
    # drop them).
    my $retVal = [ map { [ split /\t/, $_, -1 ] } @lines ];
    # Return the list of entries.
    return $retVal;
}

=head3 sqlType

    my $typeString = $et->sqlType();

Return the SQL data type for this field type.

=cut

sub sqlType {
    # SQL column type string reported for fields of this type.
    return "LONGTEXT";
}

=head3 indexMod

    my $length = $et->indexMod();

Return the index modifier for this field type. The index modifier is the number of
characters to be indexed. If it is undefined, the field cannot be indexed. If it
is an empty string, the entire field is indexed. The default is an empty string.

=cut

sub indexMod {
    # Undefined is the documented signal that the field cannot be indexed.
    # An explicit undef (rather than a bare return) is kept so that callers
    # in list context still receive exactly one value.
    return undef;
}

=head3 sortType

    my $letter = $et->sortType();

Return the sorting type for this field type. The sorting type is C<n> for integers,
C<g> for floating-point numbers, and the empty string for character fields.
The default is the empty string.

=cut

sub sortType {
    # The empty string is the documented sort type for character fields.
    return "";
}

=head3 documentation

    my $docText = $et->documentation();

Return the documentation text for this field type. This should be in TWiki markup
format, though HTML will also work.

=cut

sub documentation {
    # Declared without a prototype: this is invoked as a method, and Perl
    # ignores prototypes on method calls.
    # Short description text for this field type.
    return 'Protein FASTA list, encoded';
}

=head3 name

    my $name = $et->name();

Return the name of this type, as it will appear in the XML database definition.

=cut

sub name {
    # Declared without a prototype: this is invoked as a method, and Perl
    # ignores prototypes on method calls.
    # Type name string for this field type.
    return "proteinData";
}

=head3 default

    my $defaultValue = $et->default();

Return the default value to be used for fields of this type if no default value
is specified in the database definition or in an L<ERDBLoadGroup/Put> call
during a loader operation. The default is undefined, which means an error will
be thrown during the load. For this type, C<default> returns an empty string
rather than an undefined value.

=cut

sub default {
    # A defined (empty-string) default is returned, not undef.
    return '';
}

=head3 html

    my $html = $et->html($value);

Return the HTML for displaying the content of a field of this type in an output
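table. The default is the raw value, html-escaped. This type instead renders a
bulleted list with one C<id: sequence> entry per list element, showing at most
the first 60 characters of each sequence.

=cut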

sub html {
    # $value is a reference to a list of array references, each unpacked
    # below as (id, comment, sequence).
    my ($self, $value) = @_;
    # One display string per entry is collected here and wrapped in list
    # markup at the end.
    my @retVal;
    for my $triple (@$value) {
        # The comment (middle element) is not displayed.
        my ($id, undef, $sequence) = @$triple;
        # Treat missing pieces as empty strings so that short entries do not
        # trigger "uninitialized value" warnings.
        for my $piece ($id, $sequence) {
            $piece = '' if ! defined $piece;
        }
        # Show at most the first 60 characters of the sequence, with an
        # ellipsis marking the truncation.
        if (length($sequence) > 60) {
            $sequence = substr($sequence, 0, 60) . "...";
        }
        # NOTE(review): $id and $sequence are inserted without HTML escaping;
        # confirm they can never contain markup characters.
        push @retVal, "$id: $sequence";
    }
    # NOTE(review): CGI is not loaded by this file's visible "use" lines;
    # presumably another module loads it -- confirm.
    return CGI::ul(map { CGI::li($_) } @retVal);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3