[Bio] / FigKernelPackages / SeedUtils.pm Repository:
ViewVC logotype

View of /FigKernelPackages/SeedUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Wed Aug 19 17:05:13 2009 UTC (10 years, 7 months ago) by parrello
Branch: MAIN
Changes since 1.4: +63 -1 lines
Added "min" and "max" methods.

#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package SeedUtils;

    # Require declared variables and forbid symbolic references in this package.
    use strict;
    # NOTE(review): Tracer is a project module; presumably it supplies the
    # Trace() and T() functions called in create_fasta_record -- confirm.
    use Tracer;
    # Inherit from Exporter so the functions listed below can be imported by
    # client code.
    use base qw(Exporter);

    # Functions exported into the caller's namespace by default.
    our @EXPORT = qw(create_fasta_record rev_comp genome_of min max);

=head1 SEED Utility Methods

=head2 Introduction

This is a simple utility package that performs functions useful for
bioinformatics, but that do not require access to the databases.

=head2 Public Methods

=head3 max

    my $max = max(@nums);

Return the maximum number from all the values in the specified list.

=over 4

=item nums

List of numbers to examine.

=item RETURN

Returns the maximum numeric value from the specified parameters, or
an undefined value if an empty list is passed in.

=back

=cut

sub max {
    # Copy the arguments and seed the running maximum with the first one.
    # When no arguments are supplied the seed stays undefined, which is
    # what gets returned.
    my @candidates = @_;
    my $largest = shift @candidates;
    # Promote any later value that compares numerically greater.
    foreach my $value (@candidates) {
        $largest = $value if $value > $largest;
    }
    return $largest;
}

=head3 min

    my $min = min(@nums);

Return the minimum number from all the values in the specified list.

=over 4

=item nums

List of numbers to examine.

=item RETURN

Returns the minimum numeric value from the specified parameters, or
an undefined value if an empty list is passed in.

=back

=cut

sub min {
    # Copy the arguments and seed the running minimum with the first one.
    # When no arguments are supplied the seed stays undefined, which is
    # what gets returned.
    my @candidates = @_;
    my $smallest = shift @candidates;
    # Adopt any later value that compares numerically smaller.
    foreach my $value (@candidates) {
        $smallest = $value if $value < $smallest;
    }
    return $smallest;
}

=head3 create_fasta_record

    my $fastaString = create_fasta_record($id, $comment, $sequence);

Create a FASTA record from the specified DNA or protein sequence. The
sequence will be split into 60-character lines, and the record will
include an identifier line.

=over 4

=item id

ID for the sequence, to be placed at the beginning of the identifier
line.

=item comment (optional)

Comment text to place after the ID on the identifier line. If this parameter
is empty, undefined, or 0, no comment will be placed.

=item sequence

Sequence of letters to form into FASTA. For purposes of convenience, whitespace
characters in the sequence will be removed automatically.

=item RETURN

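Returns the desired sequence in FASTA format: the identifier line followed by
the sequence lines, joined by newlines. No newline is appended after the last
line.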

=back

=cut

sub create_fasta_record {
    # Build a FASTA record: a ">" identifier line followed by the sequence
    # broken into lines of at most 60 characters. The pieces are joined with
    # newlines; no newline is appended after the final line.
    #
    #   $id        identifier placed immediately after the ">"
    #   $comment   optional text appended after the ID; skipped when false
    #   $sequence  sequence letters; all whitespace is stripped before chunking
    #
    # Returns the record as a single string.
    my ($id, $comment, $sequence) = @_;
    # Start with the ID.
    my $header = ">$id";
    # Add a comment, if any.
    if ($comment) {
        $header .= " $comment";
    }
    # Treat a missing sequence as empty so the cleanup below never operates
    # on an undefined value.
    $sequence = '' unless defined $sequence;
    # Clean up the sequence.
    $sequence =~ s/\s+//g;
    # Cut the sequence into chunks of up to 60 characters. A global match in
    # list context returns every chunk directly, so no filtering step is
    # needed. Filtering the pieces by truth would silently discard a chunk
    # consisting of the single character "0".
    my @chunks = $sequence =~ /(.{1,60})/g;
    Trace(scalar(@chunks) . " chunks found in sequence of length " .
          length($sequence) . ".") if T(3);
    # Join the header and the chunks with newlines.
    my $retVal = join("\n", $header, @chunks);
    # Return the result.
    return $retVal;
}

=head3 rev_comp

    my $revcmp = rev_comp($dna);

or

    rev_comp(\$dna);

Return the reverse complement of a DNA string. The result is always converted
to lower case.

=over 4

=item dna

Either a DNA string, or a reference to a DNA string.

=item RETURN

If the input is a DNA string, returns the reverse complement. If the
input is a reference to a DNA string, the string itself is reverse
complemented.

=back

=cut

sub rev_comp {
    # Reverse-complement a DNA string, supplied either by value or by
    # reference.
    #
    #   $dna   a DNA string, or a reference to a scalar holding one
    #
    # By value: returns the reverse complement as a new lower-case string.
    # By reference: overwrites the referenced string with its lower-case
    # reverse complement and returns nothing.
    my ($dna) = @_;
    # Determine how we were called. A SCALAR reference must be dereferenced
    # to reach the string; anything else is treated as the string itself.
    # "scalar reverse" forces string reversal rather than list reversal.
    my ($retVal, $refMode);
    if (ref $dna eq 'SCALAR') {
        $retVal = lc scalar reverse $$dna;
        $refMode = 1;
    } else {
        $retVal = lc scalar reverse $dna;
        $refMode = 0;
    }
    # Now $retVal contains the reversed DNA string in all lower case, and
    # $refMode is TRUE iff the user passed in a reference. The following
    # translation step complements the string, including the ambiguity
    # codes; characters not listed (such as "n") are left unchanged.
    $retVal =~ tr/acgtumrwsykbdhv/tgcaakywsrmvhdb/;
    # Return the result in the method corresponding to the way it came in.
    if ($refMode) {
        $$dna = $retVal;
        return;
    } else {
        return $retVal;
    }
}

=head3 genome_of

    my $genomeID = genome_of($fid);

Return the Genome ID embedded in the specified FIG feature ID.

=over 4

=item fid

Feature ID of interest.

=item RETURN

Returns the genome ID in the middle portion of the FIG feature ID. If the
feature ID is invalid, this method returns an undefined value.

=back

=cut

sub genome_of {
    # Feature ID to examine.
    my ($fid) = @_;
    # The genome ID is the "digits.digits" run that follows the "fig|" prefix
    # and precedes the next period. Capturing through a list assignment
    # leaves $genome undefined when the feature ID does not match.
    my ($genome) = $fid =~ /^fig\|(\d+\.\d+)\./;
    return $genome;
}


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3