[Bio] / FigKernelPackages / ScriptThing.pm Repository:
ViewVC logotype

View of /FigKernelPackages/ScriptThing.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (as text) (annotate)
Thu Oct 14 17:28:49 2010 UTC (9 years, 1 month ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011
Changes since 1.7: +7 -4 lines
Expression vector support methods.

#!/usr/bin/perl -w
use strict;

#!/usr/bin/perl -w
#
#	This is a SAS Component.
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package ScriptThing;

=head1 Script Utilities Package

This is a simple package containing utility methods of use to the server scripts.

=head2 Public Methods

=head3 GetBatch

    my @lines = ScriptThing::GetBatch($ih, $size);

Get a batch of work to do. The specified input stream will be read, and a
list of IDs pulled out, along with the contents of the input lines on
which the IDs were found. The input stream can be an open file handle or a
list of singleton values to return.

=over 4

=item ih

Open input file handle, or alternatively a reference to a list of values to return.
If a list is specified, the items will be removed from the list as they are returned.

=item size (optional)

Maximum permissible batch size. If omitted, the default is C<1000>.

=item column (optional)

Index (1-based) of the column containing the IDs. The default is the last
column.

=item RETURN

Returns a list of 2-tuples; each 2-tuple consists of an ID followed by the text
of the input line containing the ID (with the trailing new-line removed).

=back

=cut

sub GetBatch {
    # Get the parameters.
    my ($ih, $size, $column) = @_;
    # Declare the return variable.
    my @retVal;
    # Compute the batch size.
    my $linesLeft = $size || 1000;
    # Determine the mode in which we're operating.
    if (ref $ih eq 'ARRAY') {
        # Here we have a list reference. Loop through it until we run out or fill
        # the batch.
        while ($linesLeft-- > 0 && @$ih > 0) {
            # Get the next list entry.
            my $id = shift @$ih;
            # Put it in the return list as the desired ID and the line it appeared on.
            push @retVal, [$id, $id];
        }
    } else {
        # Loop through the input until we run out or fill the batch.
        while ($linesLeft-- > 0 && ! eof $ih) {
            # Get the next input line.
            my $line = <$ih>;
            chomp $line;
            # Only proceed if it's nonblank.
            if ($line =~ /\S/) {
                # We'll put our desired column in here.
                my $id = GetColumn($line, $column);
                # Put it in the return list.
                push @retVal, [$id, $line];
            }
        }
    }
    # Return the result.
    return @retVal;
}

=head3 GetList

    my @list = ScriptThing::GetList($ih, $column);

Extract a list of data items from a tab-delimited file. Unlike L</GetBatch>,
this method reads the entire file, and it only returns the column of interest
instead of tuples containing the original data lines.

=over 4

=item ih

Open file handle for the input.

=item column (optional)

Index (1-based) of the column containing the IDs. The default is the last
column.

=item RETURN

Returns a list containing the contents of the desired column for every record
in the input stream.

=back

=cut

sub GetList {
    # Get the parameters.
    my ($ih, $column) = @_;
    # Declare the return variable.
    my @retVal;
    # Loop through the input.
    while (! eof $ih) {
        # Get the next input line.
        my $line = <$ih>;
        chomp $line;
        # We'll put our desired column in here.
        my $id = GetColumn($line, $column);
        # Put it in the return list.
        push @retVal, $id;
    }
    # Return the list.
    return @retVal;
}

=head3 GetColumn

    my $id = ScriptThing::GetColumn($line, $column);

Get the specified column from a tab-delimited input line.

=over 4

=item line

A tab-delimited line of text.

=item column

The index (1-based) of the column whose value is desired. If undefined or 0,
then the last column will be extracted.

=item RETURN

Returns the value of the desired column. Note that if it is the last column, no
trimming of new-line characters will take place.

=back

=cut

sub GetColumn {
    # Get the parameters.
    my ($line, $column) = @_;
    # Declare the return variable.
    my $retVal;
    # Are we looking for a specific column or the last one?
    if ($column) {
        # We want a specific column.
        my @cols = split /\t/, $line;
        $retVal = $cols[$column - 1];
    } else {
        # We want the last column.
        if ($line =~ /.*\t(.+)$/) {
            $retVal = $1;
        } else {
            $retVal = $line;
        }
    }
    # Return the result.
    return $retVal;
}

=head3 CommentHash

    my %hash = ScriptThing::CommentHash(\@tuples, $column);

Convert the 2-tuples returned by L</GetBatch> to a comment hash for
FASTA-based methods. The return hash will map each incoming ID to a
string containing the fields from the corresponding line.

=over 4

=item tuples

Reference to a list of 2-tuples. Each 2-tuple contains an ID followed by a
tab-delimited input line (without the new-line character).

=item column

Index (1-based) of the column containing the ID value. The default is the last
column.

=item RETURN

Returns a hash mapping each incoming ID to the text from its input line.

=back

=cut

sub CommentHash {
    # Get the parameters.
    my ($tuples, $column) = @_;
    # Declare the return variable.
    my %retVal;
    # Loop through the tuples.
    for my $tuple (@$tuples) {
        # Get the ID and line.
        my ($id, $line) = @$tuple;
        # Split the line and pop off the ID.
        my @fields = split /\t/, $line;
        if (! $column) {
            pop @fields;
        } else {
            splice @fields, $column - 1, 1;
        }
        # Rejoin the fields with spaces in between to form the result.
        $retVal{$id} = join(" ", @fields);
    }
    # Return the result.
    return %retVal;
}


=head3 AdjustStdin

    AdjustStdin();

Check the environment for a STDIN variable, and if present, open the
named file as STDIN. This is a debugging hack that allows the scripts to
be run easily inside a symbolic debugger.

=cut

sub AdjustStdin {
    # Check for the environment variable.
    my $file = $ENV{STDIN};
    if ($file) {
        # We found it, so open STDIN using the specified file name.
        open STDIN, "<$file" || die $!;
    }
}







1;


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3