[Bio] / FigKernelScripts / collect_related_sequences.pl Repository:
ViewVC logotype

View of /FigKernelScripts/collect_related_sequences.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Sat Apr 28 15:19:29 2007 UTC (12 years, 9 months ago) by golsen
Branch: MAIN
Script to extract sequences from a FASTA file that are similar to the
sequences in a query file.  This will be made more elaborate later.

########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

#   collect_related_sequences  [options] dbfile seqfile

# use Data::Dumper;
use strict;
use gjoseqlib;
use collect_related_sequences;

#  Help text printed by usage() whenever the command line is unacceptable.

my $usage =<<'End_of_Usage';

Usage:  collect_related_sequences  [options]  dbfile  seqfile

Options:

    -c  min_coverage  # D=0.80
    -d  tmp_dir       # name of temporary directory
    -e  max_e_value   # D=0.01
    -i  min_identity  # D=0.25
    -t  tmp           # place for temporary directory
    -x  extra_ends    # extra length at ends; D=10

End_of_Usage

#  Option defaults; $tmp_dir and $tmp stay undefined unless supplied.

my $min_coverage = 0.80;
my $tmp_dir;
my $max_e_value  = 0.01;
my $min_identity = 0.25;
my $tmp;
my $extra_ends   = 10;

#  Map each single-letter flag to the variable that receives its value.

my %option_target =
    ( c => \$min_coverage,
      d => \$tmp_dir,
      e => \$max_e_value,
      i => \$min_identity,
      t => \$tmp,
      x => \$extra_ends
    );

#  Consume leading flags.  A value may be attached to the flag ("-x5") or
#  supplied as the next argument ("-x 5").  Whether a value is attached is
#  decided by its length, not its truth, so that "-x0" or "-c0" means zero
#  rather than "take the next argument".

while ( @ARGV && $ARGV[0] =~ /^-/ )
{
    my $flag = shift @ARGV;
    if ( $flag =~ /^-([cdeitx])(.*)$/s )
    {
        my ( $letter, $attached ) = ( $1, $2 );
        my $value = length( $attached ) ? $attached : shift @ARGV;

        #  A flag at the very end of the command line has no value to take.
        defined( $value )
            or usage( "Missing value for command flag '-$letter'\n" );

        ${ $option_target{ $letter } } = $value;
    }
    else
    {
        usage( "Bad command flag '$flag'\n" );
    }
}

#  Exactly two positional arguments must remain: the database file and the
#  file of query sequences.

usage( "collect_related_sequences requires 2 parameters" ) unless @ARGV == 2;

my $dbfile  = $ARGV[0];
my $seqfile = $ARGV[1];

#  Read the query sequences; an empty result is treated as a usage error.

my @queries = read_fasta( $seqfile );
usage( "Failed to read sequences from '$seqfile'" ) if ! @queries;

#  Search options handed on to the library routine.  The two temporary
#  location settings are included only when given a true value.

my %search_opts = ( min_coverage => $min_coverage,
                    max_e_value  => $max_e_value,
                    min_identity => $min_identity,
                    extra_ends   => $extra_ends
                  );
foreach my $pair ( [ tmp => $tmp ], [ tmp_dir => $tmp_dir ] )
{
    my ( $key, $value ) = @$pair;
    next if ! $value;
    $search_opts{ $key } = $value;
}

#  Run the search and write whatever it returns as fasta.

my $related = collect_related_sequences::collect_related_sequences( $dbfile, \@queries, \%search_opts );

print_alignment_as_fasta( $related );
exit;


#  usage( @messages )
#
#  Print the supplied message(s) followed by the usage text to STDERR, one
#  item per line, and terminate the program.  Every call site reports a
#  command-line or input error, so the process exits with a non-zero status
#  to let calling shells and scripts detect the failure.  Never returns.

sub usage
{
    print STDERR join( "\n", @_, $usage );
    exit 1;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3