[Bio] / FigKernelScripts / collect_related_sequences.pl Repository:
ViewVC logotype

View of /FigKernelScripts/collect_related_sequences.pl

Parent Directory | Revision Log


Revision 1.3 - (download) (as text) (annotate)
Fri May 11 18:51:28 2007 UTC (12 years, 6 months ago) by golsen
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.2: +34 -66 lines
Encapsulate all of the SEED-dependent functions in the perl module.
It should still run all SEED-independent functions in any environment.

########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2007 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

#   collect_related_sequences  [options] dbfile seqfile

# use Data::Dumper;
use strict;
use gjoseqlib;
use collect_related_sequences;

#  Default values for the command-line options.  The numeric defaults are
#  interpolated into the usage message below (shown as "D=..."), so they
#  must be assigned before the here-document is evaluated.  Options declared
#  without a value have no default and are only passed on when supplied.

my $min_coverage = 0.80;    #  -c  min_coverage
my $tmp_dir;                #  -d  tmp_dir
my $max_e_value  = 0.001;   #  -e  max_e_value
my $fids;                   #  -f  'fid ...'
my $min_identity = 0.25;    #  -i  min_identity
my $merge        = 1;       #  -m  clears this flag
my $nr           = 0;       #  -n  sets this flag
my $tmp;                    #  -t  tmp
my $extra_ends   = 10;      #  -x  extra_ends

#  Help text printed by usage() after any error message.

my $usage = <<"End_of_Usage";

Usage:  collect_related_sequences  [options]  dbfile  seqfile

Options:

    -c  min_coverage  # fraction of exemplar covered (D=$min_coverage)
    -d  tmp_dir       # name of temporary directory
    -e  max_e_value   # required match significance (D=$max_e_value)
    -f  'fid ...'     # use exemplar(s) from SEED (instead of seqfile)
    -i  min_identity  # required similarity to exemplar (D=$min_identity)
    -m                # do NOT merge queries with found sequences
    -n                # use SEED nr database (instead of dbfile)
    -t  tmp           # place for temporary directory
    -x  extra_ends    # extra length at ends (D=$extra_ends residues)

End_of_Usage

#  Parse the leading command-line flags.  A flag's value may be attached to
#  the flag (-c0.9) or supplied as the following argument (-c 0.9).  Parsing
#  stops at the first argument that does not begin with '-'.

#  Return the value for a flag: the attached text when there is any,
#  otherwise the next command-line argument.  The attached text is tested
#  with length() rather than for truth, so that an attached 0 (e.g., -x0)
#  is used as the value instead of being skipped in favor of the next
#  argument.  A flag with no value at all is reported as an error.

my $flag_value = sub
{
    my ( $flag, $attached ) = @_;
    return $attached if length $attached;
    @ARGV or usage( "Missing value for command flag '$flag'\n" );
    return shift @ARGV;
};

while ( @ARGV && $ARGV[0] =~ /^-/ )
{
    my $flag = shift @ARGV;
    my $rest = $flag;        #  what remains after the flag letter is removed

    if    ( $rest =~ s/^-c// ) { $min_coverage = $flag_value->( '-c', $rest ) }
    elsif ( $rest =~ s/^-d// ) { $tmp_dir      = $flag_value->( '-d', $rest ) }
    elsif ( $rest =~ s/^-e// ) { $max_e_value  = $flag_value->( '-e', $rest ) }
    elsif ( $rest =~ s/^-f// ) { $fids         = $flag_value->( '-f', $rest ) }
    elsif ( $rest =~ s/^-i// ) { $min_identity = $flag_value->( '-i', $rest ) }
    elsif ( $rest =~ s/^-m// ) { $merge        = 0 }
    elsif ( $rest =~ s/^-n// ) { $nr           = 1 }
    elsif ( $rest =~ s/^-t// ) { $tmp          = $flag_value->( '-t', $rest ) }
    elsif ( $rest =~ s/^-x// ) { $extra_ends   = $flag_value->( '-x', $rest ) }
    else
    {
        usage( "Bad command flag '$flag'\n" );
    }
}

#  The database file is the first positional argument, unless -n was given,
#  in which case no dbfile argument is consumed.  The variable is declared
#  unconditionally:  "my $x = ... if COND" has undefined behavior in Perl.

my $dbfile;
if ( ! $nr )
{
    $dbfile = shift @ARGV;
    $dbfile = '' if ! defined $dbfile;   #  missing argument; fails -f below
    -f $dbfile or usage( "Cannot locate database file '$dbfile'." );
}

#  Get the exemplars:  either a list of ids given with -f, or sequences
#  read from the next positional argument (seqfile).

my @seq;
my @fids;

if ( $fids )
{
    #  $fids is always a command-line string here; ids are separated by
    #  commas and/or white space.  The grep drops the empty leading field
    #  that split produces when the string begins with a separator.

    @fids = grep { length } split /[,\s]+/, $fids;
    @fids or usage( "No feature ids found in '$fids'." );
}
else
{
    my $seqfile = shift @ARGV;
    @seq = read_fasta( $seqfile );
    @seq or usage( "Failed to read sequences from '$seqfile'." );
}

#  Assemble the option hash for the library call.  The four numeric
#  settings are always passed; the remaining keys are added only when they
#  apply.

my %param = ( max_e_value  => $max_e_value,
              min_coverage => $min_coverage,
              min_identity => $min_identity,
              extra_ends   => $extra_ends
            );

#  Exemplars are either ids (-f) or the sequences that were read

if ( $fids ) { $param{ fids      } = \@fids }
else         { $param{ exemplars } = \@seq  }

#  The database is either the nr (-n) or the supplied file

if ( $nr )   { $param{ nr     } = 1       }
else         { $param{ seq_db } = $dbfile }

$param{ no_merge } = 1         unless $merge;
$param{ tmp      } = $tmp      if     $tmp;
$param{ tmp_dir  } = $tmp_dir  if     $tmp_dir;

my $options = \%param;

#  Run the search, and write whatever was found as fasta

my $found = collect_related_sequences::collect_related_sequences( $options );

if ( $found )
{
    print_alignment_as_fasta( $found );
}

exit;


#  Report a command-line error and terminate.
#
#      usage( @messages )
#
#  The messages, followed by the usage text in $usage, are joined with
#  newlines and written to STDERR.  Every call in this script reports an
#  error, so the exit status is nonzero to let a calling shell or pipeline
#  detect the failure.  This function does not return.

sub usage
{
    print STDERR join( "\n", @_, $usage );
    exit 1;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3