[Bio] / FigKernelScripts / collect_related_sequences.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/collect_related_sequences.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : golsen 1.1 ########################################################################
2 :     # -*- perl -*-
3 :     #
4 : golsen 1.3 # Copyright (c) 2003-2007 University of Chicago and Fellowship
5 : golsen 1.1 # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     # collect_related_sequences [options] dbfile seqfile
21 :    
22 :     # use Data::Dumper;
23 :     use strict;
24 :     use gjoseqlib;
25 :     use collect_related_sequences;
26 :    
27 : golsen 1.3 my $min_coverage = 0.80;
28 :     my $tmp_dir;
29 :     my $max_e_value = 0.001;
30 :     my $fids;
31 :     my $min_identity = 0.25;
32 :     my $merge = 1;
33 :     my $nr = 0;
34 :     my $tmp;
35 :     my $extra_ends = 10;
36 : golsen 1.2
37 :     my $usage =<<"End_of_Usage";
38 : golsen 1.1
39 :     Usage: collect_related_sequences [options] dbfile seqfile
40 :    
41 :     Options:
42 :    
43 : golsen 1.3 -c min_coverage # fraction of exemplar coverged (D=$min_coverage)
44 : golsen 1.1 -d tmp_dir # name of temporary directory
45 : golsen 1.3 -e max_e_value # required match significance (D=$max_e_value)
46 :     -f 'fid ...' # use exemplar(s) from SEED (instead of seqfile)
47 :     -i min_identity # required similarity to exemplar (D=$min_identity)
48 : golsen 1.2 -m # do NOT merge queries with found sequences
49 :     -n # use SEED nr database (instead of dbfile)
50 : golsen 1.1 -t tmp # place for temporary directory
51 : golsen 1.3 -x extra_ends # extra length at ends (D=$extra_ends residues)
52 : golsen 1.1
53 :     End_of_Usage
54 :    
55 :     while ( $ARGV[0] =~ /^-/ )
56 :     {
57 :     $_ = shift;
58 :     if ( s/^-c// ) { $min_coverage = $_ || shift }
59 :     elsif ( s/^-d// ) { $tmp_dir = $_ || shift }
60 :     elsif ( s/^-e// ) { $max_e_value = $_ || shift }
61 : golsen 1.3 elsif ( s/^-f// ) { $fids = $_ || shift }
62 : golsen 1.1 elsif ( s/^-i// ) { $min_identity = $_ || shift }
63 : golsen 1.2 elsif ( s/^-m// ) { $merge = 0 }
64 : golsen 1.3 elsif ( s/^-n// ) { $nr = 1 }
65 : golsen 1.1 elsif ( s/^-t// ) { $tmp = $_ || shift }
66 :     elsif ( s/^-x// ) { $extra_ends = $_ || shift }
67 :     else
68 :     {
69 :     usage( "Bad command flag '$_'\n" );
70 :     }
71 :     }
72 :    
73 : golsen 1.3 my $dbfile = shift @ARGV if ! $nr;
74 :     $nr || -f $dbfile || usage( "Cannot locate database file '$dbfile'." );
75 : golsen 1.2
76 : golsen 1.3 my @seq;
77 :     my @fids = ();
78 : golsen 1.1
79 : golsen 1.2 if ( $fids )
80 :     {
81 : golsen 1.3 @fids = ref( $fids ) eq 'ARRAY' ? @$fids
82 :     : split /[,\s]+/, $fids;
83 : golsen 1.2 }
84 :     else
85 :     {
86 :     my $seqfile = shift @ARGV;
87 :     @seq = read_fasta( $seqfile );
88 :     @seq or usage( "Failed to read sequences from '$seqfile'." );
89 :     }
90 : golsen 1.1
91 :     my $options =
92 : golsen 1.3 { max_e_value => $max_e_value,
93 :     min_coverage => $min_coverage,
94 : golsen 1.1 min_identity => $min_identity,
95 :     extra_ends => $extra_ends
96 :     };
97 : golsen 1.2
98 : golsen 1.3 $options->{ exemplars } = \@seq if ! $fids;
99 :     $options->{ fids } = \@fids if $fids;
100 :     $options->{ no_merge } = 1 if ! $merge;
101 :     $options->{ nr } = 1 if $nr;
102 :     $options->{ seq_db } = $dbfile if ! $nr;
103 :     $options->{ tmp } = $tmp if $tmp;
104 :     $options->{ tmp_dir } = $tmp_dir if $tmp_dir;
105 : golsen 1.2
106 : golsen 1.3 my $found = collect_related_sequences::collect_related_sequences( $options );
107 : golsen 1.2
108 : golsen 1.3 print_alignment_as_fasta( $found ) if $found;
109 : golsen 1.1
110 :     exit;
111 :    
112 :    
113 :     sub usage
114 :     {
115 :     print STDERR join( "\n", @_, $usage );
116 :     exit;
117 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3