Parent Directory
|
Revision Log
Revision 1.3 - (view) (download) (as text)
1 : | golsen | 1.1 | ######################################################################## |
2 : | # -*- perl -*- | ||
3 : | # | ||
4 : | golsen | 1.3 | # Copyright (c) 2003-2007 University of Chicago and Fellowship |
5 : | golsen | 1.1 | # for Interpretations of Genomes. All Rights Reserved. |
6 : | # | ||
7 : | # This file is part of the SEED Toolkit. | ||
8 : | # | ||
9 : | # The SEED Toolkit is free software. You can redistribute | ||
10 : | # it and/or modify it under the terms of the SEED Toolkit | ||
11 : | # Public License. | ||
12 : | # | ||
13 : | # You should have received a copy of the SEED Toolkit Public License | ||
14 : | # along with this program; if not write to the University of Chicago | ||
15 : | # at info@ci.uchicago.edu or the Fellowship for Interpretation of | ||
16 : | # Genomes at veronika@thefig.info or download a copy from | ||
17 : | # http://www.theseed.org/LICENSE.TXT. | ||
18 : | # | ||
19 : | |||
20 : | # collect_related_sequences [options] dbfile seqfile | ||
21 : | |||
22 : | # use Data::Dumper; | ||
23 : | use strict; | ||
24 : | use gjoseqlib; | ||
25 : | use collect_related_sequences; | ||
26 : | |||
27 : | golsen | 1.3 | my $min_coverage = 0.80; |
28 : | my $tmp_dir; | ||
29 : | my $max_e_value = 0.001; | ||
30 : | my $fids; | ||
31 : | my $min_identity = 0.25; | ||
32 : | my $merge = 1; | ||
33 : | my $nr = 0; | ||
34 : | my $tmp; | ||
35 : | my $extra_ends = 10; | ||
36 : | golsen | 1.2 | |
37 : | my $usage =<<"End_of_Usage"; | ||
38 : | golsen | 1.1 | |
39 : | Usage: collect_related_sequences [options] dbfile seqfile | ||
40 : | |||
41 : | Options: | ||
42 : | |||
43 : | golsen | 1.3 | -c min_coverage # fraction of exemplar coverged (D=$min_coverage) |
44 : | golsen | 1.1 | -d tmp_dir # name of temporary directory |
45 : | golsen | 1.3 | -e max_e_value # required match significance (D=$max_e_value) |
46 : | -f 'fid ...' # use exemplar(s) from SEED (instead of seqfile) | ||
47 : | -i min_identity # required similarity to exemplar (D=$min_identity) | ||
48 : | golsen | 1.2 | -m # do NOT merge queries with found sequences |
49 : | -n # use SEED nr database (instead of dbfile) | ||
50 : | golsen | 1.1 | -t tmp # place for temporary directory |
51 : | golsen | 1.3 | -x extra_ends # extra length at ends (D=$extra_ends residues) |
52 : | golsen | 1.1 | |
53 : | End_of_Usage | ||
54 : | |||
55 : | while ( $ARGV[0] =~ /^-/ ) | ||
56 : | { | ||
57 : | $_ = shift; | ||
58 : | if ( s/^-c// ) { $min_coverage = $_ || shift } | ||
59 : | elsif ( s/^-d// ) { $tmp_dir = $_ || shift } | ||
60 : | elsif ( s/^-e// ) { $max_e_value = $_ || shift } | ||
61 : | golsen | 1.3 | elsif ( s/^-f// ) { $fids = $_ || shift } |
62 : | golsen | 1.1 | elsif ( s/^-i// ) { $min_identity = $_ || shift } |
63 : | golsen | 1.2 | elsif ( s/^-m// ) { $merge = 0 } |
64 : | golsen | 1.3 | elsif ( s/^-n// ) { $nr = 1 } |
65 : | golsen | 1.1 | elsif ( s/^-t// ) { $tmp = $_ || shift } |
66 : | elsif ( s/^-x// ) { $extra_ends = $_ || shift } | ||
67 : | else | ||
68 : | { | ||
69 : | usage( "Bad command flag '$_'\n" ); | ||
70 : | } | ||
71 : | } | ||
72 : | |||
73 : | golsen | 1.3 | my $dbfile = shift @ARGV if ! $nr; |
74 : | $nr || -f $dbfile || usage( "Cannot locate database file '$dbfile'." ); | ||
75 : | golsen | 1.2 | |
76 : | golsen | 1.3 | my @seq; |
77 : | my @fids = (); | ||
78 : | golsen | 1.1 | |
79 : | golsen | 1.2 | if ( $fids ) |
80 : | { | ||
81 : | golsen | 1.3 | @fids = ref( $fids ) eq 'ARRAY' ? @$fids |
82 : | : split /[,\s]+/, $fids; | ||
83 : | golsen | 1.2 | } |
84 : | else | ||
85 : | { | ||
86 : | my $seqfile = shift @ARGV; | ||
87 : | @seq = read_fasta( $seqfile ); | ||
88 : | @seq or usage( "Failed to read sequences from '$seqfile'." ); | ||
89 : | } | ||
90 : | golsen | 1.1 | |
91 : | my $options = | ||
92 : | golsen | 1.3 | { max_e_value => $max_e_value, |
93 : | min_coverage => $min_coverage, | ||
94 : | golsen | 1.1 | min_identity => $min_identity, |
95 : | extra_ends => $extra_ends | ||
96 : | }; | ||
97 : | golsen | 1.2 | |
98 : | golsen | 1.3 | $options->{ exemplars } = \@seq if ! $fids; |
99 : | $options->{ fids } = \@fids if $fids; | ||
100 : | $options->{ no_merge } = 1 if ! $merge; | ||
101 : | $options->{ nr } = 1 if $nr; | ||
102 : | $options->{ seq_db } = $dbfile if ! $nr; | ||
103 : | $options->{ tmp } = $tmp if $tmp; | ||
104 : | $options->{ tmp_dir } = $tmp_dir if $tmp_dir; | ||
105 : | golsen | 1.2 | |
106 : | golsen | 1.3 | my $found = collect_related_sequences::collect_related_sequences( $options ); |
107 : | golsen | 1.2 | |
108 : | golsen | 1.3 | print_alignment_as_fasta( $found ) if $found; |
109 : | golsen | 1.1 | |
110 : | exit; | ||
111 : | |||
112 : | |||
113 : | sub usage | ||
114 : | { | ||
115 : | print STDERR join( "\n", @_, $usage ); | ||
116 : | exit; | ||
117 : | } |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |