[Bio] / FigKernelScripts / add_to_alignment.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/add_to_alignment.pl

Parent Directory | Revision Log


Revision 1.8 - (view) (download) (as text)

#
# Copyright (c) 2003-2009 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

# Insert new sequences into an alignment without altering the relative
# alignment of the existing sequences. The alignment is based on a profile
# of those sequences that are not significantly less similar than the most
# similar sequence.
#
# usage: add_to_alignment [-b] [-i] [-s] [-t] Alignment [ NewSeqs ]
#
# Single-letter flags are case-insensitive and may be bundled (e.g. -bt).
#
# Requires that clustalw is in the path

use strict;
use warnings;
use gjoalignment qw(
    add_to_alignment_v2
);
use gjoseqlib qw(
    read_fasta
    print_alignment_as_fasta
);

my $usage = <<"End_of_Usage";
add_to_alignment - Add one or more sequences to an existing alignment

usage: add_to_alignment [options] Alignment [ NewSeqs ]

options:

    -b  Order sequences by size, longest to shortest
    -i  Silently ignore sequences with duplicate id
    -s  Order sequences by size (same as -b)
    -t  Trim added sequences to the alignment width
        (consider including the -b option for better result)

End_of_Usage

#  Command-line flags:

my $by_size = 0;    # -b / -s: add the longest new sequences first
my $ignore  = 0;    # -i: drop new sequences whose id has already been seen
my $trim    = 0;    # -t: passed through to add_to_alignment_v2 as 'trim'

#  Consume leading arguments that start with '-'. The @ARGV test keeps the
#  match from being applied to undef when no arguments remain.

while ( @ARGV && $ARGV[0] =~ /^-/ )
{
    my $flag = shift @ARGV;
    ( my $letters = lc $flag ) =~ s/^-//;

    #  A bare '-' carries no flag letter.

    length $letters
        or die "Illegal flag $flag\n$usage";

    #  Examine every letter, so that bundled flags (-bt) are all honored and
    #  unrecognized trailing characters are reported, not silently dropped.

    foreach my $letter ( split //, $letters )
    {
        if    ( $letter eq 'b' ) { $by_size = 1 }
        elsif ( $letter eq 'i' ) { $ignore  = 1 }
        elsif ( $letter eq 's' ) { $by_size = 1 }
        elsif ( $letter eq 't' ) { $trim    = 1 }
        else
        {
            die "Illegal flag $flag\n$usage";
        }
    }
}

#  Read the existing alignment (required) and the sequences to be added.
#  NOTE(review): with no NewSeqs argument, read_fasta is called with an empty
#  argument list; presumably it then reads standard input -- confirm against
#  gjoseqlib.

my $ali_file = shift @ARGV
    or die $usage;

my $aligned = gjoseqlib::read_fasta( $ali_file )
    or die "Could not read alignment from '$ali_file'\n$usage";

my $seqs = gjoseqlib::read_fasta( @ARGV )
    or die "Could not read sequences to add\n$usage";

#  Each entry is indexed below as [ id, ?, sequence ]: element 0 is used as
#  the id and element 2 as the sequence string.

if ( $by_size )
{
    @$seqs = sort { length( $b->[2] ) <=> length( $a->[2] ) } @$seqs;
}

if ( $ignore )
{
    #  Seed the table with ids already in the alignment; the post-increment
    #  also removes repeated ids within the new sequences themselves.

    my %seen = map { $_->[0] => 1 } @$aligned;
    @$seqs = grep { ! $seen{ $_->[0] }++ } @$seqs;
}

#  Add the new sequences one at a time; each call returns the alignment
#  that is used for the next addition.

my $options = { trim => $trim };
foreach my $seq ( @$seqs )
{
    $aligned = add_to_alignment_v2( $seq, $aligned, $options );
}

#  Wrap the sequences in the alignment at 60 characters per line:

foreach my $entry ( @$aligned )
{
    $entry->[2] = join( "\n", $entry->[2] =~ /.{1,60}/g );
}

gjoseqlib::print_alignment_as_fasta( $aligned );

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3