[Bio] / FigKernelScripts / add_to_alignment.pl Repository:
ViewVC logotype

View of /FigKernelScripts/add_to_alignment.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (download) (as text) (annotate)
Sat May 12 20:47:03 2007 UTC (12 years, 10 months ago) by golsen
Branch: MAIN
CVS Tags: rast_rel_2009_05_18, rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_12_18, rast_rel_2008_07_21, rast_2008_0924, rast_rel_2008_04_23, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2009_03_26, rast_rel_2008_11_24, rast_rel_2008_08_07
Changes since 1.5: +44 -19 lines
Modifications to control ordering of sequences in representative sets and
in sequences added to an alignment.  Also new options to silently remove
sequences with duplicate ids.

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

#  Insert new sequences into an alignment without altering the relative
#  alignment of the existing sequences.  The alignment is based on a profile
#  of those sequences that are not significantly less similar than the most
#  similar sequence.
#
#  usage: add_to_alignment [-b] [-i] [-s] [-t]  Alignment  [ NewSeqs ]
#
#  Requires that clustalw is in the path

use strict;
use gjoalignment qw(
	read_fasta_file
	add_to_alignment
	write_fasta_file
	);

#  Usage message; appended to the error text for an illegal flag.

my $usage = <<"End_of_Usage";
add_to_alignment - Add one or more sequences to an existing alignment

usage: add_to_alignment [options]  Alignment  [ NewSeqs ]

   options:

      -b   Order sequences by size, longest to shortest
      -i   Silently ignore sequences with duplicate id
      -s   Order sequences by size (same as -b)
      -t   Trim added sequences to the alignment width
              (consider including the -b option for better result)

End_of_Usage

my ( $ali_file, $aligned, $seq_file, $seqs, $seq );

#  ( $by_size, $ignore, $trim ) = parse_flags( \@args )
#
#  Remove the leading flag arguments (those starting with '-') from @args
#  and return three 0/1 values: order-by-size, ignore-duplicate-ids, trim.
#
#  Flag letters are case-insensitive and may be bundled, so '-bt' is the
#  same as '-b -t'.  Every letter of a flag argument must be recognized;
#  an unknown letter (or a bare '-') dies with the offending argument
#  followed by the usage text.  Scanning stops at the first argument that
#  does not start with '-', leaving it and everything after it in @args.

sub parse_flags
{
    my ( $args ) = @_;

    #  -s is a synonym for -b, so both letters map to the same setting.
    my %setting_for = ( b => 'by_size',
                        s => 'by_size',
                        i => 'ignore',
                        t => 'trim'
                      );
    my %value = ( by_size => 0, ignore => 0, trim => 0 );

    #  Testing the list first avoids matching against an undefined value
    #  when no arguments remain.
    while ( @$args && defined( $args->[0] ) && $args->[0] =~ /^-/ )
    {
        my $flag    = shift @$args;
        my @letters = split //, lc substr( $flag, 1 );

        @letters or die "Illegal flag $flag\n$usage";

        foreach my $letter ( @letters )
        {
            my $name = $setting_for{ $letter }
                or die "Illegal flag $flag\n$usage";
            $value{ $name } = 1;
        }
    }

    return @value{ qw( by_size ignore trim ) };
}

my ( $by_size, $ignore, $trim ) = parse_flags( \@ARGV );

#  The first remaining argument names the alignment file; whatever is left
#  in @ARGV is handed to read_fasta_file() as the source of new sequences.
#  A false result at any step stops the script with the usage text.

$ali_file = shift @ARGV                   or die $usage;
$aligned  = read_fasta_file( $ali_file )  or die $usage;
$seqs     = read_fasta_file( @ARGV )      or die $usage;

#  Optionally order the new sequences by the length of element [2] of each
#  entry, longest first.

@$seqs = sort { length( $b->[2] ) <=> length( $a->[2] ) } @$seqs  if $by_size;

#  Optionally drop new entries whose id (element [0]) already occurs in the
#  alignment, or occurred earlier in the list of new entries.

if ( $ignore )
{
    my %known_id;
    $known_id{ $_->[0] } = 1  foreach @$aligned;

    my @unique;
    foreach my $entry ( @$seqs )
    {
        next if $known_id{ $entry->[0] }++;
        push @unique, $entry;
    }
    @$seqs = @unique;
}

#  Add the new sequences one at a time; the value returned by each call
#  replaces the alignment that is passed to the next call.

for my $new_seq ( @$seqs )
{
    $aligned = add_to_alignment( $new_seq, $aligned, $trim );
}

#  Break element [2] of every entry into lines of at most 60 characters:

for my $entry ( @$aligned )
{
    my @chunks = $entry->[2] =~ /.{1,60}/g;
    $entry->[2] = join( "\n", @chunks );
}

write_fasta_file( $aligned );


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3