[Bio] / FigKernelScripts / add_to_alignment.pl Repository:
ViewVC logotype

View of /FigKernelScripts/add_to_alignment.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (as text) (annotate)
Sat Feb 19 18:12:52 2011 UTC (8 years, 8 months ago) by golsen
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, mgrast_release_3_0, mgrast_dev_03252011, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.7: +7 -5 lines
Fix the libraries used for I/O.

#
# Copyright (c) 2003-2009 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

#  Insert new sequences into an alignment without altering the relative
#  alignment of the existing sequences.  The alignment is based on a profile
#  of those sequences that are not significantly less similar than the most
#  similar sequence.
#
#  usage: add_to_alignment [-b] [-i] [-s] [-t]  Alignment  [ NewSeqs ]
#
#  Requires that clustalw is in the path

use strict;
use gjoalignment qw(
	add_to_alignment_v2
	);
use gjoseqlib qw(
	read_fasta
	print_alignment_as_fasta
	);

#  Usage text.  It follows the error message whenever this script rejects
#  its command line.

my $usage = <<"End_of_Usage";
add_to_alignment - Add one or more sequences to an existing alignment

usage: add_to_alignment [options]  Alignment  [ NewSeqs ]

   options:

      -b   Order sequences by size, longest to shortest
      -i   Silently ignore sequences with duplicate id
      -s   Order sequences by size (same as -b)
      -t   Trim added sequences to the alignment width
              (consider including the -b option for better result)

End_of_Usage

#  File-scope working variables used by the remainder of the script.

my ( $ali_file, $aligned, $seq_file, $seqs, $seq );

my $by_size = 0;   #  set by -b or -s
my $ignore  = 0;   #  set by -i
my $trim    = 0;   #  set by -t

#  Consume every leading argument that starts with '-'.  Flag letters are
#  case-insensitive and may be combined in one argument (-bt is the same as
#  -b -t).  Every letter is checked, so an unrecognized letter is reported
#  instead of being silently dropped.  The @ARGV test stops the loop cleanly
#  when the arguments run out.

while ( @ARGV && $ARGV[0] =~ /^-/ )
{
    my $arg = shift @ARGV;
    ( my $letters = lc $arg ) =~ s/^-//;

    #  A bare '-' carries no flag letter at all.
    length $letters
        or die "Illegal flag $arg\n$usage";

    foreach my $letter ( split //, $letters )
    {
        if    ( $letter eq 'b' ) { $by_size = 1 }
        elsif ( $letter eq 'i' ) { $ignore  = 1 }
        elsif ( $letter eq 's' ) { $by_size = 1 }
        elsif ( $letter eq 't' ) { $trim    = 1 }
        else
        {
            die "Illegal flag $arg\n$usage";
        }
    }
}

#  Take the alignment file name from the command line, read the existing
#  alignment from it, then read the sequences to be added.  Whatever
#  arguments remain are handed to read_fasta unchanged.  Each step that
#  fails is reported by name, followed by the usage text, so the user can
#  tell which input was the problem.  The file name is tested with
#  defined/length rather than truth so that a file called "0" is accepted.

$ali_file = shift @ARGV;
( defined $ali_file && length $ali_file )
    or die "No alignment file specified\n$usage";

#  A false return from read_fasta is treated as failure.
$aligned = gjoseqlib::read_fasta( $ali_file )
    or die "Could not read alignment from '$ali_file'\n$usage";

$seqs = gjoseqlib::read_fasta( @ARGV )
    or die "Could not read sequences to add\n$usage";

#  Optional reordering of the new sequences: longest first, judged by the
#  length of the third field of each entry.  The length is computed once
#  per entry and carried alongside it through the sort.

if ( $by_size )
{
    my @longest_first = map  { $_->[1] }
                        sort { $b->[0] <=> $a->[0] }
                        map  { [ length( $_->[2] ), $_ ] }
                        @$seqs;
    @$seqs = @longest_first;
}

#  Optional duplicate removal: drop any new entry whose first field (the id)
#  already occurs in the alignment, or occurred earlier among the new
#  entries.

if ( $ignore )
{
    my %seen;
    $seen{ $_->[0] } = 1 foreach @$aligned;

    my @unseen;
    foreach my $entry ( @$seqs )
    {
        #  The post-increment also records ids first met in this loop.
        next if $seen{ $entry->[0] }++;
        push @unseen, $entry;
    }
    @$seqs = @unseen;
}

#  Add the new sequences one at a time.  Each call receives the current
#  alignment and the same option hash; its return value becomes the
#  alignment passed to the next call.

my %align_opts = ( trim => $trim );
foreach my $new_entry ( @$seqs )
{
    $aligned = add_to_alignment_v2( $new_entry, $aligned, \%align_opts );
}

#  Wrap the sequences in the alignment: break the third field of every
#  entry into pieces of at most 60 characters joined by newlines.

foreach my $entry ( @$aligned )
{
    my @pieces = $entry->[2] =~ /.{1,60}/g;
    $entry->[2] = join( "\n", @pieces );
}

gjoseqlib::print_alignment_as_fasta( $aligned );


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3