[Bio] / FigKernelScripts / make_trimmed_alignment_from_ids.pl Repository:
ViewVC logotype

View of /FigKernelScripts/make_trimmed_alignment_from_ids.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (download) (as text) (annotate)
Sun Apr 20 13:00:15 2008 UTC (11 years, 7 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.12: +4 -2 lines
return empty list of failure to open FIFfam

########################################################################
use strict;
use gjoseqlib;

my $usage = "usage: make_trimmed_alignment_from_ids [-s CoreSz] [-S max_sz] [-m MaxIden] [-i MinIden] [-c MaxClustSz] CoreAli Id1 Id2 ... > FullAli";

use FIG;
my $fig = new FIG;

my $max_clust_sz = 1000;        # this is the maximum we will align before switching to inserting one-at-a-time
my $core_sz      = 50;          # number of reps in the core alignment (which is used to build core tree)
my $max_iden     = 0.9;         # anything closer gets taken out and added after aligning reps
my $min_iden     = 0.25;        # the parameter controlling gathering the set of seqs to align
my $verbose      = 0;
my $max_ids      = 5000;        # this is the maximum size of an alignment that we will build
my $min_cov      = 0.8;         # minimum coverage for similarity

while ( $ARGV[0] =~ /^-/ )
{
    $_ = shift;
    if    ( s/^-s// ) { $core_sz      = $_ || shift }
    elsif ( s/^-m// ) { $max_iden     = $_ || shift }
    elsif ( s/^-i// ) { $min_iden     = $_ || shift }
    elsif ( s/^-c// ) { $max_clust_sz = $_ || shift }
    elsif ( s/^-S// ) { $max_ids      = $_ || shift }
    elsif ( s/^-C// ) { $min_cov      = $_ || shift }
    elsif ( s/^-v// ) { $verbose      = 1           }
    else
    {
        usage( "Bad command flag '$_'\n" );
    }
}

my $core_ali;
(
 ($core_ali = shift @ARGV) &&
 (@ARGV > 0) 
)
    || die $usage;

my $ids = "'" . join("' '",@ARGV) . "'";
my $tmp_dir = "$FIG_Config::temp/tmp.$$";
print STDERR "tmp_dir=$tmp_dir\n" if ($verbose);

&FIG::verify_dir($tmp_dir);

my $gathered = "$tmp_dir/gathered.seqs";
my $reps50   = "$tmp_dir/reps.50";
my $reps     = "$tmp_dir/reps";
my $reps_ali = "$tmp_dir/reps.ali";
my $reps_trm = "$tmp_dir/reps.ali.trimmed";

&FIG::run("collect_related_sequences -n -f $ids -i $min_iden -c $min_cov > $gathered");
if ($verbose) { print STDERR "collected sequences into $gathered\n" }
my @seqs = &gjoseqlib::read_fasta($gathered);
if (@seqs < 2) { print STDERR "Could not gather sequences related to $ids\n"; exit }

while (@seqs > $max_ids)
{
    $min_iden += 0.05;
    &FIG::run("collect_related_sequences -n -f $ids -i $min_iden -c $min_cov > $gathered");
    if ($verbose) { print STDERR "collected sequences into $gathered\n" }
    @seqs = &gjoseqlib::read_fasta($gathered);
}

my @good_seqs = grep { ! $fig->is_deleted_fid($_->[0]) } @seqs;
if (@seqs != @good_seqs) { &gjoseqlib::print_alignment_as_fasta($gathered,\@good_seqs) }

if ((@good_seqs <= 50) && (@good_seqs > 1))
{
    system "cp $gathered $reps";
    system "cp $gathered $reps50";
}
else
{
    &FIG::run("reps_and_close -t $max_clust_sz $reps /dev/null < $gathered");
    &FIG::run("reps_and_close $reps50 /dev/null < $reps");
}
if ($verbose) { print STDERR "reps in  $reps\n" }
&FIG::run("align_seqs < $reps > $reps_ali");
if ($verbose) { print STDERR "aligned reps in $reps_ali\n" }
&FIG::run("trim_alignment < $reps_ali > $reps_trm");
if ($verbose) { print STDERR "      trimmed in $reps_trm\n" }
my %ids = map { $_->[0] => 1 } &gjoseqlib::read_fasta($reps50);
my @ali50 = grep { $ids{$_->[0]} } &gjoseqlib::read_fasta("$reps_trm");
@ali50 = &gjoseqlib::pack_alignment(@ali50);

&gjoseqlib::print_alignment_as_fasta($core_ali,\@ali50);

&FIG::run("add_to_alignment -b -i -t $reps_trm $gathered");

system "/bin/rm -r $tmp_dir";

sub usage
{
    print STDERR join( "\n", @_, $usage );
    exit;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3