[Bio] / FigKernelScripts / align_with_clustal_2.pl Repository:
ViewVC logotype

View of /FigKernelScripts/align_with_clustal_2.pl

Parent Directory | Revision Log


Revision 1.4 - (download) (as text) (annotate)
Mon Apr 9 21:01:27 2007 UTC (12 years, 7 months ago) by golsen
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.3: +1 -1 lines
Trivial format change.

# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use strict;
use Carp;
use Data::Dumper;
use gjonewicklib qw(
		parse_newick_tree_str
		reroot_newick_to_approx_midpoint
		aesthetic_newick_tree
		newick_relabel_tips
		text_plot_newick
		dump_tree
		);
use FIG;

# Handle on the FIG data layer; used further down to fetch translations,
# aliases, organism names and function assignments for each id.
# (Direct method call instead of the ambiguous indirect-object "new FIG".)
my $fig = FIG->new;

# Autoflush the currently selected output handle.
$| = 1;

my $usage = "align_with_clustal_2 [-org] [-func[=user]] [-tree] [-UniProt] [-save=Dir] Id1 Id2 ... ";

# Require at least two command-line words.
# NOTE(review): this count is taken before option flags are stripped, so
# flags count toward it -- confirm whether two *ids* were intended.
# The message ends in a newline so Perl does not append "at ... line N",
# matching the usage message printed after option parsing.
(@ARGV > 1) || die "\nusage: $usage\n\n";

# Scratch files all share a per-process base name inside the configured
# temporary directory; extensions (.fasta, .aln, ...) are appended later.
my $temp_dir = $FIG_Config::temp;
my $file     = "$temp_dir/tmp$$";

my $relabel  = {};      # id => display label, used to relabel tree tips
my $add_org  = "";      # true when -org was given
my $save     = "";      # directory from -save=Dir
my $add_func = "";      # true when -func[=user] was given
my $tree     = "";      # true when -tree was given
my $uni_prot = "";      # true when -UniProt was given
my $trouble  = 0;       # set when an unrecognized flag is seen
my $user     = undef;   # optional user name from -func=user

# Consume leading option flags from @ARGV, leaving only the ids behind.
#
# Each pattern is anchored at the start of the argument, so the text after
# "=" in one option (e.g. "-save=results-org") can no longer be mistaken
# for a different flag.  A doubled leading dash is tolerated because the
# previous unanchored patterns accepted it.  The @ARGV test stops the loop
# cleanly when every argument was a flag.
#
# Sets: $add_org, $save, $add_func (+ $user), $tree, $uni_prot, $trouble.
# Dies with the usage message if any unrecognized flag was seen.
while (@ARGV && $ARGV[0] =~ m/^-/)
{
    my $flag = shift @ARGV;

    if ($flag =~ m/^--?org/)
    {
        $add_org = $flag;
    }
    elsif ($flag =~ m/^--?save=(.+)/)
    {
        # Take everything after "=": the argument is already one shell
        # word, so a directory name containing blanks must not be cut short.
        $save = $1;
    }
    elsif ($flag =~ m/^--?func/)
    {
        $add_func = $flag;
        # Optional "=user" selects whose function assignment is reported.
        if ($flag =~ m/=(\S+)/)  { $user = $1; }
    }
    elsif ($flag =~ m/^--?tree/)
    {
        $tree = $flag;
    }
    elsif ($flag =~ m/^--?UniProt/i)
    {
        $uni_prot = 1;
    }
    else
    {
        # Keep scanning so that every bad flag is reported before exiting.
        $trouble = 1;
        print STDERR "Invalid flag $flag\n";
    }
}
die "\nusage: $usage\n\n" if $trouble;

# Write the translation of every distinct id to "$file.fasta" and build the
# display label for each id in %$relabel (id, optionally followed by the
# UniProt alias, the organism and the function, depending on the flags).
my %seen;
my $fasta_path = "$file.fasta";
open(my $fasta_fh, '>', $fasta_path) || die "could not open $fasta_path: $!";
foreach my $id (@ARGV)
{
    # Each id is processed once, however often it was given.
    next if $seen{$id}++;

    if (my $seq = $fig->get_translation($id))
    {
        print $fasta_fh ">$id\n$seq\n";
    }
    else
    {
        print STDERR "could not find translation for $id\n";
    }

    # A label is recorded even when no translation was found.
    my $label = $id;
    if ($uni_prot && (my $alias = $fig->to_alias($id,"uni")))     { $label .= " [$alias]"; }
    if ($add_org  && (my $org   = $fig->org_of($id)))             { $label .= " [$org]"; }
    if ($add_func && (my $func  = $fig->function_of($id, $user))) { $label .= " $func"; }

    $relabel->{$id} = $label;
}
# Buffered write errors only surface at close, so check it.
close($fasta_fh) || die "could not close $fasta_path: $!";
# print STDERR "$0;\n", Dumper($relabel);

# Align the sequences with clustalw; its own chatter is discarded and the
# alignment it writes to "$file.aln" is echoed to STDOUT.
my $align_status = system "$FIG_Config::ext_bin/clustalw -infile=$file.fasta -align -outorder=aligned > /dev/null";
if ( $align_status != 0 )
{
    # Report the failure instead of silently producing no output.
    print STDERR "clustalw alignment exited with wait status $align_status\n";
}

if ( -s "$file.aln" )
{
    if ( open( my $align_fh, '<', "$file.aln" ) )
    {
        # Stream line by line rather than slurping the whole file.
        while ( my $line = <$align_fh> ) { print $line; }
        close( $align_fh );
    }
    else
    {
        print STDERR "could not open $file.aln: $!\n";
    }
}

# With -tree and a non-empty alignment: have clustalw derive a tree from the
# alignment, then reroot, tidy, relabel and print it as a text plot.
if ( $tree && -s "$file.aln" )
{
    my $tree_status = system "$FIG_Config::ext_bin/clustalw -infile=$file.aln -tree > /dev/null";
    if ( $tree_status != 0 )
    {
        print STDERR "clustalw tree building exited with wait status $tree_status\n";
    }

    # Prefer the .ph file; fall back to the .dnd file if the .ph file is
    # missing, empty or cannot be opened.
    my $tree_fh;
    my $have_tree = 0;
    foreach my $candidate ( "$file.ph", "$file.dnd" )
    {
        next unless -s $candidate;
        if ( open( $tree_fh, '<', $candidate ) )
        {
            $have_tree = 1;
            last;
        }
    }

    my $tree_text = "";
    if ( $have_tree )
    {
        my $newick = join( "", <$tree_fh> );
        close( $tree_fh );

        if ( my $parsed = parse_newick_tree_str( $newick ) )
        {
            # dump_tree( $parsed );
            my $rooted  = reroot_newick_to_approx_midpoint( $parsed );
            my $tidy    = aesthetic_newick_tree( $rooted );
            # Swap the bare ids on the tips for the labels built earlier.
            my $labeled = newick_relabel_tips( $tidy, $relabel );
            #  @textlines = text_plot_newick( $node, $width, $min_dx, $dy )
            $tree_text = join( "\n", text_plot_newick( $labeled, 80, 2, 2 ) );
        }
    }

    # The separator is printed even when no tree could be produced.
    print "=======================================================================\n\n\n";
    print "$tree_text\n";
}

# With -save=Dir: copy the sequences, the alignment and the tree files into
# Dir under fixed names (seqs, aln, tree, tree2).
if ($save)
{
    # List-form system bypasses the shell, so quotes, blanks or shell
    # metacharacters in the user-supplied directory are passed literally;
    # "--" keeps a name starting with "-" from being read as a cp option.
    system( 'cp', '--', "$file.fasta", "$save/seqs" );
    system( 'cp', '--', "$file.aln",   "$save/aln" );
    system( 'cp', '--', "$file.dnd",   "$save/tree" );
    if ( $tree && -s "$file.ph" )
    {
        system( 'cp', '--', "$file.ph", "$save/tree2" );
    }
}

# Remove every scratch file.  The .ph file is included so that -tree runs
# no longer leave it behind in the temporary directory; unlink simply skips
# names that do not exist.
unlink("$file.fasta","$file.aln","$file.dnd","$file.ph");

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3