[Bio] / FigKernelPackages / clustaltree.pm Repository:
ViewVC logotype

View of /FigKernelPackages/clustaltree.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Mon Apr 9 21:51:56 2007 UTC (12 years, 10 months ago) by golsen
Branch: MAIN
CVS Tags: rast_rel_2009_05_18, rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_12_18, rast_rel_2008_07_21, rast_2008_0924, rast_rel_2008_04_23, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2009_03_26, rast_rel_2008_11_24, rast_rel_2008_08_07
A rudimentary package to make a clustalw tree.  The intent is to put the
command line scripts align_with_clustal.pl and align_with_clustal_2.pl
into a package.

#
# Copyright (c) 2003-2007 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package clustaltree;

#  A package of functions for a clustal tree
#
#  $tree = tree_with_clustal( \@alignment );

use Carp;
use strict;
use gjonewicklib;

my $is_fig;
eval { require FIG; require FIG_Config; $is_fig = 1 };
eval { require Data::Dumper };

my ( $ext_bin, $tmp_dir );
if ( $is_fig )
{
    $ext_bin = "$FIG_Config::ext_bin";
    $tmp_dir =  $FIG_Config::temp;
}
else
{
    $ext_bin = '';
    $tmp_dir = -d '/tmp' ? '/tmp' : '.';
}

require Exporter;

our @ISA = qw(Exporter);
our @EXPORT = qw(
        tree_with_clustal
        );


#===============================================================================
#  Tree sequence with clustalw and return the tree.  Tree is gjonewick format.
#
#    $tree = tree_with_clustal(  @alignment )
#    $tree = tree_with_clustal( \@alignment )
#
#  Currently very pedantic:
#     $tree is gjonewick format.
#     @alignment is composed of triples: ( $id, $definition, $sequence )
#===============================================================================
sub tree_with_clustal
{
    @_ and ref( $_[0] ) eq 'ARRAY' or return undef;
    my @seqs = ref( $_[0]->[0] ) eq 'ARRAY' ? @{ $_[0] } : @_;

    #  Temporary file names:

    my $seqfile  = "$tmp_dir/align_fasta_tmp_${$}.aln";
    my $treefile = "$tmp_dir/align_fasta_tmp_${$}.ph";

    #  Remap the id to be clustal friendly, saving the originals in a hash:

    my ( $id, $def, $seq, $id2, %ori_id, @seqs2 );

    $id2 = "seq00000";
    @seqs2 = map { ( $id, $def, $seq ) = @$_;
                   $ori_id{ ++$id2 } = $id . ( $def ? " $def" : '' );
                   [ $id2, '', clean_for_clustal( $seq ) ]
                 } @seqs;

    open( SEQ, ">$seqfile" ) || return undef;
    foreach ( @seqs2 ) { print SEQ ">$_->[0]\n$_->[2]\n" }
    close SEQ;

    #  Do the tree:

    my $clustalw = $ext_bin ? "$ext_bin/clustalw" : 'clustalw';
    &run( "$clustalw -infile=$seqfile -newtree=$treefile -tree > /dev/null" );

    my $tree = &gjonewicklib::read_newick_tree( $treefile );
    $tree || return undef;

    #  Clean up:

    &run( "/bin/rm -f $seqfile $treefile" );

    #  Restore the id:

    &gjonewicklib::newick_relabel_tips( $tree, \%ori_id );
}


sub clean_for_clustal
{
    local $_ = shift;
    s/U/C/gi;          # Sec -> Cys (well, for proteins)
    s/\*/X/g;          # Clustal chokes on *
    s/[^A-Z]/-/gi;     # Nonstandard gaps
    $_
}


sub run { system( $_[0] ) == 0 || confess( "FAILED: $_[0]" ) }


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3