[Bio] / FigKernelScripts / modify_tree.pl Repository:
ViewVC logotype

View of /FigKernelScripts/modify_tree.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Sep 28 20:24:29 2010 UTC (9 years, 4 months ago) by golsen
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
A general purpose tree modification utility.

#! /usr/bin/perl
#
#  Modify a newick tree.
#

use strict;
# use gjoseqlib;  # I think that this is no longer required
use gjonewicklib;

my $usage = <<"End_of_Usage";

Usage:  modify_tree  [options]  < tree  > modified_tree

    Options:

        -a         Reorder taxa in aesthetic tree order
        -c         Collapse zero-length branches
        -f fasta   Relabel tips from descriptions in fasta sequence file
        -i         Omit identifiers (first word) when relabeling tips
        -k keep    Keep only the taxa listed (one per line) in the file keep
        -l table   Relabel tips from tab delimited from -> to table
        -m         Use midpoint rooting
        -o omit    Delete the taxa listed (one per line) in the file omit
        -r tip     Reroot next to tip
        -t tip     Reroot to tip
        -u         Uproot the tree

    The order of operations is reroot -> filter -> reorder -> uproot.

End_of_Usage

my $aesthetic;
my $collapse;
my $midpoint;
my $noderoot;
my $relabel;
my $skip_id;
my $tiproot;
my $uproot;

my $fastafile = '';
my $tablefile = '';
my $treefile  = '';
my $keepfile  = '';
my $omitfile  = '';

while ( @ARGV && $ARGV[0] =~ /^-/ )
{
    my $flag = shift @ARGV;
    if ( $flag =~ m/^-[acimu]+$/ )
    {
        $flag =~ /a/ and $aesthetic = 1;
        $flag =~ /c/ and $collapse  = 1;
        $flag =~ /i/ and $skip_id   = 1;
        $flag =~ /m/ and $midpoint  = 1;
        $flag =~ /u/ and $uproot    = 1;
    }
    elsif ( $flag =~ s/^-f *=? *// )
    {
        $fastafile = $flag || shift @ARGV or die "Missing value for -f\n$usage\n";
        $relabel = 1;
    }
    elsif ( $flag =~ s/^-k *=? *// )
    {
        $keepfile = $flag || shift @ARGV or die "Missing value for -k\n$usage\n";
    }
    elsif ( $flag =~ s/^-l *=? *// )
    {
        $tablefile = $flag || shift @ARGV or die "Missing value for -l\n$usage\n";
        $relabel = 1;
    }
    elsif ( $flag =~ s/^-o *=? *// )
    {
        $omitfile = $flag || shift @ARGV or die "Missing value for -o\n$usage\n";
    }
    elsif ( $flag =~ s/^-r *=? *// )
    {
        $noderoot = $flag || shift @ARGV or die "Missing value for -r\n$usage\n";
    }
    elsif ( $flag =~ s/^-t *=? *// )
    {
        $tiproot = $flag || shift @ARGV or die "Missing value for -t\n$usage\n";
    }
    else
    {
        die "Bad flag: $flag\n$usage\n"
    }
}

my %label = ();
if ( $fastafile )
{
    -f $fastafile or die "Relabeling fasta file ($fastafile) not found\n";
    open( FASTA, "<$fastafile" ) || die "Could not open fasta relabeling file\n";
    while ( defined( $_ = <FASTA> ) )
    {
        s/^>\s*// or next;
        chomp;
        my ( $id, $def ) = $_ =~ /^(\S+)\s+(\S.*)$/;
        if ( $id && $def )
        {
            ( my $id2 = $id ) =~ s/_/ /g;
            $label{ $id2 } = ( $skip_id ? "" : "$id " ) . $def;
        }
    }
    close( FASTA );
}

my @keep;
if ( $keepfile )
{
    -f $keepfile or die "Keep id file ($keepfile) not found\n";
    open KEEP, "<$keepfile" or print STDERR "Could not open file '$keepfile'\n" and exit;
    @keep = map { ( m/(\S+)/ ) } <KEEP>;
    close KEEP;
    @keep or print STDERR "No ids found in keep id file '$keepfile'." and exit;
}

my @omit;
if ( $omitfile )
{
    -f $omitfile or die "Omit id file ($omitfile) not found\n";
    open OMIT, "<$omitfile" or print STDERR "Could not open file '$omitfile'\n" and exit;
    @omit = map { ( m/(\S+)/ ) } <OMIT>;
    close OMIT;
}

if ( $tablefile )
{
    -f $tablefile or die "Relabeling table file ($tablefile) not found\n";
    open( TABLE, "<$tablefile" ) || die "Could not open relabeling table file\n";
    while ( defined( $_ = <TABLE> ) )
    {
        chomp;
        my ( $old, $new ) = split /\t/;
        if ( $old && $new )
        {
            $label{ $old } = $new;
            if ( $old =~ s/_/ /g ) { $label{ $old } = $new }
        }
    }
    close( TABLE );
}

my $treestr = join( "", <STDIN> ) or die "$usage\n";

my $tree0 = gjonewicklib::parse_newick_tree_str( $treestr );
$tree0 || die "Could not parse tree: " . substr( $treestr, 0, 100 ) . "\n\n$usage\n";

if ( @omit )
{
    my %omit = map { $_ => 1 } @omit;
    @keep = grep { ! $omit{ $_ } } newick_tip_list( $tree0 );
}

my $tree1 = $midpoint  ? gjonewicklib::reroot_newick_to_midpoint_w( $tree0 )   :
            $noderoot  ? gjonewicklib::reroot_newick_next_to_tip( $tree0, $noderoot ) :
            $tiproot   ? gjonewicklib::reroot_newick_to_tip( $tree0, $tiproot )       : $tree0;

my $tree2 = @keep      ? gjonewicklib::rooted_newick_subtree( $tree1, \@keep )        : $tree1;
my $tree3 = $aesthetic ? gjonewicklib::aesthetic_newick_tree( $tree2 )                : $tree2;
my $tree4 = $uproot    ? gjonewicklib::uproot_newick( $tree3 )                        : $tree3;
my $tree5 = $relabel   ? gjonewicklib::newick_relabel_tips( $tree4, \%label )         : $tree4;
gjonewicklib::collapse_zero_length_branches( $tree5 ) if $collapse;

gjonewicklib::writeNewickTree( $tree5 );

exit;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3