[Bio] / FigKernelScripts / freqs_2_nj_tree.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/freqs_2_nj_tree.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : golsen 1.3 #!/usr/bin/env perl -w
2 : golsen 1.1 #
3 :     # freqs_2_nj_tree [options] < freqs_file > newick_tree
4 :     #
5 : golsen 1.3 # Distance is Manhattan metric within amino acid, and Euclidian
6 : golsen 1.1 # between amino acids. The tree is a newick tree with the neighbor program.
7 :     #
8 :     # This should use a temporary subdirectory, but for now ...
9 :     #
10 :    
11 :     use strict;
12 :     use gjocodonlib;
13 :     use gjonewicklib;
14 :    
15 :     sub usage
16 :     {
17 :     print STDERR <<'End_of_usage';
18 :     freqs_2_nj_tree -
19 :    
20 :     Construct a neighbor-joining tree of codon usage frequencies (supplied as
21 :     input). The output is a tree in the Newick's 8:45 standard. The distance
22 : golsen 1.3 is Manhattan metric within amino acid, and Euclidian between amino acids.
23 : golsen 1.1
24 :     This program requires that the neighbor program from the PHYLIP package be
25 :     installed and accessible throught the current "path".
26 :    
27 :     Usage: freqs_2_nj_tree [options] < freqs_file > newick_tree
28 :    
29 :     Options:
30 :    
31 : golsen 1.2 -a # Aesthetic tree order of tips in output tree (default)
32 :     -A # AnAesthetic tree order of tips in output tree
33 : golsen 1.1 -c # Clobber infile, outfile and outtree files if they exisit
34 :     -m # Midpoint root the output tree
35 :    
36 :     End_of_usage
37 :    
38 :     exit;
39 :     }
40 :    
41 : golsen 1.2 my $aesthetic = 1;
42 : golsen 1.1 my $clobber = 0;
43 :     my $midpoint = 0;
44 :    
45 :     while ( @ARGV && $ARGV[0] =~ s/^-// )
46 :     {
47 :     local $_ = shift;
48 :     if ( s/a//g ) { $aesthetic = 1 }
49 : golsen 1.2 if ( s/A//g ) { $aesthetic = 0 }
50 : golsen 1.1 if ( s/c//g ) { $clobber = 1 }
51 :     if ( s/m//g ) { $midpoint = 1 }
52 : golsen 1.3 if ( m/./ ) { print STDERR "Bad flag: '$_'.\n\n"; usage() }
53 : golsen 1.1 }
54 :    
55 :     if ( $clobber )
56 :     {
57 :     unlink 'infile' if -f 'infile';
58 :     unlink 'outfile' if -f 'outfile';
59 :     unlink 'outtree' if -f 'outtree';
60 :     }
61 :     else
62 :     {
63 :     my @found = ();
64 :     push @found, 'infile' if -f 'infile';
65 :     push @found, 'outfile' if -f 'outfile';
66 :     push @found, 'outtree' if -f 'outtree';
67 :     if ( @found )
68 :     {
69 :     print STDERR "The following files conflict with this program:\n";
70 :     foreach ( @found ) { print STDERR " $_\n" }
71 :     print STDERR "Either remove them, or use the -c flag.\n";
72 :     usage();
73 :     }
74 :     }
75 :    
76 :     my $i = '00000';
77 :     my @freqs;
78 :     my %labels;
79 :    
80 :     while ( <> )
81 :     {
82 :     my ( $freq, undef, $desc ) = gjocodonlib::split_frequencies( $_ );
83 : golsen 1.2 $freq or next;
84 : golsen 1.1 $i++;
85 :     my $lbl1 = $desc || "freq_$i";
86 :     my $lbl2 = "otu$i";
87 :     $labels{ $lbl2 } = $lbl1;
88 :     push @freqs, [ $freq, $lbl2 ];
89 :     }
90 :    
91 :     @freqs > 2
92 :     or print STDERR "Less than two sets of codon usage frequencies read.\n"
93 :     and usage();
94 :    
95 :     open INFILE, '>infile'
96 :     or print STDERR "Could not open 'infile' for writing.\n"
97 :     and exit;
98 :     print INFILE $i+0 . "\n";
99 :     for ( my $f1 = 0; $f1 < @freqs; $f1++ )
100 :     {
101 :     my $freqs1 = $freqs[$f1]->[0];
102 :     my @line = sprintf "%-10s ", $freqs[$f1]->[1];
103 :     for ( my $f2 = 0; $f2 < @freqs; $f2++ )
104 :     {
105 :     #
106 : golsen 1.3 # codon_freq_distance_2 is Manhattan metric within amino acid, and
107 : golsen 1.1 # Euclidian between amino acids.
108 :     #
109 :     push @line, sprintf( "%.4f", gjocodonlib::codon_freq_distance_2( $freqs1, $freqs[$f2]->[0] ) );
110 :     }
111 :     print INFILE join( ' ', @line), "\n";
112 :     }
113 :     close INFILE;
114 :    
115 :     # Beware: neighbor writes to stdout, so this will contaminate the output of
116 :     # this script unless it redirected.
117 :    
118 : golsen 1.2 my $cmd = SeedAware::executable_for( 'neighbor' );
119 :     my $redir = { stdout => '/dev/null' };
120 :     my $fh = SeedAware::write_to_pipe_with_redirect( $cmd, $redir )
121 :     or print STDERR "Could not open pipe to 'neighbor'.\n"
122 :     and exit;
123 :    
124 :     print $fh "y\n";
125 :     close $fh;
126 : golsen 1.1
127 :     -f 'outtree'
128 :     or print STDERR "Could not find outtree for reading.\n"
129 :     and exit;
130 :     my $tree = gjonewicklib::read_newick_tree( 'outtree' );
131 : golsen 1.2 $tree or print STDERR "Could not read outtree.\n"
132 : golsen 1.1 and exit;
133 :     unlink( 'infile', 'outfile', 'outtree' );
134 :    
135 :     $tree = gjonewicklib::newick_relabel_nodes( $tree, \%labels );
136 :     $tree = gjonewicklib::reroot_newick_to_midpoint_w( $tree ) if $midpoint;
137 :     $tree = gjonewicklib::aesthetic_newick_tree( $tree ) if $aesthetic;
138 :    
139 :     gjonewicklib::writeNewickTree( $tree );
140 :     # gjonewicklib::printer_plot_newick( $tree );
141 :    
142 :     exit;
143 :    
144 :     # Split frequences with amino acids separated by vertical bars, and
145 :     # codons within the amino acid separated by commas:
146 :     #
147 :     # fc1aa1,fc2aa1,fc3aa1,fc4aa1|fc1aa2,fc2aa2,...|fc1aa3,fc2aa3,...
148 :    
149 :     sub split_freq { [ map { [ map { $_ + 0 } split /,/ ] } split /\|/, shift ] }
150 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3