[Bio] / FigKernelScripts / make_phob_from_seqs.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/make_phob_from_seqs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : overbeek 1.1
2 : overbeek 1.3 &make_sure("clustalw","protdist","clustal_to_fasta","fasta_to_phylip","trim_sequences","neighbor");
3 : overbeek 1.1
4 :     $usage = "usage: make_phob_from_seqs Directory [-trim] < Seqs";
5 :    
6 :     (
7 :     ($puD = shift @ARGV)
8 :     )
9 :     || die $usage;
10 :    
11 :     $trim = (@ARGV > 0) && ($ARGV[0] =~ /^-trim/i);
12 :    
13 :     (! -e $puD) || die "$puD already exists -- check it (and delete it, if that is what you really wish)";
14 :     mkdir($puD,0777) || die "could not mkdir $puD";
15 :     open(SEQSOUT,">$puD/seqs.fasta") || die "could not open $puD/seqs";
16 :     open(IDS,">$puD/ids") || die "could not open $puD/ids";
17 :    
18 :     $n = 1;
19 :     $/ = "\n>";
20 :     while (defined($_ = <STDIN>))
21 :     {
22 :     chomp;
23 :     if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
24 :     {
25 :     $id = $1;
26 :     $seq = $2;
27 :     $idN = "id$n";
28 :     $n++;
29 :     print IDS "$idN\t$id\n";
30 :     $seq =~ s/\s//gs;
31 :     &display_id_and_seq($idN,\$seq,\*SEQSOUT);
32 :     }
33 :     }
34 :     close(IDS);
35 :     close(SEQSOUT);
36 :    
37 :     if ($trim)
38 :     {
39 :     rename("$puD/seqs.fasta","$puD/seqs.untrimmed");
40 : overbeek 1.2 &run("trim_sequences 1.0e-5 0.8 < \"$puD/seqs.untrimmed\" > \"$puD/seqs.fasta\"");
41 : overbeek 1.1 }
42 :    
43 :     chdir $puD;
44 :     &run("clustalw -infile=seqs.fasta -align -outorder=aligned > /dev/null");
45 :     rename("seqs.dnd","tree.dnd");
46 :     rename("seqs.aln","aln");
47 :     rename("seqs.fasta","seqs");
48 :     &run("clustal_to_fasta < aln > aln.fasta");
49 :     &run("fasta_to_phylip < aln.fasta > infile");
50 :    
51 :     open(PROTDIST,"| protdist > /dev/null") || die "could not open protdist";
52 :     print PROTDIST "Y\n";
53 :     close(PROTDIST);
54 : overbeek 1.2 &run("cp outfile distance.matrix");
55 :     rename("outfile","infile");
56 :    
57 : overbeek 1.3 open(NEIGHBOR,"| neighbor > /dev/null") || die "could not open neighbor";
58 :     print NEIGHBOR "Y\n";
59 :     close(NEIGHBOR);
60 : overbeek 1.2 rename("outtree","tree.dnd");
61 : overbeek 1.1
62 :     unlink("infile");
63 : overbeek 1.4 unlink("outfile");
64 : overbeek 1.1
65 :     if (! ((-s "distance.matrix") && (-s "aln.fasta") && (-s "tree.dnd")))
66 :     {
67 :     print STDERR "*** Something went wrong\n";
68 :     exit(1);
69 :     }
70 :    
71 :     sub display_id_and_seq {
72 :     my( $id, $seq, $fh ) = @_;
73 :    
74 :     if (! defined($fh) ) { $fh = \*STDOUT; }
75 :    
76 :     print $fh ">$id\n";
77 :     &display_seq($seq, $fh);
78 :     }
79 :    
80 :     sub display_seq {
81 :     my ( $seq, $fh ) = @_;
82 :     my ( $i, $n, $ln );
83 :    
84 :     if (! defined($fh) ) { $fh = \*STDOUT; }
85 :    
86 :     $n = length($$seq);
87 :     # confess "zero-length sequence ???" if ( (! defined($n)) || ($n == 0) );
88 :     for ($i=0; ($i < $n); $i += 60)
89 :     {
90 :     if (($i + 60) <= $n)
91 :     {
92 :     $ln = substr($$seq,$i,60);
93 :     }
94 :     else
95 :     {
96 :     $ln = substr($$seq,$i,($n-$i));
97 :     }
98 :     print $fh "$ln\n";
99 :     }
100 :     }
101 :    
102 :     sub run {
103 :     my($cmd) = @_;
104 :    
105 :     # my @tmp = `date`; chomp @tmp; print STDERR "$tmp[0]: running $cmd\n";
106 :     (system($cmd) == 0) || confess "FAILED: $cmd";
107 :     }
108 :    
109 :     sub make_sure {
110 :     my(@progs) = @_;
111 :    
112 :     my $prog;
113 :     foreach $prog (@progs)
114 :     {
115 :     my @tmp = `which $prog`;
116 :     if ($tmp[0] =~ /^no $prog/)
117 :     {
118 :     print STDERR $tmp[0];
119 :     exit(1);
120 :     }
121 :     }
122 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3