[Bio] / FigKernelScripts / count_bases.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/count_bases.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : olson 1.2 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : overbeek 1.1 # Usage: count_bases < something.fasta or
19 :     # cat something.fasta | count_bases
20 :    
21 :     $dictP = base_dict_init();
22 :    
23 :     $/ = "\n>";
24 :     while (defined($_ = <STDIN>))
25 :     {
26 :     chomp;
27 :     if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
28 :     {
29 :     $sid = $1;
30 :     $seq = $2;
31 :     $seq =~ s/\n//gs;
32 :     $seq =~ s/ //gs;
33 :     $seq =~ s/[\-\.\~]//gs;
34 :     $seq = lc $seq;
35 :     $seq =~ s/u/t/g;
36 :    
37 :     if ($seq && $sid)
38 :     {
39 :     print "$sid\t", chars_in_string(\$seq,$dictP), "\n";
40 :     }
41 :     }
42 :     }
43 :    
44 :     sub chars_in_string {
45 :    
46 :     # Counts the number certain characters in a string. Only
47 :     # the characters included in the incoming dictionary are
48 :     # counted. So this can work for amino acids, bases or ..
49 :    
50 :     # In: Pointer to string
51 :     # Pointer to dictionary hash
52 :     # Out: Integer
53 :    
54 :     local ($stringP,$dictP) = @_;
55 :     my $i = 0;
56 :    
57 :     foreach $ch (keys %$dictP) {
58 :     $i += $$stringP =~ s/$ch/$ch/g;
59 :     }
60 :    
61 :     return $i;
62 :     }
63 :    
64 :     sub base_dict_init {
65 :    
66 :     # Returns a dictionary that says whether a character is a
67 :     # valid base symbol.
68 :    
69 :     # In: Nothing
70 :     # Out: Pointer to hash
71 :    
72 :     local %dict;
73 :    
74 :     $dict{"A"} = $dict{"a"} = 1;
75 :     $dict{"G"} = $dict{"g"} = 1;
76 :     $dict{"C"} = $dict{"c"} = 1;
77 :     $dict{"U"} = $dict{"u"} = 1;
78 :     $dict{"T"} = $dict{"t"} = 1;
79 :     $dict{"R"} = $dict{"r"} = 1;
80 :     $dict{"Y"} = $dict{"y"} = 1;
81 :     $dict{"W"} = $dict{"w"} = 1;
82 :     $dict{"S"} = $dict{"s"} = 1;
83 :     $dict{"M"} = $dict{"m"} = 1;
84 :     $dict{"K"} = $dict{"k"} = 1;
85 :     $dict{"H"} = $dict{"h"} = 1;
86 :     $dict{"D"} = $dict{"d"} = 1;
87 :     $dict{"V"} = $dict{"v"} = 1;
88 :     $dict{"B"} = $dict{"b"} = 1;
89 :     $dict{"N"} = $dict{"n"} = 1;
90 :    
91 :     return \%dict;
92 :     }
93 :    
94 :     sub complement {
95 :     #
96 :     # Returns the complement of a sequence with preservation of case
97 :     # complemented ambiguity codes.
98 :     #
99 :     local ($seq) = @_;
100 :     my (%dict); undef %dict;
101 :     my ($cseq) = "";
102 :    
103 :     $dict{"A"} = "U"; $dict{"a"} = "u";
104 :     $dict{"G"} = "C"; $dict{"g"} = "c";
105 :     $dict{"C"} = "G"; $dict{"c"} = "g";
106 :     $dict{"U"} = "A"; $dict{"u"} = "a";
107 :     $dict{"T"} = "A"; $dict{"t"} = "a";
108 :     $dict{"R"} = "Y"; $dict{"r"} = "y";
109 :     $dict{"Y"} = "R"; $dict{"y"} = "r";
110 :     $dict{"W"} = "W"; $dict{"w"} = "w";
111 :     $dict{"S"} = "S"; $dict{"s"} = "s";
112 :     $dict{"M"} = "K"; $dict{"m"} = "k";
113 :     $dict{"K"} = "M"; $dict{"k"} = "m";
114 :     $dict{"H"} = "D"; $dict{"h"} = "d";
115 :     $dict{"D"} = "H"; $dict{"d"} = "h";
116 :     $dict{"V"} = "B"; $dict{"v"} = "b";
117 :     $dict{"B"} = "V"; $dict{"b"} = "v";
118 :     $dict{"N"} = "N"; $dict{"n"} = "n";
119 :    
120 :     foreach (reverse split(//,$seq)) {
121 :     $cseq .= $dict{$_};
122 :     }
123 :    
124 :     return $cseq;
125 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3