[Bio] / FigKernelScripts / count_bases.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/count_bases.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

# Usage: count_bases < something.fasta   or
#        cat something.fasta | count_bases
#
# Reads FASTA records from STDIN and prints one line per record:
#     <sequence id> TAB <number of valid base symbols in the sequence>

{
    # Strictures and lexicals are confined to this bare block so they
    # cannot collide with variable names used inside the subroutines
    # defined further down in the file.
    use strict;
    use warnings;

    my $dictP = base_dict_init();

    # Read one whole FASTA record per <STDIN> call: records are separated
    # by a newline followed by '>'.  'local' restores the default record
    # separator when the block is left.
    local $/ = "\n>";

    while (defined(my $record = <STDIN>)) {
        chomp $record;    # strips the trailing "\n>" separator, if present

        # First token of the header line is the id; everything after the
        # header line is sequence data.  The leading '>' is only present
        # on the very first record.
        next unless $record =~ /^>?(\S+)[^\n]*\n(.*)/s;
        my ($sid, $seq) = ($1, $2);

        $seq =~ tr/\n .~-//d;    # drop line breaks, blanks and gap symbols
        $seq = lc $seq;
        $seq =~ tr/u/t/;         # treat RNA as DNA

        # Test the length rather than truth so that an id or a sequence
        # consisting of "0" is not mistaken for "empty".  $sid can never
        # be empty here because \S+ matched at least one character.
        next unless length $seq;

        print "$sid\t", chars_in_string(\$seq, $dictP), "\n";
    }
}
26 :    
sub chars_in_string {

    # Counts the number of certain characters in a string.  Only the
    # characters (hash keys) included in the incoming dictionary are
    # counted, so this can work for amino acids, bases or anything else.
    #
    # Keys are matched literally: regex metacharacters such as '*' or '.'
    # in the dictionary have no special meaning.  The string itself is
    # only read, never modified.
    #
    # In:  Reference to string
    #      Reference to dictionary hash
    # Out: Integer (sum of the occurrences of every dictionary key)

    my ($stringP, $dictP) = @_;
    my $total = 0;

    foreach my $ch (keys %$dictP) {

        # An empty pattern would make Perl reuse the last successful
        # pattern instead of matching "nothing"; skip such keys.
        next if $ch eq '';

        # List-context match under /g returns every hit; assigning that
        # list to () and then to a scalar yields the number of hits.
        my $hits = () = $$stringP =~ /\Q$ch\E/g;
        $total += $hits;
    }

    return $total;
}
46 :    
sub base_dict_init {

    # Returns a dictionary that says whether a character is a valid base
    # symbol.  Every symbol listed below is present in both upper and
    # lower case, each mapped to 1.
    #
    # In:  Nothing
    # Out: Reference to a freshly built hash

    # A lexical hash: each call hands out its own independent hash and no
    # package variable is touched.
    my %dict = map { (uc($_) => 1, $_ => 1) }
               qw(a g c u t r y w s m k h d v b n);

    return \%dict;
}
76 :    
sub complement {
    #
    # Returns the REVERSE complement of a sequence: the input is reversed
    # and every base symbol, including the ambiguity codes, is replaced by
    # its complement with preservation of case.
    #
    # The complement of A/a is written as U/u (RNA alphabet); both U and T
    # are complemented to A.
    #
    # Characters without a complement (gap symbols, blanks, ...) are kept
    # unchanged in their reversed position, so the result always has the
    # same length as the input.
    #
    # In:  Sequence string
    # Out: Reverse-complemented sequence string ("" for undefined input)
    #
    my ($seq) = @_;
    return "" unless defined $seq;

    # Scalar assignment puts reverse() in scalar context, which reverses
    # the characters of the string.
    my $cseq = reverse $seq;

    #          A G C U T R Y W S M K H D V B N  (then the same in lower case)
    $cseq =~ tr/AGCUTRYWSMKHDVBNagcutrywsmkhdvbn/UCGAAYRWSKMDHBVNucgaayrwskmdhbvn/;

    return $cseq;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3