[Bio] / FigMetagenomeTools / countfastachars.pl Repository:
ViewVC logotype

Diff of /FigMetagenomeTools/countfastachars.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Mon Feb 19 17:15:26 2007 UTC revision 1.2, Fri Mar 16 20:51:09 2007 UTC
# Line 2  Line 2 
2    
3  # count fasta characters  # count fasta characters
4    
5    my $have_meta;
6    
7    eval {
8        require GenomeMeta;
9        import GenomeMeta;
10        $have_meta++;
11    };
12    
13  use strict;  use strict;
14    
15  my $rewrite; # should we rewrite the file?  my $rewrite; # should we rewrite the file?
16  my $summary; #do we just want a summary  my $summary; #do we just want a summary
17  my $protein; # is it a protein  my $protein; # is it a protein
18  unless (@ARGV) {die "$0: <-r rewrite file> <-s summary> <-p protein files> <fasta files or sequences to count>\n"}  my $meta;
19  foreach my $file (@ARGV) {  my $meta_key = "countfasta.default";
20    unless (@ARGV) {die "$0: <-m metafile> <-k meta key> <-r rewrite file> <-s summary> <-p protein files> <fasta files or sequences to count>\n"}
21    while (@ARGV)
22    {
23        my $file = shift;
24    if ($file eq "-r") {$rewrite=1; next}    if ($file eq "-r") {$rewrite=1; next}
25    if ($file eq "-s") {$summary=1; next}    if ($file eq "-s") {$summary=1; next}
26    if ($file eq "-p") {$protein=1; next}    if ($file eq "-p") {$protein=1; next}
27    if (-e $file) {      if ($file eq '-m')
28        {
29            $have_meta or die "metafile support not available\n";
30            $meta = new GenomeMeta(undef, shift);
31            next;
32        }
33        if ($file eq '-k')
34        {
35            $meta_key = shift;
36            next;
37        }
38        if (-e $file)
39        {
40     my $runningtotal; my $number;     my $runningtotal; my $number;
41     my %long; my %short; $long{'length'}=1; $short{'length'}=1000000000; # use these to store shortest and longest          my %long; my %short;
42     if ($file =~ /\.gz$/) {open(IN, "gunzip -c $file|") || die "Can't open pipe to gunzip"}          $long{'length'}=1;
43     else {open (IN, $file)|| die "Can't open $file\n"}          $short{'length'}=1000000000; # use these to store shortest and longest
44    
45            if ($file =~ /\.gz$/)
46            {
47                open(IN, "gunzip -c $file|") || die "Can't open pipe to gunzip";
48            }
49            else
50            {
51                open (IN, $file)|| die "Can't open $file\n";
52            }
53    
54     unless ($summary) {     unless ($summary) {
55      open (OUT, ">$file.count") || die "Can't open $file.count for writing\n";      open (OUT, ">$file.count") || die "Can't open $file.count for writing\n";
56      if ($protein) {print OUT "Sequence\tLength\tRunning Total\n"}              if ($protein) {
57      else {print OUT "Sequence\tA\tG\tC\tT\tN\tLine total\tRunning total\n"}                  print OUT "Sequence\tLength\tRunning Total\n";
58                }
59                else {
60                    print OUT "Sequence\tA\tG\tC\tT\tN\tLine total\tRunning total\n";
61                }
62            }
63            if ($rewrite) {
64                open REWRITE, ">$file.fasta" || die "Can't open $file.fasta for writing\n";
65     }     }
    if ($rewrite) {open REWRITE, ">$file.fasta" || die "Can't open $file.fasta for writing\n"}  
66     my $seq; my $tag;     my $seq; my $tag;
67     while (<IN>) {     while (<IN>) {
68      chomp;      chomp;
# Line 54  Line 94 
94      }      }
95     else {s/\s//g; $seq.=$_}     else {s/\s//g; $seq.=$_}
96     }     }
97            close(IN);
98    
99    my $length=length($seq);    my $length=length($seq);
100    if ($length > $long{'length'}) {%long=('length'=>$length, 'tag'=>$tag)}    if ($length > $long{'length'}) {%long=('length'=>$length, 'tag'=>$tag)}
101    if ($length < $short{'length'}) {%short=('length'=>$length, 'tag'=>$tag)}    if ($length < $short{'length'}) {%short=('length'=>$length, 'tag'=>$tag)}
# Line 74  Line 116 
116     if ($rewrite) {$seq =~ s/(.{0,60})/$1\n/g; chomp($seq); print REWRITE "$tag\n$seq"}     if ($rewrite) {$seq =~ s/(.{0,60})/$1\n/g; chomp($seq); print REWRITE "$tag\n$seq"}
117     unless ($summary) {print OUT "$tag\t$a\t$g\t$c\t$t\t$n\t$total\t$runningtotal\n"}     unless ($summary) {print OUT "$tag\t$a\t$g\t$c\t$t\t$n\t$total\t$runningtotal\n"}
118    }    }
119    if ($summary) {print "$file: number of seqs:$number total: $runningtotal, average: ", $runningtotal/$number, ", Shortest: $short{'tag'} ($short{'length'}) Longest: $long{'tag'} ($long{'length'})\n"}          if ($summary) {
120                print "$file: number of seqs:$number total: $runningtotal, average: ", $runningtotal/$number, ", Shortest: $short{'tag'} ($short{'length'}) Longest: $long{'tag'} ($long{'length'})\n";
121            }
122    
123            if ($meta)
124            {
125                $meta->set_metadata("${meta_key}.file", $file);
126                $meta->set_metadata("${meta_key}.num_seqs", $number);
127                $meta->set_metadata("${meta_key}.total", $runningtotal);
128                $meta->set_metadata("${meta_key}.average", $runningtotal / $number);
129                $meta->set_metadata("${meta_key}.shortest_seq", $short{'tag'});
130                $meta->set_metadata("${meta_key}.shortest_len", $short{'length'});
131                $meta->set_metadata("${meta_key}.longest_seq", $long{'tag'});
132                $meta->set_metadata("${meta_key}.longest_len", $long{'length'});
133            }
134    }    }
135    else {    else {
136     my $seq = $file;     my $seq = $file;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3