Parent Directory
|
Revision Log
Miscellaneous loader fixes.
#!/usr/bin/perl -w =head1 Amino Acid Frequency Script This script reads a protein FASTA file and displays the number of times each amino acid occurs. In addition, a SEED organism directory can be specified and all of the proteins in all of the genomes will be counted. The currently-supported command-line options are as follows. =over 4 =item figDir If specified, the input file name will be assumed to be a SEED organism directory. The protein FASTA files for all the organisms will be scanned. =back There is one positional parameter: the name of the input FASTA file or organism directory, =cut use strict; use Stats; use Getopt::Long; # Create the statistics object. my $stats = Stats->new(); # Get the command-line options. my $figDir; my $ok = GetOptions(figDir => \$figDir); my ($inFile) = @ARGV; if (! $ok || ! $inFile) { die "AAFrequencies [ --figDir ] infile"; } # Compute the input files. my @files; if (! $figDir) { if (! -f $inFile) { die "Input file $inFile not found."; } else { @files = ($inFile); } } else { # Here we have a SEED organism directory. my $dh; if (! opendir($dh, $inFile)) { die "Could not open SEED directory $inFile."; } else { my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir($dh); closedir $dh; print scalar(@genomes) . " genome directories found.\n"; for my $genome (@genomes) { $stats->Add(genomes => 1); my $file = "$inFile/$genome/Features/peg/fasta"; if (-f $file) { push @files, $file; } } } } # The counts will go in here. my %counts; # Process each FASTA file individually. print scalar(@files) . " files to process.\n"; for my $file (@files) { print "Processing $file.\n"; $stats->Add(inFiles => 1); my $ih; if (! open($ih, "<$file")) { print "Could not open file: $!\n"; $stats->Add(fileFail => 1); } else { # Loop through the FASTA file. while (! eof $ih) { my $line = <$ih>; chomp $line; $stats->Add(lineIn => 1); # Check for a header line. if (substr($line, 0, 1) eq ">") { $stats->Add(headerIn => 1); } else { # Here we have a data line. $stats->Add(dataIn => 1); # Loop through the letters. for my $letter (split "", $line) { $stats->Add(lettersIn => 1); $counts{$letter}++; } } } } } # Sort the results and output them. print "Result counts.\n\n"; my @letters = sort { $counts{$b} <=> $counts{$a} } keys %counts; for my $letter (@letters) { my $number = $counts{$letter}; if (length($number) < 10) { $number = (" " x (10 - length($number))) . $number; } print "$letter $number\n"; } print "\nAll done:\n" . $stats->Show();
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |