[Bio] / FigKernelScripts / kmer_classifier.pl Repository:
ViewVC logotype

View of /FigKernelScripts/kmer_classifier.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri Feb 28 22:30:11 2014 UTC (5 years, 8 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Add Ross/Jim Davis kmer classifier code.

use strict;
use KmerClassifier;
use Getopt::Long::Descriptive;
use File::Temp;

my($opt, $usage) = describe_options("%c %o classifier-data-directory < input > output",
				    ["input|i=s" => "Input file (defaults to stdin)"],
				    ["output|o=s" => "Output file (defaults to stdout)"],
				    ["groups|g" => "Print the list of group identifiers to stdout and exit."],
				    ["detailed-output-file|d=s" => "File to write detailed output (reads and hit information)"],
				    ["unclassified-output-file|u=s" => "File to write unclassified read IDs to"],
				    ["help|h" => "Print the usage message and exit."],
				    );

print($usage->text), exit if $opt->help;
print(STDERR $usage->text), exit 1 if @ARGV != 1;

my $in_file;
if ($opt->input)
{
    $in_file = $opt->input;
}
else
{
    $in_file = File::Temp->new;
    while (<STDIN>)
    {
	print $in_file $_;
    }
    close($in_file);
}

my $out_fh;
if ($opt->output)
{
    open($out_fh, ">", $opt->output) or die "Cannot write output file " . $opt->output . ": $!";
}
else
{
    $out_fh = \*STDOUT;
}

my $det_fh;
if ($opt->detailed_output_file)
{
    open($det_fh, ">", $opt->detailed_output_file) or die "Cannot open " . $opt->detailed_output_file . ": $!";
}

my $missed_fh;
if ($opt->unclassified_output_file)
{
    open($missed_fh, ">", $opt->unclassified_output_file) or die "Cannot open " . $opt->unclassified_output_file . ": $!";
}

my $classifier_data = shift;
my $classifier = KmerClassifier->new($classifier_data);

if ($opt->groups)
{
    my $g = $classifier->group_membership_hash();
    for my $gname (sort { my($aa) = $a =~ /group(\d+)/; my($bb) = $b =~ /group(\d+)/;
			  defined($aa) && defined($bb) ? ($aa <=> $bb) : ($a cmp $b) } keys %$g)
    {
	print join("\t", $gname, @{$g->{$gname}}), "\n";
    }
    exit 0;
}

my($bins, $missed) = $classifier->classify("" . $in_file, $det_fh);

for my $g (sort { $bins->{$b} <=> $bins->{$a} } keys %$bins)
{
    print $out_fh "$g\t$bins->{$g}\n";
}

if ($missed_fh)
{
    print $missed_fh "$_\n" foreach @$missed;
}
close($missed_fh);

close($out_fh);
close($det_fh);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3