[Bio] / FigKernelScripts / analyze_kmer_assignments.pl Repository:
ViewVC logotype

View of /FigKernelScripts/analyze_kmer_assignments.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Nov 24 17:49:27 2009 UTC (9 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
new script to compare output of assign_using_kmers to current SEED assignments

use strict;
use FIG;

my $usage = "analyze_kmer_assignments kmer-output-file\n";

@ARGV == 1 or die $usage;

my $assignF = shift;

open (FH,"<$assignF") || die "could not open $assignF";
open (OUT, ">&STDOUT");

my $fig = new FIG;

my $l = <FH>;

#
# for now we might not have all pegs, so grab the genome from the first line of the
# file and pull a peglist from SEED.
#
my ($genome) = $l =~ /fig\|(\d+\.\d)/;
my %pegs = map { $_ => 1 } $fig->pegs_of($genome);

my ($total_count, $match_count, $diff_count, $unannotated_count, $annotated_count, $misannotated);
my @out;

while (defined($l))
{
    chomp $l;
    my($peg_id, $annotation, $score, $non_overlap_hits, $overlap_hits) = split(/\t/, $l);

    delete $pegs{$peg_id};
    
    if ($annotation eq '')
    {
	$annotation = "NOT ANNOTATED";
	$unannotated_count++;
    }
    else
    {
	$annotated_count++;
    }

    $annotation =~ s/^FIG\d+[^:]*:\s*//;

    # determine if the seed annotation and the figfam annotation are the same
    my $seed_annotation = $fig->function_of($peg_id);
    
    my ($match_status);
    if ($seed_annotation eq $annotation){
	$match_status = "MATCH";
	$match_count++;
    }
    else{
	if ($annotation ne "NOT ANNOTATED"){
	    $misannotated++;
	}
	$match_status = "DIFF";
	$diff_count++;
    }

    push(@out, [$match_status, $peg_id, $annotation, $seed_annotation]);
    #print OUT "$match_status\t$peg_id\t$annotation\t$seed_annotation\n";
    $l = <FH>;
}

#
# leftover unannotated pegs
for my $peg_id (sort keys %pegs)
{
    my $seed_annotation = $fig->function_of($peg_id);
    my $annotation = 'NOT ANNOTATED';
    $unannotated_count++;
    
    my ($match_status);
    if ($seed_annotation eq $annotation){
	$match_status = "MATCH";
	$match_count++;
    }
    else{
	if ($annotation ne "NOT ANNOTATED"){
	    $misannotated++;
	}
	$match_status = "DIFF";
	$diff_count++;
    }
    
    push(@out, [$match_status, $peg_id, $annotation, $seed_annotation]);
#    print OUT "$match_status\t$peg_id\t$annotation\t$seed_annotation\n";
}

for my $l (sort { &FIG::by_fig_id($a->[1], $b->[1]) } @out)
{
    print OUT join("\t", @$l), "\n";
}

print OUT "MATCH: $match_count\nDIFF: $diff_count\nANNOTATED: $annotated_count\nUNANNOTATED: $unannotated_count\n";
close FH;
close OUT;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3