[Bio] / FigKernelScripts / compare_figfam_release_results.pl Repository:
ViewVC logotype

View of /FigKernelScripts/compare_figfam_release_results.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Thu Oct 8 18:52:33 2009 UTC (10 years, 4 months ago) by arodri7
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.4: +2 -1 lines
commit changes

#!/usr/bin/env /home/arodri7/FIGdisk/bin/run_perl

use Data::Dumper;
use Carp;
use FIG_Config;
use FIG;


# Usage text shown on STDERR when the command line cannot be understood.
my $usage = "compare_figfam_release_results -o older_figfam_release_directory -n recent_figfam_release_directory [-print]\n";
$usage .= "The -print option will print out the ids of the sequences which function annotation did not match between releases\n";

# Command-line state:
#   $oldFF        - value given with -o
#   $newFF        - value given with -n
#   $print_detail - set to 1 when -print is present
#   $compare_to   - value given with -first (accepted for compatibility)
my ($oldFF, $newFF, $print_detail, $compare_to);

# Consume leading "-flag" arguments. A flag's value may be attached
# ("-o/path") or supplied as the following argument ("-o /path").
# The @ARGV guard stops the loop cleanly when no arguments remain instead of
# matching against an undefined value.
while ( @ARGV && defined $ARGV[0] && $ARGV[0] =~ /^-/ ) {
    # A lexical copy keeps the global $_ untouched for the rest of the script.
    my $flag = shift @ARGV;
    if    ($flag =~ s/^-o//)     { $oldFF        = ($flag || shift @ARGV) }
    elsif ($flag =~ s/^-n//)     { $newFF        = ($flag || shift @ARGV) }
    elsif ($flag =~ s/^-first//) { $compare_to   = ($flag || shift @ARGV) }
    elsif ($flag =~ s/^-print//) { $print_detail = 1 }    # boolean switch, takes no value
    else                         { print STDERR  "Bad flag: '$flag'\n$usage"; exit 1 }
}

# Without -o, use the directory reported by FIG::get_figfams_data().
if (!$oldFF){
    $oldFF = FIG::get_figfams_data();
}

# The newer release directory is mandatory: without it the glob below would
# search "/quality_control" at the filesystem root.
if (!defined $newFF || $newFF eq ''){
    print STDERR "Missing required -n recent_figfam_release_directory\n$usage";
    exit 1;
}

my @all_outputs = glob("$newFF/quality_control/*.compare_log");

# One FIG object serves every genome; it is built on first use so nothing is
# constructed when there are no logs to compare.
my $fig;

foreach my $release_new_file (@all_outputs){
    my ($file_name) = $release_new_file =~ /\/quality_control\/(.*)/;
    my $release_old_file = "$oldFF/quality_control/$file_name";

    # Only logs present in both releases can be compared.
    next if (!-f $release_old_file);

    my $releaseO = read_file($release_old_file);
    my $releaseN = read_file($release_new_file);

    # Collect the ids (second tab-separated column) of the older log, in file
    # order, without going through a shell.
    my @ids;
    my $ids_fh;
    if (!open($ids_fh, '<', $release_old_file)){
        print STDERR "Cannot read '$release_old_file': $!\n";
        next;
    }
    while (my $line = <$ids_fh>){
        chomp $line;
        next if $line !~ /\t/;
        push @ids, (split(/\t/, $line, 3))[1];
    }
    close $ids_fh;

    # Capture the genome id in list context so a failed match can never pick
    # up a stale $1 from an earlier regex.
    my ($genome) = $file_name =~ /(\d+\.\d+)\.compare_log/;
    my $org = '';
    if (defined $genome){
        $fig ||= FIG->new;
        $org = $fig->genus_species($genome);
        $org = '' if !defined $org;
    }
    else{
        # No genome id in the name: label the report with the file name.
        $genome = $file_name;
    }

    # Every counter is a fresh lexical for each genome, so nothing carries
    # over from the previous file.
    my (@prev_correct_and_no_annotation_now, @new_annotations, @no_prev_anno_and_new_annotations_wrong);
    my $changed_assignment = 0;
    my $changed_assignment_match_count = 0;
    my $ff_count_diff = ($releaseN->{assigned} || 0) - ($releaseO->{assigned} || 0);

    # figure out the errors
    foreach my $id (@ids){
        # Missing ids and missing fields compare as empty strings.
        my $old = ref $releaseO->{$id} ? $releaseO->{$id} : {};
        my $new = ref $releaseN->{$id} ? $releaseN->{$id} : {};
        my $old_status = defined $old->{status}             ? $old->{status}             : '';
        my $new_status = defined $new->{status}             ? $new->{status}             : '';
        my $old_anno   = defined $old->{release_annotation} ? $old->{release_annotation} : '';
        my $new_anno   = defined $new->{release_annotation} ? $new->{release_annotation} : '';

        if ($old_status eq "MATCH" && $new_status ne "MATCH"){
            push (@prev_correct_and_no_annotation_now, $id);
        }
        if ($old_status eq "DIFF" && $new_status eq "MATCH"){
            push (@new_annotations, $id);
        }
        if ($old_anno eq "NOT ANNOTATED" &&
            $new_anno ne "NOT ANNOTATED" &&
            $new_status eq "DIFF"){
            push (@no_prev_anno_and_new_annotations_wrong, $id);
        }
        # Annotated in both releases, but with different assignments.
        if ($old_anno ne $new_anno &&
            $old_anno ne "NOT ANNOTATED" &&
            $new_anno ne "NOT ANNOTATED"){
            $changed_assignment++;
            $changed_assignment_match_count++ if $new_status eq "MATCH";
        }
    }

    print "GENOME: $org ($genome)\n";
    print "ASSIGNMENT DIFFERENCE: $ff_count_diff\n";

    # print out the differences
    print "NEW MATCH ANNOTATIONS: " . scalar (@new_annotations) . "\n";

    print "PREVIOUS CORRECT, NOT CORRECT NOW: " . scalar @prev_correct_and_no_annotation_now . "\n";
    if ($print_detail){
        print_release_details(\@prev_correct_and_no_annotation_now, $releaseO, $releaseN);
    }

    print "NO PREVIOUS ANNOTATION, BUT NEW ANNOTATION WRONG: " . scalar(@no_prev_anno_and_new_annotations_wrong) . "\n";
    if ($print_detail){
        print_release_details(\@no_prev_anno_and_new_annotations_wrong, $releaseO, $releaseN);
    }

    if ($changed_assignment > 0){
        my $rounded = sprintf("%.3f", $changed_assignment_match_count/$changed_assignment);
        print "CHANGED ASSIGNMENTS: " . $rounded . " ( matches: $changed_assignment_match_count / changed: $changed_assignment)\n";
    }
    else{
        print "CHANGED ASSIGNMENTS: 0\n";
    }

    print "\n\n";
}

# print_release_details(\@ids, $releaseO, $releaseN)
#
# For every id, print one "Old Release" and one "New Release" line holding the
# id followed by the tab-separated status, release annotation and seed
# annotation recorded for it in the respective release. Missing values print
# as empty strings.
sub print_release_details{
    my ($ids, $releaseO, $releaseN) = @_;

    foreach my $id (@$ids){
        foreach my $pair (["Old Release", $releaseO], ["New Release", $releaseN]){
            my ($label, $release) = @$pair;
            my $record = ref $release->{$id} ? $release->{$id} : {};
            my @fields = map { defined $record->{$_} ? $record->{$_} : '' }
                         qw(status release_annotation seed_annotation);
            print "$label: $id\t" . join("\t", @fields) . "\n";
        }
    }
    return;
}

# read_file($file)
#
# Parse one compare log. Every line that contains a tab is split into four
# tab-separated fields: status, id, release annotation, seed annotation.
# Lines without a tab are ignored.
#
# Returns a hash reference holding
#   {assigned}      - number of parsed lines whose release annotation is not
#                     "NOT ANNOTATED"
#   {not_assigned}  - number of parsed lines whose release annotation is
#                     "NOT ANNOTATED"
#   {$id}           - { status, release_annotation, seed_annotation } for
#                     each id (a later line replaces an earlier one with the
#                     same id)
#
# If the file cannot be opened, a message goes to STDERR and the returned
# hash holds only the two zero counters.
sub read_file{
    my ($file) = @_;

    # Counters start at 0 so callers can do arithmetic on them even when the
    # log contributes nothing.
    my $release = { assigned => 0, not_assigned => 0 };

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode, and no global handle is shared between calls.
    my $fh;
    if (!open($fh, '<', $file)){
        print STDERR "read_file: cannot open '$file': $!\n";
        return $release;
    }

    # A named loop variable leaves the caller's $_ untouched.
    while (my $line = <$fh>){
        chomp $line;
        next if $line !~ /\t/;

        my ($status, $id, $release_annotation, $seed_annotation) = split(/\t/, $line);
        $id = '' if !defined $id;

        # A missing annotation field is counted as "assigned", since only the
        # literal text "NOT ANNOTATED" marks an unassigned sequence.
        if (defined $release_annotation && $release_annotation eq "NOT ANNOTATED"){
            $release->{not_assigned}++;
        }
        else{
            $release->{assigned}++;
        }

        $release->{$id} = {
            status             => $status,
            release_annotation => $release_annotation,
            seed_annotation    => $seed_annotation,
        };
    }
    close $fh;

    return $release;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3