[Bio] / FigKernelScripts / compare_tbls.pl Repository:
ViewVC logotype

View of /FigKernelScripts/compare_tbls.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Thu Nov 9 09:59:15 2006 UTC (13 years ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.4: +1 -2 lines
Minor change to output. -- /gdp

# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use FIG;
$fig = new FIG;
$code = &FIG::standard_genetic_code();

$0 =~ m/([^\/]+)$/;
$self  = $1;
$usage = "$self  old_tbl_file new_tbl_file";

(($old_tbl_file = shift) && (-f $old_tbl_file)) || die "Could not find old_tbl_file $old_tbl_file\n\n\tusage: $usage\n\n";
(($new_tbl_file = shift) && (-f $new_tbl_file)) || die "Could not find new_tbl_file $new_tbl_file\n\n\tusage: $usage\n\n";

($old_tbl, $old_pegs) = &load_tbl($old_tbl_file);
($new_tbl, $new_pegs) = &load_tbl($new_tbl_file);

use constant FID    =>  0;
use constant LOCUS  =>  1;
use constant CONTIG =>  2;
use constant START  =>  3;
use constant STOP   =>  4;
use constant LEN    =>  5;
use constant STRAND =>  6;
use constant TYPE   =>  7;
use constant TAXID  =>  8;
use constant ENTRY  =>  9;

$identical = 0;
$same_stop = 0;
$differ    = 0;
$short     = 0;
$long      = 0;
$added     = 0;
$lost      = 0;
foreach $key (keys %$new_tbl)
{
    $type  = $new_tbl->{$key}->[TYPE];
    $locus = $new_tbl->{$key}->[LOCUS];
    
    if (defined($old_tbl->{$key}))
    {
	die "!!! Tax_ID mismatch" if ($old_tbl->{$key}->[TAXID] ne $old_tbl->{$key}->[TAXID]);
	
	++$same_stop;
	if    ($old_tbl->{$key}->[LEN] == $new_tbl->{$key}->[LEN])
	{
	    ++$identical;
	}
	elsif ($old_tbl->{$key}->[LEN] <  $new_tbl->{$key}->[LEN])
	{
	    ++$short;
	    ++$differ;
	    warn "short:\t$key\n$old_tbl->{$key}->[LOCUS]\n$new_tbl->{$key}->[LOCUS]\n" if $ENV{VERBOSE};
	}
	elsif ($old_tbl->{$key}->[LEN] >  $new_tbl->{$key}->[LEN])
	{
	    ++$long;
	    ++$differ;
	    warn "long:\t$key\n$old_tbl->{$key}->[LOCUS]\n$new_tbl->{$key}->[LOCUS]\n" if $ENV{VERBOSE};
	}
	else
	{
	    die "Could not handle $key";
	}
    }
    else
    {
	++$added;
	++$differ;
	warn "added:\t$key\n$new_tbl->{$key}->[LOCUS]\n" if $ENV{VERBOSE};
    }
}

foreach $key (keys %$old_tbl)
{
    if (not defined($new_tbl->{$key}))
    {
	++$lost;
	++$differ;
	warn "lost:\t$key\n$old_tbl->{$key}->[LOCUS]\n" if $ENV{VERBOSE};
    }
}

print "old_num   = $old_pegs PEGs\n";
print "new_num   = $new_pegs PEGs\n\n";

printf "same_stop = %4u  %5.2f  %5.2f\n"
    , $same_stop, 100*$same_stop/$old_pegs, 100*$same_stop/$new_pegs;

printf "added     = %4u  %5.2f  %5.2f\n"
    , $added, 100*$added/$old_pegs, 100*$added/$new_pegs;

printf "lost      = %4u  %5.2f  %5.2f\n\n"
    , $lost, 100*$lost/$old_pegs, 100*$lost/$new_pegs;


printf "identical = %4u  %5.2f  %5.2f  %5.2f\n"
    , $identical, 100*$identical/$old_pegs, 100*$identical/$new_pegs, 100*$identical/$same_stop;;

printf "differ    = %4u  %5.2f  %5.2f  %5.2f\n"
    , $differ, 100*$differ/$old_pegs, 100*$differ/$new_pegs, 100*$differ/$same_stop;

printf "short     = %4u  %5.2f  %5.2f  %5.2f\n"
    , $short, 100*$short/$old_pegs, 100*$short/$new_pegs, 100*$short/$same_stop;

printf "long      = %4u  %5.2f  %5.2f  %5.2f\n\n"
    , $long, 100*$long/$old_pegs, 100*$long/$new_pegs, 100*$long/$same_stop;


sub load_tbl
{
    my ($file) = @_;
    my ($tbl, $num_pegs);
    my ($entry, $id, $locus, $contig, $beg, $end, $len, $strand, $class, $taxid, $type);
    
    open(TBL, "<$file") || die "Could not open $file";
    
    while (defined($entry = <TBL>))
    {
	chomp $entry;
	
	($id, $locus) = split /\t/, $entry;
	$id =~ m/^[^\|]+\|(\d+)\.\d+\.([^\.]+)/;
	($taxid, $type) = ($1, $2);
	
	if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus)) 
	   && defined($contig) && $contig
	   && defined($beg)    && $beg
	   && defined($end)    && $end
	   && defined($len)    && $len
           && defined($strand) && $strand
           )
	{
	    $key = "$contig\t$strand$end";
	    
	    if    ($type eq 'peg')  { ++$num_pegs; }
	    else  { warn "Unknown feature type: $entry\n"; }
	    
	    if (not defined($tbl->{$key}))
	    {
		$tbl->{$key} = [ $id, $locus, $contig, $beg, $end, $len, $strand, $type, $taxid, $entry ];
	    }
	    else
	    {
		warn "same-STOP:\t$entry\n";
	    }
	}
	else
	{
	    warn "INVALID ENTRY:\t$entry\n";
	}
    }
    
    return ($tbl, $num_pegs);
}

sub from_locus
{
    my ($locus) = @_;
    
    if ($locus =~ m/^(\S+)_(\d+)_(\d+)$/)
    {
	return ($1, $2, $3, (1+abs($3-$2)), (($2 < $3) ? '+' : '-'));
    }
    else
    {
	die "Invalid locus $locus";
    }
    
    return ();
}

sub by_locus
{
    my ($x, $a, $b) = @_;
    
    my (undef, undef, $A_contig, $A_beg, $A_end, $A_len, $A_strand) = $x->[$a];
    my (undef, undef, $B_contig, $B_beg, $B_end, $B_len, $B_strand) = $x->[$b];

    return (  ($A_contig cmp $B_contig) 
	   || (&FIG::min($A_beg, $A_end) <=> &FIG::min($B_beg, $B_end))
	   || ($B_len <=> $A_len)
	   || ($A_strand cmp $B_strand)
	   );
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3