[Bio] / FigKernelScripts / diff_tbls.pl Repository:
ViewVC logotype

View of /FigKernelScripts/diff_tbls.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Thu Jan 12 23:42:25 2006 UTC (14 years, 1 month ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07
Computes difference betwween two tbl files. -- /gdp

# -*- perl -*-

use FIG;
$fig = new FIG;
$code = &FIG::standard_genetic_code();

(($old_tbl_file = shift) && (-f $old_tbl_file)) || die "Could not find old tbl $old_tbl_file";
(($new_tbl_file = shift) && (-f $new_tbl_file)) || die "Could not find new tbl $new_tbl_file";

$old_tbl = &load_tbl($old_tbl_file);
$new_tbl = &load_tbl($new_tbl_file);

use constant FID    =>  0;
use constant LOCUS  =>  1;
use constant CONTIG =>  2;
use constant START  =>  3;
use constant STOP   =>  4;
use constant LEN    =>  5;
use constant STRAND =>  6;
use constant CLASS  =>  7;
use constant TYPE   =>  8;
use constant TAXID  =>  9;

$peg_offset = 0;
$rna_offset = 0;
foreach $key (sort by_locus keys %$new_tbl)
{
    $trans = "";
    $type  = $new_tbl->{$key}->[TYPE];
    $class = $new_tbl->{$key}->[CLASS];
    $locus = $new_tbl->{$key}->[LOCUS];
    
    $action = "";
    if ($old_tbl->{$key})
    {
	die "!!! Tax_ID mismatch" if ($old_tbl->{$key}->[TAXID] ne $old_tbl->{$key}->[TAXID]);
	
	if ($class eq 'changed')
	{
	    $old_fid =  $new_tbl->{$key}->[FID];
	    $old_fid =~ s/changed/fig/;
	    $changed{$old_fid} = 1;
	    
	    $action = 'change';
	}
	elsif ($class eq 'new')
	{
	    die "Something is wrong:\n", Dumper($new_tbl->{$key});
	}
	else
	{
	    #...Do nothing --- the entry has not changed...
	    $action = 'nothing';
	}
    }
    else
    {
	if    (($class eq 'new') || ($class eq 'fig'))
	{
	    $action = 'add';
	}
	elsif ($class eq 'changed')
	{
	    $old_fid =  $new_tbl->{$key}->[FID];
	    $old_fid =~ s/changed/fig/;
	    $changed{$old_fid} = 1;
	    
	    $action = 'change';
	}
    }

    if ($action eq 'nothing')
    {
	next;
    }
    elsif (($action eq 'add') || ($action eq 'change'))
    {
	if    ($type eq 'peg')
	{
	    $dna   =  $fig->dna_seq($new_tbl->{$key}->[TAXID], $locus);
	    $trans =  $fig->translate( $dna, $code, 'start' );
	    $trans =~ s/\*$//;   #...chop off trailing STOP, if it exists...
	    unless (($trans =~ m/^M/) || $fig->possibly_truncated($new_tbl->{$key}->[TAXID], $locus))
	    {
		warn "Possible problem with $new_tbl->{$key}->[FID]: codon=", substr($dna, 0, 3), ", AA=", substr($trans, 0, 1), "\n";
	    }
	    
	    if ($trans =~ m/\*/)
	    {
		die "Locus $locus contains STOP codons (frameshift?)\n$trans\n";
	    }
	    
	    $token = "$type." . $peg_offset++;
	}
	elsif ($type eq 'rna')
	{
	    $token = "$type." . $rna_offset++;
	}
	else
	{
	    die "Unknown type:\n", Dumper($new_tbl->{$key});
	}
    }
    
    if ($action eq 'add')
    {
	print "ADD\t$token\t$new_tbl->{$key}->[LOCUS]\t$trans\n";
    }
    elsif ($action eq 'change')
    {
	print "CHANGE\t$old_fid\t$token\t$new_tbl->{$key}->[LOCUS]\t$trans\n";
    }
    else
    {
	die "Unknown action $action";
    }
}

foreach $key (sort by_locus keys %$old_tbl)
{
    if (not $new_tbl->{$key})
    {
	if (not $changed{$old_tbl->{$key}->[FID]})
	{
	    print "DELETE\t$old_tbl->{$key}->[FID]\n";
	}
	else
	{
#	    warn "...Skipping changed $old_tbl->{$key}->[FID]\n";
	}
    }
}



sub load_tbl
{
    my ($file) = @_;
    my ($entry, $id, $locus, $contig, $beg, $end, $len, $strand, $class, $taxid, $type);
    
    open(TBL, "<$file") || die "Could not open $file";
    
    my $tbl = {};
    while (defined($entry = <TBL>))
    {
	chomp $entry;
	
	($id, $locus) = split /\t/, $entry;
	$id =~ m/^([^\|]+)\|(\d+\.\d+)\.([^\.]+)/;
	($class, $taxid, $type) = ($1, $2, $3);
	
	if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus)) 
	   && defined($contig) && $contig
	   && defined($beg)    && $beg
	   && defined($end)    && $end
	   && defined($len)    && $len
           && defined($strand) && $strand
           )
	{
	    $tbl->{"$contig\t$strand$end"} = [ $id, $locus, $contig, $beg, $end, $len, $strand, $class, $type, $taxid ];
	}
	else
	{
	    warn "INVALID ENTRY:\t$entry\n";
	}
    }
    
    return $tbl;
}

sub from_locus
{
    my ($locus) = @_;
    my ($contig, $beg, $end);
    
    if (  ($locus =~ m/^([^,]+)_(\d+)_\d+/)  && ($contig = $1) && ($beg = $2)
       && ($locus =~ m/[^,]+_\d+_(\d+)$/) && ($end = $1)
       )
    {
	return ($contig, $beg, $end, (1+abs($end-$beg)), (($beg < $end) ? '+' : '-'));
    }
    else
    {
	die "Invalid locus $locus";
    }
    
    return ();
}

sub by_locus
{
    my ($x, $a, $b) = @_;
    
    my (undef, undef, $A_contig, $A_beg, $A_end, $A_len, $A_strand) = $x->[$a];
    my (undef, undef, $B_contig, $B_beg, $B_end, $B_len, $B_strand) = $x->[$b];

    return (  ($A_contig cmp $B_contig) 
	   || (&FIG::min($A_beg, $A_end) <=> &FIG::min($B_beg, $B_end))
	   || ($B_len <=> $A_len)
	   || ($A_strand cmp $B_strand)
	   );
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3