[Bio] / FigKernelScripts / diff_tbls.pl Repository:
ViewVC logotype

View of /FigKernelScripts/diff_tbls.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Thu Dec 24 15:54:16 2015 UTC (3 years, 10 months ago) by gdpusch
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +1 -0 lines
Add test to flip to "metagenome" mode for genomes smaller than 25 kbp, so that PRODIGAL will not bail out.FigKernelPackages/Prodigal.pm

# -*- perl -*-

use FIG;
$fig = new FIG;
$code = &FIG::standard_genetic_code();

(($old_tbl_file = shift) && (-f $old_tbl_file)) || die "Could not find old tbl $old_tbl_file";
(($new_tbl_file = shift) && (-f $new_tbl_file)) || die "Could not find new tbl $new_tbl_file";

$old_tbl = &load_tbl($old_tbl_file);
$new_tbl = &load_tbl($new_tbl_file);

use constant FID    =>  0;
use constant LOCUS  =>  1;
use constant CONTIG =>  2;
use constant START  =>  3;
use constant STOP   =>  4;
use constant LEN    =>  5;
use constant STRAND =>  6;
use constant CLASS  =>  7;
use constant TYPE   =>  8;
use constant TAXID  =>  9;

$peg_offset = 0;
$rna_offset = 0;
foreach $key (sort by_locus keys %$new_tbl)
{
    $trans = "";
    $type  = $new_tbl->{$key}->[TYPE];
    $class = $new_tbl->{$key}->[CLASS];
    $locus = $new_tbl->{$key}->[LOCUS];
    
    $action = "";
    if ($old_tbl->{$key})
    {
	die "!!! Tax_ID mismatch" if ($old_tbl->{$key}->[TAXID] ne $old_tbl->{$key}->[TAXID]);
	
	if ($class eq 'changed')
	{
	    $old_fid =  $new_tbl->{$key}->[FID];
	    $old_fid =~ s/changed/fig/;
	    $changed{$old_fid} = 1;
	    
	    $action = 'change';
	}
	elsif ($class eq 'new')
	{
	    die "Something is wrong:\n", Dumper($new_tbl->{$key});
	}
	else
	{
	    #...Do nothing --- the entry has not changed...
	    $action = 'nothing';
	}
    }
    else
    {
	if    (($class eq 'new') || ($class eq 'fig'))
	{
	    $action = 'add';
	}
	elsif ($class eq 'changed')
	{
	    $old_fid =  $new_tbl->{$key}->[FID];
	    $old_fid =~ s/changed/fig/;
	    $changed{$old_fid} = 1;
	    
	    $action = 'change';
	}
    }

    if ($action eq 'nothing')
    {
	next;
    }
    elsif (($action eq 'add') || ($action eq 'change'))
    {
	if    ($type eq 'peg')
	{
	    $dna   =  $fig->dna_seq($new_tbl->{$key}->[TAXID], $locus);
	    $trans =  $fig->translate( $dna, $code, 'start' );
	    $trans =~ s/\*$//;   #...chop off trailing STOP, if it exists...
	    unless (($trans =~ m/^M/) || $fig->possibly_truncated($new_tbl->{$key}->[TAXID], $locus))
	    {
		warn "Possible problem with $new_tbl->{$key}->[FID]: codon=", substr($dna, 0, 3), ", AA=", substr($trans, 0, 1), "\n";
	    }
	    
	    if ($trans =~ m/\*/)
	    {
		die "Locus $locus contains STOP codons (frameshift?)\n$trans\n";
	    }
	    
	    $token = "$type." . $peg_offset++;
	}
	elsif ($type eq 'rna')
	{
	    $token = "$type." . $rna_offset++;
	}
	else
	{
	    die "Unknown type:\n", Dumper($new_tbl->{$key});
	}
    }
    
    if ($action eq 'add')
    {
	print "ADD\t$token\t$new_tbl->{$key}->[LOCUS]\t$trans\n";
    }
    elsif ($action eq 'change')
    {
	print "CHANGE\t$old_fid\t$token\t$new_tbl->{$key}->[LOCUS]\t$trans\n";
    }
    else
    {
	die "Unknown action $action";
    }
}

foreach $key (sort by_locus keys %$old_tbl)
{
    if (not $new_tbl->{$key})
    {
	if (not $changed{$old_tbl->{$key}->[FID]})
	{
	    print "DELETE\t$old_tbl->{$key}->[FID]\n";
	}
	else
	{
#	    warn "...Skipping changed $old_tbl->{$key}->[FID]\n";
	}
    }
}
exit(0);



sub load_tbl
{
    my ($file) = @_;
    my ($entry, $id, $locus, $contig, $beg, $end, $len, $strand, $class, $taxid, $type);
    
    open(TBL, "<$file") || die "Could not open $file";
    
    my $tbl = {};
    while (defined($entry = <TBL>))
    {
	chomp $entry;
	
	($id, $locus) = split /\t/, $entry;
	$id =~ m/^([^\|]+)\|(\d+\.\d+)\.([^\.]+)/;
	($class, $taxid, $type) = ($1, $2, $3);
	
	if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus)) 
	   && defined($contig) && $contig
	   && defined($beg)    && $beg
	   && defined($end)    && $end
	   && defined($len)    && $len
           && defined($strand) && $strand
           )
	{
	    $tbl->{"$contig\t$strand$end"} = [ $id, $locus, $contig, $beg, $end, $len, $strand, $class, $type, $taxid ];
	}
	else
	{
	    warn "INVALID ENTRY:\t$entry\n";
	}
    }
    
    return $tbl;
}

sub from_locus
{
    my ($locus) = @_;
    my ($contig, $beg, $end);
    
    if (  ($locus =~ m/^([^,]+)_(\d+)_\d+/)  && ($contig = $1) && ($beg = $2)
       && ($locus =~ m/[^,]+_\d+_(\d+)$/) && ($end = $1)
       )
    {
	return ($contig, $beg, $end, (1+abs($end-$beg)), (($beg < $end) ? '+' : '-'));
    }
    else
    {
	die "Invalid locus $locus";
    }
    
    return ();
}

sub by_locus
{
    my ($x, $a, $b) = @_;
    
    my (undef, undef, $A_contig, $A_beg, $A_end, $A_len, $A_strand) = $x->[$a];
    my (undef, undef, $B_contig, $B_beg, $B_end, $B_len, $B_strand) = $x->[$b];

    return (  ($A_contig cmp $B_contig) 
	   || (&FIG::min($A_beg, $A_end) <=> &FIG::min($B_beg, $B_end))
	   || ($B_len <=> $A_len)
	   || ($A_strand cmp $B_strand)
	   );
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3