[Bio] / FigKernelScripts / filter_tbl_overlaps.pl Repository:
ViewVC logotype

View of /FigKernelScripts/filter_tbl_overlaps.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (download) (as text) (annotate)
Mon May 29 18:07:44 2006 UTC (13 years, 5 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.9: +14 -4 lines
set up find_neighbors to handle existing genomes

# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use FIG;
$fig = new FIG;

use Carp;
use Data::Dumper;

$| = 1;
$0 =~ m/([^\/]+)$/;
$usage = "$1 [-invert] [-weight_file=file -min_weight=integer] closest_org contigs tbl_rna tbl_peg1 tbl_peg2 ... > merged.tbl";

=pod

=head1         filter_tbl_overlaps

  Usage:       filter_tbl_overlaps [-invert] [-weight_file=file -min_weight=integer] closest_org contigs tbl_rna tbl_peg1 tbl_peg2 ... > merged.tbl

  Function: Resolves overlaps and conflicts between PEGs in one or more 
    tbl files, based on similarity evidence. Similarities must be with
    respect to organisms at least a moderate distance from 'closest_org'. 
    The order in which tbl files are entered is significant: Later files
    are considered to have lower priority in the resolution process. 
    Furthermore, the first tbl file is assumed to be of RNAs, not PEGs,
    and no PEG is allowed to overlap any of its entries; hence if no RNA tbl 
    is available, this argument should be given as '/dev/null'.

  Example:     filter_tbl_overlaps CloseOrgID contigs tbl_rna tbl_peg.1 tbl_peg.2 > tbl_peg

  Author:      Gordon D. Pusch (2004 Jan 28)

=cut

$invert      = "";
$weight_file = "";
$min_weight  = 999999999;
$trouble     = 0;
for ($i=0; $i < @ARGV; )
{
    if ($ARGV[$i] =~ m/-invert/)
    {
	$invert = splice(@ARGV, $i, 1);
    }
    elsif ($ARGV[$i] =~ m/-weight_file=(\S+)/)
    {
	$weight_file = $1;
	splice(@ARGV, $i, 1);
	if (! -e $weight_file) 
	{
	    $trouble = 1;
	    print STDERR "ERROR: weight-file $weight_file does not exist\n";
	}
    }
    elsif ($ARGV[$i] =~ m/-min_weight=(\d+)/)
    {
	$min_weight = $1;
	splice(@ARGV, $i, 1);
    }
    elsif ((($i == 0) && ($ARGV[0] =~ m/^\d+\.\d+$/) && (-d "$FIG_Config::organisms/$ARGV[0]"))
	   || (($i > 0) && (-e $ARGV[$i])))
    {
	++$i;
    }
    else
    {
	$trouble = 1;
	print STDERR "ERROR: Invalid arg $ARGV[$i]\n";
	++$i;
    }
}
die "\n\nAborting due to invalid args\n\nusage: $usage\n\n" if $trouble;

(@ARGV >= 4) || die "\n\tusage: $usage\n\n";

(($closest_org = shift @ARGV) && (-d "$FIG_Config::organisms/$closest_org"))
    || die "Closest-org directory $FIG_Config::organisms/$closest_org does not exist";

(-s ($contigs_file  = shift @ARGV))  
    || die "Contigs file $contigs_file does not exist";
((@tbl_files = @ARGV) > 1) || die "Not enough tbl files";

use constant RNA    =>  0;

use constant ENTRY  =>  0;
use constant FILE   =>  1;
use constant FID    =>  2;
use constant LOCUS  =>  3;
use constant CONTIG =>  4;
use constant START  =>  5;
use constant STOP   =>  6;
use constant LEN    =>  7;
use constant STRAND =>  8;
use constant SCORE  =>  9;
use constant WEIGHT => 10;
use constant TRUNC  => 11;
use constant SUBSYS => 12;

$min_len            =  90;
$min_ctg            = 500;
$max_convergent     =  50;
$max_divergent      = 150;
$max_normal_overlap = 150;
$max_rna_overlap    =  20;

$fuzz_factor        =   3;

($seq_of, $len_of) = &load_contigs($contigs_file);
$tbl = &load_tbls($weight_file, @tbl_files);

foreach $contig (sort by_len_and_id keys %$tbl)
{
    $x = $tbl->{$contig};
    
    print STDERR ("Contig $contig contains ", (scalar @$x), " loci\n") if $ENV{VERBOSE};
    print STDERR Dumper($x) if ($ENV{VERBOSE} && ($ENV{VERBOSE} > 2));
    
    for ($i=0; $i < @$x; ++$i)
    {
	for($j = $i+1; (($j < @$x) && &entries_overlap($x, $i, $j)); ++$j)
	{
	    if    (&bad_rna_overlap($x, $i, $j))  { &process_rna($x, $i, $j); }
	    elsif (&bad_rna_overlap($x, $j, $i))  { &process_rna($x, $j, $i); }
	}
    }
    
    for ($i=0; $i < @$x; ++$i)
    {
	next if &is_rna($x, $i);
	if ($deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"})
	{
	    print STDERR "Skipping deleted entry $i: $x->[$i]->[STRAND]$x->[$i]->[FID]\n" if $ENV{VERBOSE};
	    next;
	}
	else
	{
	    print STDERR "Checking for CTG-START or too-short of entry $i: $x->[$i]->[STRAND]$x->[$i]->[FID]\n"
		if $ENV{VERBOSE};
	}
	
	$start_codon = substr(&get_dna($x->[$i]), 0, 3);
	print STDERR "CTG START, i=$i: $x->[$i]->[STRAND]$x->[$i]->[FID] ($x->[$i]->[LEN])\n"
	    if ($ENV{VERBOSE} && ($start_codon eq 'CTG'));

	if ((not $x->[$i]->[SUBSYS]) && (not $x->[$i]->[TRUNC]))
	{
	    if (($start_codon eq 'CTG') && ($x->[$i]->[LEN] < $min_ctg))
	    {
		$score = &compute_score($x, $i);
		print STDERR "score_i = $score\n" if $ENV{VERBOSE};
		
		if ($score < (100 * $min_ctg))
		{
		    if (not $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"})
		    {
			print STDERR "Deleting CTG-START PEG $i with no support: $x->[$i]->[ENTRY]\n\n"
			    if $ENV{VERBOSE};
			$deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} 
			   = "Short:\t$x->[$i]->[WEIGHT]\t$score\t" . $x->[$i]->[ETRY];
		    }
		}
	    }
	    elsif ($x->[$i]->[LEN] < $min_len)
	    {
		print STDERR "Short PEG $i ($x->[$i]->[LEN])\n" if $ENV{VERBOSE};
		
		$score = &compute_score($x, $i);
		print STDERR "score_i = $score\n" if $ENV{VERBOSE};
		
		if ($score < 100*$min_len)
		{
		    if (not $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"})
		    {
			print STDERR "Deleting short PEG $i with no support: $x->[$i]->[ENTRY]\n\n" 
			    if $ENV{VERBOSE};
			$deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} 
			   = "Short:\t$x->[$i]->[WEIGHT]\t$score\t" . $x->[$i]->[ENTRY];
		    }
		}
	    }
	    else
	    {
#		print STDERR "\n" if $ENV{VERBOSE};
	    }
	}
    }
    
    for ($i=0; $i < @$x; ++$i)
    {
	if ($deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"})
	{
	    print STDERR "Skipping deleted entry $i: $x->[$i]->[STRAND]$x->[$i]->[FID]\n" if $ENV{VERBOSE};
	    next;
	}
	else
	{
	    print STDERR "\nChecking for embedded or embedding of entry $i: $x->[$i]->[STRAND]$x->[$i]->[FID]\n"
		if $ENV{VERBOSE};
	}
	
	for($j = $i+1; (($j < @$x) && &entries_overlap($x, $i, $j)); ++$j)
	{
	    print STDERR "...Checking for embedding against $j: $x->[$j]->[FID]\n"
		if $ENV{VERBOSE};
	    
	    if    (&embedded($x, $i, $j))  { &process_embedded($x, $i, $j); }
 	    elsif (&embedded($x, $j, $i))  { &process_embedded($x, $j, $i); }
	}
    }
    
    for ($i=0; $i < @$x; ++$i)
    {
	if ($deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"})
	{
	    print STDERR "Skipping deleted entry $i: $x->[$i]->[FID]\n" if $ENV{VERBOSE};
	    next;
	}
	else
	{
	    print STDERR "\nChecking for overlaps of $i: $x->[$i]->[FID]\n" if $ENV{VERBOSE};
	}
	
	for($j = $i+1; (($j < @$x) && &entries_overlap($x, $i, $j)); ++$j)
	{
	    if (&opposite_strand($x, $i, $j))
	    {
		if    (&convergent($x, $i, $j) > $max_convergent)  { &process_convergent($x, $i, $j); }
		elsif (&divergent( $x, $i, $j) > $max_divergent)   { &process_divergent( $x, $i, $j); }
	    }
	    elsif (&same_strand($x, $i, $j))
	    {
		if    (&same_stop($x, $i, $j))
		{ 
		    &process_same_stop($x, $i, $j);
		}
		elsif (&normal_overlap($x, $i, $j) > $max_normal_overlap)  
		{
		    &process_normal_overlap($x, $i, $j); 
		}
	    }
	    else 
	    { 
		&impossible_overlap_warning($x, $i, $j, "end of main loop");
	    }
	}
    }
}

foreach $contig (sort keys %$tbl)
{
    $x = $tbl->{$contig};
    
    for ($i=0; $i < @$x; ++$i)
    {
	$key = "$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]";
	
	if (&is_rna($x, $i))
	{
	    print STDERR "Skipping RNA entry:\t$x->[$i]->[ENTRY]\n" if $ENV{VERBOSE};
	}
	else
	{
	    if ($invert)
	    {
		if ($deleted{$key})
		{
		    print STDERR "$deleted{$key}\n";
		    print "$x->[$i]->[ENTRY]\n";
		}
	    }
	    else
	    {
		if ($deleted{$key})
		{
		    print STDERR "Skipping DEL entry:\t$deleted{$key}\n" if $ENV{VERBOSE};
		}
		else
		{
		    print STDERR "Accepted PEG entry:\t$x->[$i]->[ENTRY]\n" if $ENV{VERBOSE};
		    print "$x->[$i]->[ENTRY]\n";
		}
	    }
	}
    }
}
exit (0);


sub by_len_and_id
{
    return (($len_of->{$a} <=> $len_of->{$b}) || ($a cmp $b));
}


sub entries_overlap
{
    my ($x, $i, $j) = @_;
    
    return 0 if ($x->[$i]->[CONTIG] ne $x->[$j]->[CONTIG]);
    
    $ov = &overlaps( $x->[$i]->[START], $x->[$i]->[STOP]
		   , $x->[$j]->[START], $x->[$j]->[STOP]);
    
    return $ov;
}

sub overlaps
{
    my ($beg1, $end1, $beg2, $end2) = @_;
    
    my ($left1, $right1, $left2, $right2);
    
    $left1  = &FIG::min($beg1, $end1);
    $left2  = &FIG::min($beg2, $end2);
    
    $right1 = &FIG::max($beg1, $end1);
    $right2 = &FIG::max($beg2, $end2);
    
    if ($left1 > $left2)
    {
        ($left1, $left2)   = ($left2, $left1);
        ($right1, $right2) = ($right2, $right1);
    }
    
    $ov = 0;
    if ($right1 >= $left2) { $ov = &FIG::min($right1,$right2) - $left2 + 1; }
    
    return $ov;
}

sub type_of
{
    my ($id) = @_;
    
    return 'rna' if ($id =~ m/^fig\|\d+\.\d+\.rna\.\d+$/);
    return 'peg' if ($id =~ m/^fig\|\d+\.\d+\.peg\.\d+$/);
    return undef;
}

sub is_rna
{
    my ($x, $i) = @_;
    
    return ($x->[$i]->[FILE] == RNA);
}

sub same_strand
{
    my ($x, $i, $j) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    return ($x->[$i]->[STRAND] eq $x->[$j]->[STRAND]);
}

sub opposite_strand
{
    my ($x, $i, $j) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    return ($x->[$i]->[STRAND] ne $x->[$j]->[STRAND]);
}

sub bad_rna_overlap
{
    my ($x, $i, $j) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    #...For now, treat all RNA overlaps as "non-removable"
    if (&is_rna($x, $i))
    {
	print STDERR "...overlaps by "
	    . &overlaps($x->[$i]->[START], $x->[$i]->[STOP],  $x->[$j]->[START], $x->[$j]->[STOP])
	    . " bp\n" if $ENV{VERBOSE};
	return (&overlaps($x->[$i]->[START], $x->[$i]->[STOP],  $x->[$j]->[START], $x->[$j]->[STOP]) > $max_rna_overlap);
    }
    else
    {
	return 0;
    }
}

sub embedded
{
    my ($x, $i, $j) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    my $left_i  = &FIG::min($x->[$i]->[START], $x->[$i]->[STOP]);
    my $right_i = &FIG::max($x->[$i]->[START], $x->[$i]->[STOP]);
    
    my $beg_j   = $x->[$j]->[START];
    my $end_j   = $x->[$j]->[STOP];
    
    if (  ($left_i < $beg_j)   && ($left_i < $end_j) 
       && ($beg_j  < $right_i) && ($end_j  < $right_i)
       )
    {
	print STDERR "$j is embedded in $i\n" if $ENV{VERBOSE};
	return &overlaps($left_i, $right_i, $beg_j, $end_j);
    }
    
    return 0;
}

sub convergent
{
    my ($x, $i, $j) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    my $beg_i   = $x->[$i]->[START];
    my $end_i   = $x->[$i]->[STOP];
    
    my $beg_j   = $x->[$j]->[START];
    my $end_j   = $x->[$j]->[STOP];
    
    if (   &FIG::between($beg_i, $end_j, $end_i) 
       &&  &FIG::between($beg_j, $end_i, $end_j)
       && !&FIG::between($beg_j, $beg_i, $end_j)
       && !&FIG::between($beg_i, $beg_j, $end_i)
       )
    {
	return &overlaps($beg_i, $end_i, $beg_j, $end_j);
    }
    
    return 0;
}

sub divergent
{
    my ($x, $i, $j) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    my $beg_i   = $x->[$i]->[START];
    my $end_i   = $x->[$i]->[STOP];
    
    my $beg_j   = $x->[$j]->[START];
    my $end_j   = $x->[$j]->[STOP];
    
    if (   &FIG::between($beg_i, $beg_j, $end_i) 
       &&  &FIG::between($beg_j, $beg_i, $end_j)
       && !&FIG::between($beg_j, $end_i, $end_j)
       && !&FIG::between($beg_i, $end_j, $end_i)
       )
    {
	return &overlaps($beg_i, $end_i, $beg_j, $end_j);
    }
    
    return 0;
}

sub normal_overlap
{
    my ($x, $i, $j) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    my $beg_i   = $x->[$i]->[START];
    my $end_i   = $x->[$i]->[STOP];
    
    my $beg_j   = $x->[$j]->[START];
    my $end_j   = $x->[$j]->[STOP];
    
    if    (  ($x->[$i]->[STRAND] eq '+') && ($x->[$j]->[STRAND] eq '+')
          && ($beg_i < $beg_j) && ($beg_j <= $end_i) && ($end_i < $end_j)
          )
    {
	return &overlaps($beg_i, $end_i,  $beg_j, $end_j);
    }
    elsif (  ($x->[$i]->[STRAND] eq '-') && ($x->[$j]->[STRAND] eq '-')
	  && ($end_i < $end_j) && ($end_j <= $beg_i) && ($beg_i < $beg_j) 
	  )
    {
	return &overlaps($beg_i, $end_i,  $beg_j, $end_j);
    }
    else
    {
	&impossible_overlap_warning($x, $i, $j, "Opposite strands in a normal_overlap");
	return 0;
    }
}


sub same_stop
{
    my ($x, $i, $j) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    my $end_i   = $x->[$i]->[STOP];
    my $end_j   = $x->[$j]->[STOP];
    
    if (&same_strand($x, $i, $j) && ($end_i == $end_j))
    {
	return 1;
    } else {
	 return 0;
    }
}


sub impossible_overlap_warning
{
    my ($x, $i, $j, $msg) = @_;
    
    return 0 if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return 0 if $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"};
    
    if (defined($msg))
    {
	print STDERR ("Impossible overlap in $msg:\n"
		      , join("\t", @ { $x->[$i] }), "\n"
		      , join("\t", @ { $x->[$j] }), "\n\n")
	    if $ENV{VERBOSE};
	# confess "$msg";
    }
    else
    {
	print STDERR ("Impossible overlap:\n$i: "
		      , join("\t", @ { $x->[$i] }), "\n$j: "
		      , join("\t", @ { $x->[$j] }), "\n\n")
	    if $ENV{VERBOSE};
	# confess "aborted";
    }
    
    return 0;
}



sub process_rna
{
    my ($x, $i, $j) = @_;
    
    return 0 if ($x->[$i]->[FILE] > $x->[$j]->[FILE]);
    return 0 if ($deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} || $x->[$j]->[SUBSYS]);
    
    if (&is_rna($x, $i) && &is_rna($x, $j))
    {
	&impossible_overlap_warning($x, $i, $j, "process_rna");
    }
    else
    {
	#...For now, just delete $j;
	if (not $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"})
	{
	    print STDERR ("Deleting PEG $j overlapping RNA $i:\n"
			  , "$i: $x->[$i]->[ENTRY]\n$j: $x->[$j]->[ENTRY]\n\n")
		if $ENV{VERBOSE};
	    $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "RNA:\t-999999999\t" . $x->[$j]->[ENTRY];
	}
    }
}


sub process_embedded
{
    my ($x, $i, $j) = @_;
    print STDERR "Processing embedding of PEG $j in PEG $i\n"
	if (defined($ENV{VERBOSE}) && ($ENV{VERBOSE} > 1));
    print STDERR "$i:\t$x->[$i]->[FID]\tsubsys=$x->[$i]->[SUBSYS]\ttrunc=$x->[$i]->[TRUNC]\n"
	if (defined($ENV{VERBOSE}) && ($ENV{VERBOSE} > 1));
    print STDERR "$j:\t$x->[$j]->[FID]\tsubsys=$x->[$j]->[SUBSYS]\ttrunc=$x->[$j]->[TRUNC]\n"
	if (defined($ENV{VERBOSE}) && ($ENV{VERBOSE} > 1));
    
    (print STDERR "Skipping embedding test\n" && return 0)
	if ($x->[$i]->[FILE] > $x->[$j]->[FILE]);
    return 0 if ($deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} || $x->[$i]->[SUBSYS] || $x->[$i]->[TRUNC]);
    return 0 if ($deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} || $x->[$j]->[SUBSYS] || $x->[$j]->[TRUNC]);
    
    unless (defined($x->[$i]->[SCORE]))  { $x->[$i]->[SCORE] = &compute_score($x, $i); }
    unless (defined($x->[$j]->[SCORE]))  { $x->[$j]->[SCORE] = &compute_score($x, $j); }
    
    my $score_i  = $x->[$i]->[SCORE];
    my $score_j  = $x->[$j]->[SCORE];
    
    my $weight_i = $x->[$i]->[WEIGHT];
    my $weight_j = $x->[$j]->[WEIGHT];
    
    if (($score_j > $fuzz_factor * $score_i) && ($weight_i < $min_weight) && ($weight_i <= $weight_j))
    {
	print STDERR "Deleting embedding PEG $i in favor of embedded PEG $j, score_i = $score_i, score_j = $score_j\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} = "EMB_1:\t$weight_i\t$score_i\t" . $x->[$i]->[ENTRY];
    }
    elsif (($score_i > $fuzz_factor * $score_j) && ($weight_j < $min_weight) && ($weight_j <= $weight_i))
    {
	print STDERR "Deleting embedded PEG $j in favor of embedding PEG $i, score_j = $score_j, score_i = $score_i\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "EMB_2:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
    }
    elsif (($weight_j < $min_weight) && ($weight_j <= $weight_i))
    {
	#...In case of same score, delete $j, because $i must be longer...
	
	print STDERR "Deleting embedded PEG $j in favor of embedding PEG $i, score_j = $score_j, score_i = $score_i\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "EMB_3:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
    }
    else
    {
	print STDERR "Not deleting PEG $j embedded in PEG $i, score_j = $score_j, score_i = $score_i\n"
	    if $ENV{VERBOSE};
    }
    
    return;
}


sub process_convergent
{
    my ($x, $i, $j) = @_;
    
    return 0 if ($x->[$i]->[FILE] > $x->[$j]->[FILE]);
    return 0 if ($deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} || $x->[$i]->[SUBSYS] || $x->[$i]->[TRUNC]);
    return 0 if ($deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} || $x->[$j]->[SUBSYS] || $x->[$j]->[TRUNC]);
    
    unless (defined($x->[$i]->[SCORE]))  { $x->[$i]->[SCORE] = &compute_score($x, $i); }
    unless (defined($x->[$j]->[SCORE]))  { $x->[$j]->[SCORE] = &compute_score($x, $j); }
    
    my $score_i  = $x->[$i]->[SCORE];
    my $score_j  = $x->[$j]->[SCORE];
    
    my $weight_i = $x->[$i]->[WEIGHT];
    my $weight_j = $x->[$j]->[WEIGHT];
    
    if (($score_j > $fuzz_factor * $score_i) && ($weight_i < $min_weight) && ($weight_i <= $weight_j))
    {
	print STDERR "Deleting downstream convergent PEG $i in favor of $j, score_i = $score_i, score_j = $score_j\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} = "CNV_1:\t$weight_i\t$score_i\t" . $x->[$i]->[ENTRY];
    }
    elsif (($score_i > $fuzz_factor * $score_j) && ($weight_j < $min_weight) && ($weight_j <= $weight_i))
    {
	print STDERR "Deleting upstream convergent PEG $j in favor of $i, score_j = $score_j, score_i = $score_i\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "CNV_2:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
    }
    else
    {
# 	#...default to "priority"...
# 	if (($x->[$i]->[FILE] < $x->[$j]->[FILE]) && ($weight_i < $min_weight) && ($weight_i <= $weight_j))
# 	{
# 	    print STDERR "Deleting downstream convergent PEG $i in favor of $j, score_i = $score_i, score_j = $score_j\n"
# 		if $ENV{VERBOSE};
# 	    print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
# 	    print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
#	    
# 	    $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "CNV_3:\t$weight_i\t$score_i\t" . $x->[$j]->[ENTRY];
# 	}
# 	elsif (($x->[$i]->[FILE] > $x->[$j]->[FILE]) && ($weight_j < $min_weight) && ($weight_j <= $weight_i))
# 	{
# 	    print STDERR "Deleting upstream convergent PEG $j in favor of $i, score_j = $score_j, score_i = $score_i\n"
# 		if $ENV{VERBOSE};
# 	    print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
# 	    print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
#	    
# 	    $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "CNV_4:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
# 	}
# 	else
# 	{
# 	    #...Warn and move on...
#	    
# 	    &impossible_overlap_warning($x, $i, $j, "process_convergent");
# 	}
    }
    
    return;
}


sub process_divergent
{
    my ($x, $i, $j) = @_;
    
    return 0 if ($x->[$i]->[FILE] > $x->[$j]->[FILE]);
    return 0 if ($deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} || $x->[$i]->[SUBSYS] || $x->[$i]->[TRUNC]);
    return 0 if ($deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} || $x->[$j]->[SUBSYS] || $x->[$j]->[TRUNC]);
    
    unless (defined($x->[$i]->[SCORE]))  { $x->[$i]->[SCORE] = &compute_score($x, $i); }
    unless (defined($x->[$j]->[SCORE]))  { $x->[$j]->[SCORE] = &compute_score($x, $j); }
    
    my $score_i  = $x->[$i]->[SCORE];
    my $score_j  = $x->[$j]->[SCORE];
        
    my $weight_i = $x->[$i]->[WEIGHT];
    my $weight_j = $x->[$j]->[WEIGHT];
    
    if (($score_j > $fuzz_factor * $score_i) && ($weight_i < $min_weight) && ($weight_i <= $weight_j))
    {
	print STDERR "Deleting downstream divergent PEG $i in favor of $j, score_i = $score_i, score_j = $score_j\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} = "DIV_1:\t$weight_i\t$score_i\t" . $x->[$i]->[ENTRY];
    }
    elsif (($score_i > $fuzz_factor * $score_j) && ($weight_j < $min_weight) && ($weight_j <= $weight_i))
    {
	print STDERR "Deleting upstream divergent PEG $j in favor of $i, score_j = $score_j, score_i = $score_i\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "DIV_2:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
    }
    else
    {
# 	#...default to "priority"...
# 	if (($x->[$i]->[FILE] < $x->[$j]->[FILE]) && ($weight_j < $min_weight) && ($weight_j <= $weight_i))
# 	{
# 	    print STDERR "Deleting downstream divergent PEG $i in favor of $j, score_i = $score_i, score_j = $score_j\n"
# 		if $ENV{VERBOSE};
# 	    print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
# 	    print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
#	    
# 	    $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} = "DIV_3:\t$weight_i\t$score_i\t" . $x->[$i]->[ENTRY];
# 	}
# 	elsif (($x->[$i]->[FILE] > $x->[$j]->[FILE]) && ($weight_j < $min_weight) && ($weight_j <= $weight_i))
# 	{
# 	    print STDERR "Deleting upstream divergent PEG $j in favor of $i, score_j = $score_j, score_i = $score_i\n"
# 		if $ENV{VERBOSE};
# 	    print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
# 	    print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
#	    
# 	    $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "DIV_4:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
# 	}
# 	else
# 	{
# 	    #...Warn and move on...
#	    
# 	    &impossible_overlap_warning($x, $i, $j, "process_divergent");
# 	}
    }
    
    return;
}

sub process_same_stop
{
    my ($x, $i, $j) = @_;
    
    return 0 if ($x->[$i]->[FILE] > $x->[$j]->[FILE]);
    return 0 if ($deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} || $x->[$i]->[SUBSYS] || $x->[$i]->[TRUNC]);
    return 0 if ($deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} || $x->[$j]->[SUBSYS] || $x->[$j]->[TRUNC]);
    
    my $len_i    = $x->[$i]->[LEN];
    my $len_j    = $x->[$j]->[LEN];
    
    my $score_i  = $x->[$i]->[SCORE];
    my $score_j  = $x->[$j]->[SCORE];
        
    my $weight_i = $x->[$i]->[WEIGHT];
    my $weight_j = $x->[$j]->[WEIGHT];
    
    if ($len_i < $len_j)
    {
	print STDERR "Deleting same-STOP PEG $i in favor of $j, len_i = $len_i, len_j = $len_j\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} = "STP_1:\t$weight_i\t$score_i\t" . $x->[$i]->[ENTRY];
    }
    else
    {
	print STDERR "Deleting same-STOP PEG $j in favor of $i, len_i = $len_i, len_j = $len_j\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "STP_2:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
    }
    
    return;
}    

sub process_normal_overlap
{
    my ($x, $i, $j) = @_;
    
    return 0 if ($x->[$i]->[FILE] > $x->[$j]->[FILE]);
    return 0 if ($deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} || $x->[$i]->[SUBSYS] || $x->[$i]->[TRUNC]);
    return 0 if ($deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} || $x->[$j]->[SUBSYS] || $x->[$j]->[TRUNC]);
    
    unless (defined($x->[$i]->[SCORE]))  { $x->[$i]->[SCORE] = &compute_score($x, $i); }
    unless (defined($x->[$j]->[SCORE]))  { $x->[$j]->[SCORE] = &compute_score($x, $j); }
    
    my $score_i  = $x->[$i]->[SCORE];
    my $score_j  = $x->[$j]->[SCORE];
        
    my $weight_i = $x->[$i]->[WEIGHT];
    my $weight_j = $x->[$j]->[WEIGHT];
    
    if (($score_j > $fuzz_factor * $score_i) && ($weight_i < $min_weight) && ($weight_i <= $weight_j))
    {
	print STDERR "Deleting downstream same-strand PEG $i in favor of $j, score_i = $score_i, score_j = $score_j\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} = "NRM_1:\t$weight_i\t$score_i\t" . $x->[$i]->[ENTRY];
    }
    elsif (($score_i > $fuzz_factor * $score_j) && ($weight_j < $min_weight) && ($weight_j <= $weight_i))
    {
	print STDERR "Deleting upstream same-strand PEG $j in favor of $i, score_j = $score_j, score_i = $score_i\n"
	    if $ENV{VERBOSE};
	print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
	print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
	
	$deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "NRM_2:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
    }
    else
    {
# 	#...default to "priority"...
# 	if (($x->[$i]->[FILE] < $x->[$j]->[FILE])  && ($weight_i < $min_weight) && ($weight_i <= $weight_j))
# 	{
# 	    print STDERR "Deleting downstream same-strand PEG $i in favor of $j, score_i = $score_i, score_j = $score_j\n"
# 		if $ENV{VERBOSE};
# 	    print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
# 	    print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
#	    
# 	    $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"} = "NRM_3:\t$weight_i\t$score_i\t" . $x->[$i]->[ENTRY];
# 	}
# 	elsif (($x->[$i]->[FILE] > $x->[$j]->[FILE]) && ($weight_j < $min_weight) && ($weight_j <= $weight_i))
# 	{
# 	    print STDERR "Deleting upstream same-strand PEG $j in favor of $i, score_j = $score_j, score_i = $score_i\n"
# 		if $ENV{VERBOSE};
# 	    print STDERR "$i: $x->[$i]->[ENTRY]\n"   if $ENV{VERBOSE};
# 	    print STDERR "$j: $x->[$j]->[ENTRY]\n\n" if $ENV{VERBOSE};
#	    
# 	    $deleted{"$x->[$j]->[FILE]\t$x->[$j]->[LOCUS]"} = "NRM_4:\t$weight_j\t$score_j\t" . $x->[$j]->[ENTRY];
# 	}
# 	else
# 	{
# 	    #...Warn and move on...
#	    
# 	    &impossible_overlap_warning($x, $i, $j, "process_normal_overlap");
# 	}
    }
    
    return;
}


sub compute_score
{
    my ($x, $i) = @_;
    
    my $max_psc   = 1.0e-10;
    my $min_dist  = 0.002;
    my $min_match = 0.7;
    
    my ($id2, $len2, $psc, $beg2, $end2);
    my ($dist, $width);
    
    return 0   if $deleted{"$x->[$i]->[FILE]\t$x->[$i]->[LOCUS]"};
    return $x->[$i]->[SCORE]  if (defined($x->[$i]->[SCORE]));
    
    my $id    = $x->[$i]->[FID];
    my $beg   = $x->[$i]->[START];
    my $end   = $x->[$i]->[STOP];
    my $locus = $x->[$i]->[LOCUS]; 

#     my $pseq  = $fig->get_translation($id);
#    
#     unless ($pseq)
#     {
# 	print STDERR "Skipping $i: $id, locus $locus --- no sequence\n";
# 	return 0;
#     }
#    
#     my $tmp = $pseq;
#     $tmp =~ s/[^ARNDCQEGHILKMFPSTWYVBZ]//ig;
#     if (length($tmp) < 10)
#     {
# 	print STDERR "Skipping locus $locus --- too few valid AA chars: $pseq\n";
# 	return 0;
#     }
    
    print STDERR "Getting sims for $i: $id\t$locus\t$pseq\n" if $ENV{VERBOSE};
    print STDERR "\n"     if ((length($pseq) > 90) && $ENV{VERBOSE});
    my @hits = $fig->sims($id, 500, 1.0e-5, 'all');
    
    my $bscore = 0;
    my $dscore = 0;
    my $qscore = 0;
    my $num_hits = 1;     #...pseudo "Laplace correction" (prevents divide by zero)
    foreach my $hit (@hits)
    {
	$id2  = $hit->id2;
	$len2 = $hit->ln2;
	$psc  = $hit->psc;
	$bsc  = $hit->bsc;
	$beg2 = $hit->b2;
	$end2 = $hit->e2;

	next unless (&type_of($id2) eq 'peg');
	
	$dist  = $fig->crude_estimate_of_distance($closest_org, $fig->genome_of($id2));
	die "Could not get dist between $closest_org and ". $fig->genome_of($id2) unless defined $dist;
	
	$width = $end2 - $beg2 + 1;
	if (($psc <= $max_psc) && ($dist >= $min_dist)
	    && ($width > $min_match*$len2)
	    )
	{
	    ++$num_hits;
	    $bscore += $bsc;
	    $dscore += $dist;
	    $qscore += $dist * $bsc;
	    print STDERR "Accepting $id2:\t$psc\t$dist\t$width\t$len2\n"
		if ($ENV{VERBOSE} && ($ENV{VERBOSE} > 1));
	}
	else
	{
	    print STDERR "Rejecting $id2:\t$psc\t$dist\t$width\t$len2\n"
		if ($ENV{VERBOSE} && ($ENV{VERBOSE} > 1));
	}
    }
    
    $bscore /= $num_hits;
    $score   = $x->[$i]->[LEN] + 10000 * $qscore;
    if ($x->[$i]->[SUBSYS])  { $score *= 1000000; }
    
    $x->[$i]->[SCORE] = $score;
    return $score;
}

sub get_dna
{
    my ($feature) = @_;

    my $contig = $feature->[CONTIG];
    my $beg    = $feature->[START];
    my $end    = $feature->[STOP];
    my $len    = $feature->[LEN];
    my $strand = $feature->[STRAND];
    
    my $seq = "";
    if    ($feature->[STRAND] eq '+')
    {
	$seq = substr($$seq_of{$contig}, $beg-1, $len);
    } 
    elsif ($feature->[STRAND] eq '-')
    {
	$seq = substr($$seq_of{$contig}, $end-1, $len);
	$seq = $ { &FIG::rev_comp(\$seq) };
    }
    else
    {
	die "Invalid strand '$strand' for feature:\n", Dumper($feature);
    }
    
    return uc( $seq );
}    

sub protseq
{
    my ($contig, $beg, $end) = @_;
    my ($seq, $pseq);
    
    if ($beg < $end)
    {
	$seq = substr($$seq_of{$contig}, $beg-1, 1+($end-$beg));
    } 
    else 
    {
	$seq = substr($$seq_of{$contig}, $end-1, 1+($beg-$end));
	$seq = $ { &rev_comp(\$seq) };
    }
    
    $pseq = &translate(\$seq);
    substr($$pseq,0,1) = &toM(\$seq);
    
    return $pseq;
}


sub load_tbls
{
    my ($weight_file, @tbl_files) = @_;
    my ($entry, $fid, $org, $locus, $contig, $beg, $end, $strand, $len);
    
    my %weight;
    if ($weight_file)
    {
	my @fids;
	open(TMP, "<$weight_file") || die "Could not open $weight_file";
	while (defined($entry = <TMP>))
	{
	    chomp $entry;
	    @fids = split /\t/, $entry;
	    foreach $fid (@fids)
	    {
		#...lazy hack --- avoids testing entries for Org-ID...
		$weight{$fid} = (scalar @fids);
	    }
	}
    }
    
    my $tbl  = {};
    my $file = 0;
    foreach my $tbl_file (@tbl_files)
    {
	my $num_fids = 0;
	open(TBL,"<$tbl_file") || die "could not open $tbl_file";
	print STDERR "Loading $tbl_file ...\n" if $ENV{VERBOSE};
	
	while (defined($entry = <TBL>))
	{
	    ++$num_fids;
	    chomp $entry;
	    if ($entry =~ /^(\S+)\s+(\S+)/)
	    {
		($fid, $locus) = ($1, $2);
		if ($fid =~ m/^fig\|(\d+\.\d+)/) { $org = $1; } else { die "Invalid FID $fid"; }

		($contig, $beg, $end) = $fig->boundaries_of($locus);
		
		if ($beg < $end) { $strand = '+'; } else { $strand = '-'; }
		
		$len = 1 + abs($end-$beg);
		
		unless (defined($tbl->{$contig})) { $tbl->{$contig} = []; }
		push(@ { $tbl->{$contig} }
		     , [$entry, $file, $fid, $locus, $contig, $beg, $end, $len, $strand
			, undef, (($file == RNA) ? 999999999 : (defined($weight{$fid}) ? $weight{$fid} : 1)) 
			, &possibly_truncated($org, $locus), $fig->peg_to_subsystems($fid)
			] );
	    }
	    else
	    {
		print STDERR "Skipping invalid entry $entry\n";
	    }
	}
	print STDERR "Loaded $num_fids features from $tbl_file\n\n" if $ENV{VERBOSE};
	close(TBL);
	++$file;
    }
    
    foreach my $contig (keys(%$tbl))
    {
	my $x = $tbl->{$contig};
	$tbl->{$contig} = [sort { &FIG::min($a->[START],$a->[STOP]) <=> &FIG::min($b->[START],$b->[STOP]) } @$x];
    }
    
    return $tbl;
}



sub load_contigs
{
    my ($contigs_file) = @_;
    
    my $seq_of = {};
    my $len_of = {};
    
    open (CONTIGS, "<$contigs_file") or die "could not open $contigs_file to read";
    while ( (! eof(CONTIGS)) && ( my ($id, $seqP) = &get_a_fasta_record(\*CONTIGS) ) )
    {
    	$$seqP =~ tr/a-z/A-Z/;
	$seq_of->{$id} = $$seqP;
	$len_of->{$id} = length($$seqP);
    }
    close(CONTIGS) or die "could not close $contigs_file";
    
    return ($seq_of, $len_of);
}
 
sub get_a_fasta_record
{
    my ($fh) = @_;
    my ( $old_eol, $entry, @lines, $head, $id, $seq );
 
    if (not defined($fh))  { $fh = \*STDIN; }
 
    $old_eol = $/;
    $/ = "\n>";
 
    $entry =  <$fh>;
    chomp $entry;
    @lines =  split( /\n/, $entry );
    $head  =  shift @lines;
    $head  =~ s/^>?//;
    $head  =~ m/^(\S+)/;
    $id    =  $1;
    
    $seq   = join( "", @lines );
 
    $/ = $old_eol;
    return ( $id, \$seq );
}

sub possibly_truncated
{
    my($arg1, $arg2) = @_;
    my($fid,  $loc,  $genome);
    
    if ($fig->is_genome($arg1)) { return $fig->possibly_truncated(@_); }
    
    if (($arg1 =~ m/^fig\|\d+\.\d+\.([^\.]+)\.\d+$/) && (not defined($arg2)))
    {
        $fid    = $arg1;
        $loc    = $self->feature_location($fid);
        $genome = $self->genome_of($fid);
    }
    elsif (($arg1 =~ m/^\d+\.\d+$/) && ($arg2 =~ m/\S+_\d+_\d+/))
    {
        $genome = $arg1;
        $loc    = $arg2;
    }
    else
    {
        confess "Invalid Arguments ", join(", ", @_), " to FIG::possibly_truncated";
    }
    
    my ($contig,$beg,$end) = &boundaries_of($loc);
    if ((not &near_end($contig, $beg)) && (not &near_end($contig, $end)))
    {
        return 0;
    }
    return 1;
}

sub near_end
{
    my ($contig, $x) = @_;

    return (($x < 300) || ($x > ($len_of->{$contig} - 300)));
}

sub boundaries_of
{
    my($location) = (@_ == 1) ? $_[0] : $_[1];
    my($contigQ);

    if (defined($location))
    {
        my @exons = split(/,/,$location);
        my($contig,$beg,$end);

        if (($exons[0] =~ /^(\S+)_(\d+)_\d+$/) &&
            (($contig,$beg) = ($1,$2)) && ($contigQ = quotemeta $contig) &&
            ($exons[$#exons] =~ /^$contigQ\_\d+_(\d+)$/) &&
            ($end = $1))
        {
            return ($contig, $beg, $end);
        }
    }
    
    return undef;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3