[Bio] / FigKernelScripts / check_sims_basic.pl Repository:
ViewVC logotype

View of /FigKernelScripts/check_sims_basic.pl

Parent Directory | Revision Log


Revision 1.3 - (download) (as text) (annotate)
Tue Feb 8 21:32:27 2005 UTC (14 years, 10 months ago) by overbeek
Branch: MAIN
Changes since 1.2: +3 -0 lines
Added a comment. -- /gdp

# -*- perl -*-

# Ignore hangup signals (presumably so a long run survives its terminal going
# away).  Only the upper-case strings 'IGNORE' and 'DEFAULT' are special %SIG
# values; any other string is taken as the name of a handler subroutine, so
# the lower-case spelling never actually installed "ignore".
$SIG{HUP} = 'IGNORE';

use File::Path;
use File::Basename;

# Command-line synopsis, shown when the arguments are unusable.
$usage = "usage: check_sims_basic [-delint_dir=Dir] [-logfile=log] NR [SimsDir | Sims1 Sims2 Sims3 ...] < sims > checked.sims [2> errors (recommended if a logfile isn't specified)]";

$outdir  = "";          # -delint_dir target; "" means valid sims go to STDOUT
$logfh   = \*STDERR;    # handle that receives the reports about rejected sims
$trouble = 0;           # set when an option is unusable; checked after the scan

# Scan @ARGV once.  Options are spliced out in place, so $i is deliberately
# NOT advanced for them; every other argument is left where it is for the
# positional processing that follows.
for ($i = 0; $i < @ARGV; )
{
    # Options are anchored to the start of the argument so that a path which
    # merely contains "-delint_dir=" or "-logfile=" is not mistaken for one.
    if ($ARGV[$i] =~ m/^-{1,2}delint_dir=(\S+)/)
    {
        $outdir = $1;
        splice @ARGV, $i, 1;
        if (-d $outdir)
        {
            # An already-existing output directory is treated as an error.
            $trouble = 1;
            warn "$outdir exists";
        }
        else
        {
            # Legacy File::Path interface: (path, verbose flag, mode).
            mkpath($outdir, 0, 0777) || die "Could not create $outdir";
        }
    }
    elsif ($ARGV[$i] =~ m/^-{1,2}logfile=(\S+)/)
    {
        $logfile = $1;
        splice @ARGV, $i, 1;
        open(LOG, '>', $logfile) || die "Could not open $logfile: $!";
        $logfh = \*LOG;
    }
    elsif ($ARGV[$i] eq '-' || -s $ARGV[$i])
    {
        # A non-empty file, or '-' (which the later existence check on the
        # sims files also accepts): keep it as a positional argument.
        ++$i;
    }
    else
    {
        # Reported but not fatal: the argument stays in @ARGV.
        print STDERR "Invalid arg $ARGV[$i]\n";
        ++$i;
    }
}
die "There were bad args" if ($trouble);

# The first remaining argument is the NR file; it must exist and be non-empty.
(($nr = shift @ARGV) && (-s $nr))
    || die $usage;

# With no sims arguments, fall back to the Sims directory under
# $FIG_Config::data.
# NOTE(review): FIG_Config is not loaded anywhere in the visible part of this
# file -- presumably it is supplied by the surrounding environment; confirm.
if (@ARGV == 0) { push @ARGV, "$FIG_Config::data/Sims"; }

# A single directory argument stands for every non-dot entry inside it.
if ((@ARGV == 1) && (-d $ARGV[0]))
{
    $sims_dir = shift @ARGV;
    opendir(SIMS, $sims_dir) || die "Could not open $sims_dir: $!";
    @ARGV = map { "$sims_dir/$_" } grep { !/^\./ } readdir(SIMS);
    closedir(SIMS) || die "Could not close $sims_dir: $!";
}

# Check every named input up front so that all missing files are reported
# (one per line) before giving up.  '-' is exempt from the existence test.
$trouble = 0;
foreach $file (@ARGV)
{
    next if ($file eq '-');
    if (!-e $file)
    {
        print STDERR "Simfile $file does not exist\n";
        $trouble = 1;
    }
}
die "There were nonexistent input files" if $trouble;

# Build %ln (sequence ID => sequence length) from the NR file, which is read
# as '>'-introduced records: a header line whose first word is the ID,
# followed by the sequence text.
open(NR, '<', $nr) || die "Could not open $nr: $!\n$usage\n";
{
    # Read one record per <NR>.  local() puts the previous record separator
    # back when this block is left, so later line-oriented reads are unaffected.
    local $/ = "\n>";
    while (defined($_ = <NR>))
    {
        chomp;    # drops the "\n>" that terminated this record
        # Only the very first record still carries its leading '>'.
        if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
        {
            ($id, $seq) = ($1, $2);
            $seq =~ s/\s//gs;    # line breaks and blanks do not count as residues
            $ln{$id} = length($seq);
        }
    }
}
close(NR);

# Choose where valid sims are written.  With -delint_dir each input file gets
# a same-named file inside $outdir, starting here with the first input;
# otherwise everything goes to STDOUT.
if ($outdir)
{
    # Without at least one input name there is nothing to name the output after.
    @ARGV || die "No sims files to check, so no output can be created in $outdir\n";

    $file = "$outdir/" . basename($ARGV[0]);
    open(OUTPUT, '>', $file) || die "could not write-open $file: $!";
    print STDERR "Opening $file\n" if $ENV{FIG_VERBOSE};
    $outfh = \*OUTPUT;
}
else
{
    $outfh = \*STDOUT;
}

# Main pass: read every sims line from the files left in @ARGV, copy the lines
# whose two sequence lengths agree with %ln to $outfh, and report every other
# line on $logfh together with the file name and line number.
while (defined($_ = <>))
{
    chomp;

    # Under -delint_dir, make sure OUTPUT is the file named after the input
    # that is being read right now.  This is decided from $ARGV on the first
    # line of each file ($. is reset by the close(ARGV) below) rather than by
    # pre-opening the output for $ARGV[0] at eof: <> silently skips empty
    # inputs, which would leave a pre-opened output paired with the wrong
    # file, and after the last file there is no $ARGV[0] at all.
    if ($outdir && $. == 1)
    {
        $wanted = "$outdir/" . basename($ARGV);
        if ($wanted ne $file)
        {
            # Close explicitly so that a failed write is not silently lost.
            close(OUTPUT) || die "Could not close $file: $!";
            $file = $wanted;
            open(OUTPUT, '>', $file) || die "could not write-open $file: $!";
            print STDERR "Opening $file\n" if $ENV{FIG_VERBOSE};
            $outfh = \*OUTPUT;
        }
    }

    # The pattern checks the shape of the first twelve tab-separated columns
    # (the ones named $id1 .. $bsc below); $ln1 and $ln2 are not checked here.
    if ($_ =~ m/^\S+\t\S+\t(\d+|\d+\.\d+)(\t\d+){7}\t(\d+(\.\d*)?e[-+]?\d+|\d+\.\d+)\t(\d\.\d*e[-+]?\d+|\d+\.\d+|\d+)/)
    {
        ($id1,$id2,$iden,$ali_ln,$mis,$gaps,$b1,$e1,$b2,$e2,$psc,$bsc,$ln1,$ln2) =
            split(/\t/,$_);

        if ($ln{$id1} && $ln{$id2} && ($ln{$id1} == $ln1) && ($ln{$id2} == $ln2))
        {
            print $outfh "$_\n";   #...both lengths agree with NR: keep the sim
        }
        else
        {
            # Report each side separately: either the ID has no (non-zero)
            # length in %ln, or its length differs from the one on the line.
            if ($ln{$id1})
            {
                if ($ln{$id1} != $ln1) { print $logfh "badlen1\t$ARGV, $.:\t$id1\t$ln{$id1}\t$ln1\t$_\n"; }
            }
            else
            {
                print $logfh "undef1\t$ARGV, $.:\t$id1\t\t\t$_\n";
            }

            if ($ln{$id2})
            {
                if ($ln{$id2} != $ln2) { print $logfh "badlen2\t$ARGV, $.:\t$id2\t$ln{$id2}\t$ln2\t$_\n"; }
            }
            else
            {
                print $logfh "undef2\t$ARGV, $.:\t$id2\t\t\t$_\n";
            }
        }
    }
    else
    {
        print $logfh "INVALID FORMAT\t$ARGV, $.:\t$_\n";
    }
}
continue
{
#...Suggested by the "Perl Cookbook;" however, Ross does not like
# the use of the 'continue' block, so this needs to be re-written...

    if (eof)
    {
#...closing ARGV resets the line number to zero, so that we know which
# line of which file is bad...
        close(ARGV) || warn "Could not close $ARGV";
        print STDERR "Finished processing $ARGV\n\n" if $ENV{FIG_VERBOSE};
    }
}

# Under -delint_dir the last per-file output is still open; close it here so
# that a failed write is reported instead of being dropped at exit.
if ($outdir)
{
    close(OUTPUT) || die "Could not close $file: $!";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3