# -*- perl -*-
#
# check_sims_basic -- sanity-check similarity ("sims") records against an NR
# FASTA file.
#
# Usage:
#   check_sims_basic [-delint_dir=Dir] [-logfile=log] NR [SimsDir | Sims1 Sims2 ...]
#
# What the script does:
#   1. Reads the NR FASTA file and records the sequence length of every id.
#   2. Reads each sims file line by line (via <>, so '-' means STDIN).
#   3. A line that matches the expected tab-separated format AND whose 13th
#      and 14th columns equal the NR lengths of the ids in columns 1 and 2
#      is written to the output.
#   4. Every other line is reported on the log handle, tagged with one of
#      "badlen1", "badlen2", "undef1", "undef2" or "INVALID FORMAT", together
#      with the file name and line number.
#
# Options:
#   -delint_dir=Dir  Write one checked file per input file into Dir (Dir must
#                    not exist yet; it is created).  Without this option all
#                    valid lines go to STDOUT.
#   -logfile=log     Write problem reports to "log" instead of STDERR.
#
# If no sims argument is given, "$FIG_Config::data/Sims" is used.  A single
# directory argument is expanded to all of its non-dot entries.
#
# NOTE(review): $FIG_Config::data is not loaded anywhere in this file;
# presumably a wrapper/header provides FIG_Config -- confirm.

use strict;
use warnings;

use File::Path;
use File::Basename;

# Perl only recognises the upper-case string 'IGNORE' as "ignore this signal";
# a lower-case 'ignore' would be taken as the name of a (nonexistent) handler.
$SIG{HUP} = 'IGNORE';

my $usage = "usage: check_sims_basic [-delint_dir=Dir] [-logfile=log] NR [SimsDir | Sims1 Sims2 Sims3 ...] < sims > checked.sims [2> errors (recommended if a logfile isn't specified)]";

my $outdir  = "";
my $logfh   = \*STDERR;
my $trouble = 0;

#...Pull the option switches out of @ARGV; everything else stays in place.
#   $i only advances when the current argument is kept, because splice()
#   shifts the following arguments down into slot $i.
for (my $i = 0; $i < @ARGV; ) {
    if ($ARGV[$i] =~ m/-delint_dir=(\S+)/) {
        $outdir = $1;
        splice @ARGV, $i, 1;

        if (-d $outdir) {
            $trouble = 1;
            warn "$outdir exists";
        }
        else {
            mkpath($outdir, 0, 0777) || die "Could not create $outdir";
        }
    }
    elsif ($ARGV[$i] =~ m/-logfile=(\S+)/) {
        my $logfile = $1;
        splice @ARGV, $i, 1;

        open(my $log, '>', $logfile) || die "Could not open $logfile";
        $logfh = $log;
    }
    elsif (-s $ARGV[$i]) {
        ++$i;
    }
    else {
        #...Only a diagnostic: the argument is kept and checked again below.
        print STDERR "Invalid arg $ARGV[$i]\n";
        ++$i;
    }
}
die "There were bad args" if ($trouble);

#...First remaining argument is the NR FASTA file; it must be non-empty.
my $nr = shift @ARGV;
($nr && (-s $nr)) || die $usage;

if (@ARGV == 0) {
    no warnings 'once';    # FIG_Config is not loaded by this file itself
    push @ARGV, "$FIG_Config::data/Sims";
}

#...A single directory argument stands for every non-dot entry inside it.
if ((@ARGV == 1) && (-d $ARGV[0])) {
    my $sims_dir = shift @ARGV;

    opendir(my $dh, $sims_dir) || die "Could not open $sims_dir";
    @ARGV = map { "$sims_dir/$_" } grep { !/^\./ } readdir($dh);
    closedir($dh) || die "Could not close $sims_dir";
}

#...Refuse to start if any named input is missing ('-' is STDIN, so skip it).
$trouble = 0;
foreach my $file (@ARGV) {
    next if ($file eq '-');

    if (!-e $file) {
        print STDERR "Simfile $file does not exist\n";
        $trouble = 1;
    }
}
die "There were nonexistent input files" if $trouble;

#...Load the sequence length of every id in the NR file.  Records are read
#   with "\n>" as the separator, so each read returns one FASTA entry; the
#   first entry still carries its leading '>', hence the optional '>?'.
my %ln;
open(my $nr_fh, '<', $nr) || die $usage;
{
    local $/ = "\n>";
    while (defined(my $entry = <$nr_fh>)) {
        chomp $entry;
        if ($entry =~ /^>?(\S+)[^\n]*\n(.*)/s) {
            my ($id, $seq) = ($1, $2);
            $seq =~ s/\s//gs;
            $ln{$id} = length($seq);
        }
    }
}
close($nr_fh);

#...Valid sims go to STDOUT, or to one file per input when -delint_dir is set.
my $outfh = \*STDOUT;
if ($outdir) {
    $outfh = open_delint_output($outdir, $ARGV[0]);
}

while (defined(my $line = <>)) {
    chomp $line;

    if ($line =~ m/^\S+\t\S+\t(\d+|\d+\.\d+)(\t\d+){7}\t(\d+(\.\d*)?e[-+]?\d+|\d+\.\d+)\t(\d\.\d*e[-+]?\d+|\d+\.\d+|\d+)/) {
        #...The pattern only vouches for the first 12 columns; the two length
        #   columns may be missing or non-numeric.  Such lines are simply
        #   reported as length mismatches, so keep Perl quiet about them.
        no warnings qw(numeric uninitialized);

        my ($id1, $id2, $ln1, $ln2) = (split(/\t/, $line))[0, 1, 12, 13];

        my @problems;
        for my $side ([1, $id1, $ln1], [2, $id2, $ln2]) {
            my ($which, $id, $reported) = @$side;
            my $known = $ln{$id};

            next if ($known && ($known == $reported));

            push @problems,
                $known ? "badlen$which\t$ARGV, $.:\t$id\t$known\t$reported\t$line\n"
                       : "undef$which\t$ARGV, $.:\t$id\t\t\t$line\n";
        }

        if (@problems) {
            print $logfh @problems;
        }
        else {
            print $outfh "$line\n";    #...print valid sims to the output
        }
    }
    else {
        print $logfh "INVALID FORMAT\t$ARGV, $.:\t$line\n";
    }
}
continue {
    #...Suggested by the "Perl Cookbook"; however, Ross does not like
    #   the use of the 'continue' block, so this needs to be re-written...
    if (eof) {
        #...Closing ARGV resets $. so that reported line numbers are
        #   relative to the file they occur in.
        close(ARGV) || warn "Could not close $ARGV";
        print STDERR "Finished processing $ARGV\n\n" if $ENV{FIG_VERBOSE};

        #...At this point <> has already shifted the current file off @ARGV,
        #   so $ARGV[0] is the NEXT input.  Only switch output files when
        #   there really is a next input; after the last file @ARGV is empty.
        if ($outdir && @ARGV) {
            close($outfh) || warn "Could not close output for $ARGV";
            $outfh = open_delint_output($outdir, $ARGV[0]);
        }
    }
}

#...Buffered write errors only surface at close, so check the last file too.
if ($outdir) {
    close($outfh) || warn "Could not close last output file in $outdir";
}

# open_delint_output($dir, $sim_file)
#
# Opens "$dir/<basename of $sim_file>" for writing and returns the handle.
# Dies if the file cannot be opened.  Announces the file on STDERR when the
# FIG_VERBOSE environment variable is set.
sub open_delint_output {
    my ($dir, $sim_file) = @_;

    my $path = "$dir/" . basename($sim_file);
    open(my $fh, '>', $path) || die "could not write-open $path";
    print STDERR "Opening $path\n" if $ENV{FIG_VERBOSE};

    return $fh;
}