Parent Directory
|
Revision Log
minor fix to phylogenetic profiles
# -*- perl -*-
#
# check_sims_basic -- validate similarity ("sims") records against known
# sequence lengths.
#
# Sequence lengths are loaded from the NR fasta file plus the peg fasta file
# of every directory under $FIG_Config::organisms whose name starts with
# "9999999.<digits>".  Each sims line is then matched against a fixed
# tab-separated layout: fields 1 and 2 are the two sequence ids, and fields
# 13 and 14 are the lengths the sim claims for them.  A line is "valid" when
# both ids have a known non-zero length and both claimed lengths agree with
# the loaded ones.
#
# Valid lines are written to STDOUT, or, with -delint_dir=Dir, to a file of
# the same basename inside Dir (Dir must not already exist).
#
# Everything else is reported to the log (STDERR, or -logfile=File) as one
# of: badlen1/badlen2, undef1/undef2, synref1/synref2 (id unknown but listed
# in the -synonyms file), or INVALID FORMAT.

use strict;
use warnings;

use FIG;
use File::Path;
use File::Basename;

# The special value must be upper-case 'IGNORE'; a lower-case string names a
# (non-existent) handler sub instead of ignoring the signal.
$SIG{HUP} = 'IGNORE';

my $usage = "usage: check_sims_basic [-delint_dir=Dir] [-logfile=log] [-synonyms=peg_synonyms_file] NR [ < sims | - | SimsDir | Sims1 Sims2 Sims3 ...] > checked.sims [2> errors (recommended if a logfile isn't specified)]";

my $outdir  = "";
my $logfile = "";
my $synfile = "";
my $logfh;
my %major;          # synonym id => major (representative) id
my $trouble = 0;

# --- Option parsing: options are spliced out of @ARGV, leaving NR + inputs.
for (my $i = 0; $i < @ARGV; ) {
    if ($ARGV[$i] =~ m/-delint_dir=(\S+)/) {
        $outdir = $1;
        splice @ARGV, $i, 1;
        if (-d $outdir) {
            # Refuse to write into a pre-existing directory.
            $trouble = 1;
            warn "$outdir exists";
        }
        else {
            mkpath($outdir, 0, 0777) || die "Could not create $outdir";
        }
    }
    elsif ($ARGV[$i] =~ m/-logfile=(\S+)/) {
        $logfile = $1;
        splice @ARGV, $i, 1;
        open($logfh, '>', $logfile) || die "Could not write-open $logfile";
    }
    elsif ($ARGV[$i] =~ m/-synonyms=(\S+)/) {
        $synfile = $1;
        splice @ARGV, $i, 1;
        if (-s $synfile) {
            open(my $syn_fh, '<', $synfile) || die "Could not read-open $synfile";
            while (defined(my $entry = <$syn_fh>)) {
                chomp $entry;

                # NOTE(review): this pattern allows no whitespace between the
                # major id's length and the synonym list -- confirm it matches
                # the actual synonyms file layout.
                # Lines that do not match are skipped rather than reusing
                # captures left over from a previous line.
                next unless $entry =~ m/^([^,]+),\d+(\S+)$/;
                my ($major_syn, $syns) = ($1, $2);

                foreach my $piece (split /;/, $syns) {
                    next unless $piece =~ m/^([^,]+)/;
                    $major{$1} = $major_syn;
                }
            }
            close($syn_fh) || die "Could not close $synfile";
        }
    }
    elsif (($ARGV[$i] eq '-') || (-s $ARGV[$i])) {
        # '-' (read sims from STDIN) is advertised in the usage message, so
        # accept it alongside any non-empty file or directory.
        ++$i;
    }
    else {
        $trouble = 1;
        print STDERR "Invalid arg $ARGV[$i]\n";
        ++$i;
    }
}
die "aborting due to invalid args" if ($trouble);

# First remaining argument is the NR fasta file; it must exist and be non-empty.
my $nr = shift @ARGV;
($nr && (-s $nr)) || die $usage;

# --- Choose default input when no sims arguments were given.
if (@ARGV == 0) {
    # NOTE(review): this test looks inverted relative to the usage message
    # ("< sims" implies STDIN is NOT a terminal); behavior is left unchanged
    # pending confirmation.
    if (-t STDIN) {
        push @ARGV, '-';
    }
    else {
        print STDERR "No arguments given --- checking $FIG_Config::data/Sims by default\n";
        push @ARGV, "$FIG_Config::data/Sims";
    }
}

# A single directory argument is expanded to its non-dot entries.
if ((@ARGV == 1) && (-d $ARGV[0])) {
    my $sims_dir = shift @ARGV;
    opendir(my $sims_dh, $sims_dir) || die "Could not open $sims_dir";
    @ARGV = map { "$sims_dir/$_" } grep { !/^\./ } readdir($sims_dh);
    closedir($sims_dh) || die "Could not close $sims_dir";
}

$trouble = 0;
foreach my $file (@ARGV) {
    next if ($file eq '-');
    if (!-e $file) {
        print STDERR "Simfile $file does not exist\n";
        $trouble = 1;
    }
}
die "There were nonexistent input files" if $trouble;

unless ($logfile) { $logfh = \*STDERR; }

# --- Load sequence lengths from NR and the selected organism peg fasta files.
opendir(my $orgs_dh, "$FIG_Config::organisms") || die "Could not open dir $FIG_Config::organisms";
my @env = grep { s{^(9999999\.\d+)}{$FIG_Config::organisms/$1/Features/peg/fasta} } readdir($orgs_dh);
closedir($orgs_dh) || die "Could not close dir $FIG_Config::organisms";

my %ln;             # sequence id => sequence length
foreach my $file ($nr, @env) {
    open(my $fasta_fh, '<', $file) || die "Could not read-open $file";
    print STDERR "Loading lengths from $file ...\n" if $ENV{FIG_VERBOSE};
    while (my ($id, $seqP) = FIG::read_fasta_record($fasta_fh)) {
        $ln{$id} = length($$seqP);
    }
    close($fasta_fh);
}

# Log why one side of a sim failed validation (nothing is logged when that
# side is fine).
#   $fh         log filehandle
#   $side       1 or 2 (which id of the sim)
#   $where      "<simfile>, <line>:" location prefix
#   $id         the sequence id on that side
#   $known_len  length loaded for $id (undef/0 if unknown)
#   $sim_len    length claimed by the sim line
#   $is_synonym true if $id is unknown but listed in the synonyms file
#   $sim        the full sim line
sub report_bad_side {
    my ($fh, $side, $where, $id, $known_len, $sim_len, $is_synonym, $sim) = @_;

    if ($known_len) {
        if ($known_len != $sim_len) {
            print {$fh} "badlen$side\t$where\t$id\t$known_len\t$sim_len\t$sim\n";
        }
    }
    elsif ($is_synonym) {
        print {$fh} "synref$side\t$where\t$id\t\t\t$sim\n";
    }
    else {
        print {$fh} "undef$side\t$where\t$id\t\t\t$sim\n";
    }
    return;
}

# --- Check every sims file.
foreach my $simfile (@ARGV) {
    print STDERR "Processing $simfile\n" if $ENV{FIG_VERBOSE};

    my $sim_fh;
    if ($simfile eq '-') {
        $sim_fh = \*STDIN;
    }
    else {
        open($sim_fh, '<', $simfile) || die "Could not open $simfile";
    }

    my $outfh   = \*STDOUT;
    my $outfile = "";
    if ($outdir) {
        $outfile = "$outdir/" . basename($simfile);
        undef $outfh;
        open($outfh, '>', $outfile) || die "could not write-open $outfile";
    }

    # Counted per file so that reported line numbers restart for each input
    # (reopening a handle without closing it does not reset $.).
    my $line_no = 0;
    while (defined(my $sim = <$sim_fh>)) {
        ++$line_no;
        chomp $sim;
        $sim =~ s/\t\t/\t/go;       # collapse doubled tabs before matching

        my $where = "$simfile, $line_no:";

        if ($sim =~ m/^(\S+)\t(\S+)\t(\d+|\d+\.\d+)\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t(\d+(\.\d*)?e[-+]?\d+|\d+\.\d+)\t(\d\.\d*e[-+]?\d+|\d+\.\d+|\d+)\t(\d+)\t(\d+)/o) {
            # Captures 7 and 8 are the two trailing length fields (capture 5
            # is the nested group inside capture 4).
            my ($id1, $id2, $ln1, $ln2) = ($1, $2, $7, $8);

            if ($ln{$id1} && $ln{$id2} && ($ln{$id1} == $ln1) && ($ln{$id2} == $ln2)) {
                print {$outfh} "$sim\n";    #...print valid sims to OUTPUT
            }
            else {
                report_bad_side($logfh, 1, $where, $id1, $ln{$id1}, $ln1,
                                ($synfile && $major{$id1}), $sim);
                report_bad_side($logfh, 2, $where, $id2, $ln{$id2}, $ln2,
                                ($synfile && $major{$id2}), $sim);
            }
        }
        else {
            print {$logfh} "INVALID FORMAT\t$where\t$sim\n";
        }
    }

    close($sim_fh) unless ($simfile eq '-');
    if ($outdir) {
        # Buffered write errors only surface at close time.
        close($outfh) || die "Could not close $outfile";
    }
}
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |