Parent Directory
|
Revision Log
figfam build updates
# # Compare the results of two tested FIGfam dirs, writing the results into the # newer one. # use Data::Dumper; use YAML::Any; use strict; use File::Basename; use SeedUtils; @ARGV == 2 or die "Usage: FFB2_compare_tests FFdir1 FFdir2\n"; my $ff_dir_1 = shift; my $ff_dir_2 = shift; my $ff_rel_1 = validate($ff_dir_1); my $ff_rel_2 = validate($ff_dir_2); print "Comparing release $ff_rel_1 ($ff_dir_1) to release $ff_rel_2 ($ff_dir_2)\n"; my $out_dir = "$ff_dir_2/comparisons/$ff_rel_1"; &SeedUtils::verify_dir($out_dir); # # Find the set of kmers & genomes in common between the two test runs. # my %all_genomes; my @to_compare; for my $f1kdir (<$ff_dir_1/tests/*>) { my $k = basename($f1kdir); my $f2kdir = "$ff_dir_2/tests/$k"; if (! -d $f2kdir) { warn "Skipping $k, not present in release $ff_rel_2\n"; next; } for my $outfile (<$f1kdir/*.out>) { my $outbase = basename($outfile); if (! -f "$f2kdir/$outbase") { warn "Skipping $outbase, not present in release $ff_rel_2\n"; next; } my ($genome) = $outbase =~ /^(\d+\.\d+)/; $all_genomes{$genome}++; push(@to_compare, [$k, $genome, $outfile, "$f2kdir/$outbase"]); } } # # Find genome peg counts (common data). # my %genome_peg_count; for my $genome (keys %all_genomes) { open(F, "<", "/vol/figfam-prod/test_genomes/$genome.peg.fasta") or die "cannot open /vol/figfam-prod/test_genomes/$genome.peg.fasta: $!"; my $c = 0; while (<F>) { /^>/ and $c++; } close(F); $genome_peg_count{$genome} = $c; } for my $comp (sort { $a->[0] <=> $b->[0] or $a->[1] <=> $b->[1] } @to_compare) { my($k, $genome, $f1, $f2) = @$comp; print "Compare @$comp\n"; &compare($k, $genome, $f1, $f2); } # # Compare two runs of the kmers. # # Compute a bunch of metrics, put in data structure, and # write the YAML version into $ff_dir_2/comparisons/$ff_rel_1/$k.$genome. # sub compare { my($k, $genome, $f1, $f2) = @_; my $results = {}; $results->{peg_count} = $genome_peg_count{$genome}; $results->{kmer} = $k; $results->{genome} = $genome; $results->{this_version} = $ff_rel_2; $results->{prev_version} = $ff_rel_1; $results->{this_data} = $f2; $results->{prev_data} = $f1; # # Read first run. # open(F, "<", $f1) or die "Cannot open $f1: $!"; my %r1; my $num_assigned_1 = 0; while (<F>) { chomp; my($id, $func) = split(/\t/); $func = SeedUtils::strip_func($func); $r1{$id} = $func; $num_assigned_1++; } $results->{prev_num_assigned} = $num_assigned_1; # # Read second run. # open(F, "<", $f2) or die "Cannot open $f2: $!"; my $num_assigned_2 = 0; while (<F>) { chomp; my($id, $func) = split(/\t/); $func = SeedUtils::strip_func($func); my $pfunc = $r1{$id}; if ($func eq '' && $pfunc eq '') { # neither assigned, don't care. } elsif ($func eq '' && $pfunc ne '') { # last assigned, we did not $results->{num_calls_lost}++; push(@{$results->{calls_lost}}, [$id, $pfunc, $func]); } elsif ($func ne '' && $pfunc eq '') { # we assigned, last did not $results->{num_calls_gained}++; push(@{$results->{calls_gained}}, [$id, $pfunc, $func]); } else { # we both assigned, are same? if ($func eq $pfunc) { $results->{num_calls_same}++; } else { $results->{num_calls_diff}++; push(@{$results->{calls_diff}}, [$id, $pfunc, $func]); } } $num_assigned_2++; } $results->{this_num_assigned} = $num_assigned_2; close(F); open(OUT, ">", "$out_dir/$k.$genome") or die "Cannot write $out_dir/$k.$genome: $!"; print OUT Dump($results); close(OUT); } sub validate { my($dir) = @_; -d $dir or die "$dir not a directory\n"; -d "$dir/tests" or die "$dir has no test data available\n"; my $base = basename($dir); if ($base =~ /^Release(\d+)$/) { return $1; } die "$dir doesn't have the right directory name (should be ReleaseXX)\n"; }
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |