# Thu May 31 19:24:43 2007 UTC (12 years, 10 months ago) by olson
# Branch: MAIN
# Missing scripts.

# Compute a cache of genome similarity data.
# The tied hash has an entry for each genome pair, $hash{$g1,$g2} = $N, where
# $N = 0 if the pair is not "too similar" and $N = 1 if it is "too similar".
# The entry for the single key {$genome} is a comma-separated list of the
# genomes for which the data has been computed.

use strict;
use warnings;
use FIG;
use Data::Dumper;
use DB_File;

# Usage: compute_genome_similarity cachefile
#
# Walks every genome known to FIG, keeps the archaeal and bacterial ones,
# asks FIG for that genome's similarity results, and records them in the
# DB_File btree named on the command line (created if it does not exist).

@ARGV == 1 or die "Usage: compute_genome_similarity cachefile\n";

my $cache = shift;

my $fig = FIG->new;

# The tie object is deliberately not kept in a variable: holding a reference
# to it across the untie below would leave an inner reference alive and can
# delay the flush of the database.
my %toosim;
tie(%toosim, 'DB_File', $cache, O_RDWR | O_CREAT, 0666, $DB_BTREE)
    or die "cannot tie: $!\n";

for my $genome ($fig->genomes) {
    next unless $fig->is_archaeal($genome) or $fig->is_bacterial($genome);

    my $gs = $fig->genus_species($genome);
    print "$genome $gs\n";

    my @r = $fig->compute_genome_similarity($genome);
    print "Found " . scalar(@r) . " results\n";

    # Genomes that $genome has been compared against in this pass.
    my %g2list;
    for my $ent (@r) {
        # Each result is an array ref whose first two elements are the other
        # genome and the too-similar flag; any further elements are not
        # needed here.
        my ($g2, $is_too_similar) = @$ent;

        # A two-part hash key is joined with $; (perl's multi-dimensional
        # hash emulation), so this is stored under "$genome$;$g2".
        $toosim{$genome, $g2} = $is_too_similar;
        $g2list{$g2} = 1;
    }

    # Per-genome index entry: the genomes whose pair data was stored above.
    # Sorted so the stored value is reproducible from run to run.
    $toosim{$genome} = join(",", sort keys %g2list);
}

untie %toosim;

