[Bio] / FigKernelScripts / FFB2_load_memcache.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB2_load_memcache.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (download) (as text) (annotate)
Mon Feb 14 22:44:07 2011 UTC (9 years ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, mgrast_release_3_0, mgrast_dev_03252011, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.6: +5 -4 lines
Update to figfam build code

#
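# Load the memcache instance with data (functions & translations) for every
# genome referenced by the two given family files (the subsystem-based
# families file and families.2c) or listed in Global/genome.sets.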
#

use FIG;
use FIG_Config;
use strict;
use Cache::Memcached::Fast;
use Data::Dumper;
use Getopt::Long;

#
# Set up the FIG handle, parse the command line and create the memcached
# client.
#
my $fig = FIG->new;

# Data directory; the -seed option replaces the configured default.
my $seed = $FIG_Config::data;

my $rc = GetOptions("seed=s" => \$seed);

($rc && @ARGV == 4) or die "Usage: FFB2_load_memcache [-seed alternate-datadir] subsys_based_families families.2c memcache-host memcache-port\n";

#
# Positional arguments, taken in the order the usage message documents.
# Both family files are only scanned for genome ids, so which of the two
# comes first does not affect the result.
#
my $ssfam = shift;
my $fam2c = shift;
my $mchost = shift;
my $mcport = shift;

# NOTE(review): the client constructor may not contact the server at
# construction time, in which case this check only catches a failed
# object creation -- confirm against the Cache::Memcached::Fast docs.
my $mc = Cache::Memcached::Fast->new({ servers => ["$mchost:$mcport"] });
$mc or die "Could not connect to memcached at $mchost:$mcport\n";

#
# Collect the set of genome ids to load. The hash value is a reference
# count that is never consulted; only the keys matter.
#
my %genomes;

#
# Both family files are scanned the same way: the first fig|<genome> id
# found on each line contributes its genome.
#
for my $famfile ($fam2c, $ssfam)
{
    open(my $fam_fh, "<", $famfile) or die "cannot open $famfile: $!";

    while (my $line = <$fam_fh>)
    {
	if ($line =~ /fig\|(\d+\.\d+)/)
	{
	    $genomes{$1}++;
	}
    }
    close($fam_fh);
}

#
#  Ensure that everything from the genome.sets file is loaded as well.
#  Lines of interest have a numeric first column followed by a tab and
#  the genome id.
#
my $sets_file = "$seed/Global/genome.sets";
open(my $sets_fh, "<", $sets_file) or die "could not open $sets_file: $!";
while (my $line = <$sets_fh>)
{
    if ($line =~ /^\d+\t(\d+\.\d+)/)
    {
	$genomes{$1}++;
    }
}
close($sets_fh);

#
# Walk the collected genomes in genome-id order. For each genome two kinds
# of entries are written to memcached:
#
#   f:<peg id>  => assigned function (or "hypothetical protein")
#   s:<peg id>  => sequence read from Features/peg/fasta
#
# A genome whose tbl or assigned_functions file cannot be opened is skipped
# with a warning; a missing fasta file is fatal.
#
my @list = sort { &FIG::by_genome_id($a,$b) } keys %genomes;
#@list = ("500638.3");
my $n = @list;
my $position = 0;
for my $genome (@list)
{
    # One-based position, so the progress message runs from 1 to $n.
    $position++;

    my $dir = "$seed/Organisms/$genome";

    print STDERR "Load $genome into cache ($position of $n)\n";

    #
    # Every peg listed in the feature table; entries are removed below as
    # their functions are found.
    #
    my $tbl_fh;
    if (!open($tbl_fh, "<", "$dir/Features/peg/tbl"))
    {
	warn "Cannot open $dir/Features/peg/tbl: $!";
	next;
    }
    my %pegs;
    while (my $line = <$tbl_fh>)
    {
	if ($line =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t/)
	{
	    $pegs{$1} = 1;
	}
    }
    close($tbl_fh);

    my $af_fh;
    if (!open($af_fh, "<", "$dir/assigned_functions"))
    {
	warn "cannot open $dir/assigned_functions: $!";
	next;
    }

    #
    # id => function. A later line for the same id replaces an earlier one.
    #
    my %func_of;
    while (my $line = <$af_fh>)
    {
	chomp $line;
	my($id, $func) = split(/\t/, $line);
	if (defined($id) && $id ne '')
	{
	    # The peg has an assignment, so it is not a leftover even if
	    # it turns out to be deleted.
	    delete $pegs{$id};
	    next if $fig->is_deleted_fid($id);
	    $func_of{$id} = $func;
	}
    }
    close($af_fh);

    #
    # Any leftover pegs get marked hypothetical. Deleted features are
    # skipped here too, matching the function and sequence paths.
    #
    for my $peg (keys %pegs)
    {
	next if $fig->is_deleted_fid($peg);
	print STDERR "Set $peg to hypothetical protein\n";
	$func_of{$peg} = "hypothetical protein";
    }

    _store_batch($mc, map { ["f:$_", $func_of{$_}] } keys %func_of);

    # Release the function table before the sequences are read.
    undef %func_of;

    open(my $fasta_fh, "<", "$dir/Features/peg/fasta") or die "cannot open $dir/Features/peg/fasta: $!";

    #
    # id => reference to sequence; the second value returned by
    # read_fasta_record is dereferenced as a scalar ref when stored.
    #
    my %seq_of;
    while (my($id, $seqp) = &FIG::read_fasta_record($fasta_fh))
    {
	next if $fig->is_deleted_fid($id);
	$seq_of{$id} = $seqp;
    }
    close($fasta_fh);

    _store_batch($mc, map { ["s:$_", ${$seq_of{$_}}] } keys %seq_of);
}

#
# Send a list of [key, value] pairs to the cache with one set_multi call
# and report on STDERR every key whose result is not true.
#
#   $cache  object providing set_multi
#   @pairs  list of [key, value] array references
#
# Nothing is sent when @pairs is empty.
#
sub _store_batch
{
    my($cache, @pairs) = @_;

    return unless @pairs;

    # Scalar context: the result is used as a hash ref keyed by cache key.
    my $res = $cache->set_multi(@pairs);
    for my $key (keys %$res)
    {
	if (!$res->{$key})
	{
	    my $status = defined($res->{$key}) ? $res->{$key} : 'undef';
	    print STDERR "set_multi fail $key $status\n";
	}
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3