[Bio] / FigKernelScripts / FFB3_assemble_translations.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB3_assemble_translations.pl

Parent Directory | Revision Log


Revision 1.2 - (download) (as text) (annotate)
Tue Jul 9 16:49:46 2013 UTC (6 years, 4 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.1: +1 -1 lines
FFB3_build_updated_FF: Add --temp to force use of given temp directory
Change hardcoded database host to seed-db-read for the annotator seed.

#
# Assemble the table of protein translations and lengths for
# figfam generation.
#
# We pull all translations from anno-seed and pubseed.
#


use FIG;
use strict;
use DB_File;
use DBI;

# Exactly two arguments are required: the paths of the two btree files to write.
@ARGV == 2 or die "Usage: FFB3_assemble_translations translation-btree length-btree";

my $translation_btree = shift;
my $len_btree = shift;

# In-memory accumulators filled by load_trans():
#   %trans maps peg id => reference to the translation string
#   %len   maps peg id => length of that translation
my %trans;
my %len;

my $fig = FIG->new;

#
# Connect to the named database on seed-db-read.mcs.anl.gov as user 'seed'
# and return the handle. Dies with the DBI error text if the connection
# cannot be made, so a failure is reported here rather than as a method
# call on an undefined handle later on.
#
sub connect_seed_db
{
    my($database) = @_;

    my $dbh = DBI->connect("dbi:mysql:host=seed-db-read.mcs.anl.gov;database=$database", 'seed');
    $dbh or die "Cannot connect to database $database on seed-db-read.mcs.anl.gov: $DBI::errstr\n";
    return $dbh;
}

#
# Select the two source databases. When FFB3_TEST_MODE is set in the
# environment both come from the test databases; otherwise the anno-seed
# database is opened directly and the pubseed handle is borrowed from the
# FIG object.
#
my($anno_dbh, $pubseed_dbh);
if ($ENV{FFB3_TEST_MODE})
{
    $anno_dbh = connect_seed_db('fig_test1');
    $pubseed_dbh = connect_seed_db('fig_test2');
}
else
{
    $anno_dbh = connect_seed_db('fig_anno_v5');
    $pubseed_dbh = $fig->db_handle->{_dbh};
    $pubseed_dbh or die "FIG object did not supply a pubseed database handle\n";
}

# Open (creating if necessary) the two output btree files.
my %trans_btree;
tie %trans_btree, 'DB_File', $translation_btree, O_RDWR | O_CREAT, 0666, $DB_BTREE or die "tie $translation_btree failed: $!";
my %len_btree;
tie %len_btree, 'DB_File', $len_btree, O_RDWR | O_CREAT, 0666, $DB_BTREE or die "tie $len_btree failed: $!";

# anno-seed is loaded first, so a peg id present in both sources ends up
# with the pubseed translation.
load_trans(\%trans, \%len, $anno_dbh, '/vol/mirror-seed/Data.mirror');
load_trans(\%trans, \%len, $pubseed_dbh, $FIG_Config::data);

# Copy everything into the btrees in sorted key order.
# NOTE(review): presumably sorted so the btree receives sequential inserts -- confirm.
for my $peg (sort keys %trans)
{
    $len_btree{$peg} = $len{$peg};
    $trans_btree{$peg} = ${$trans{$peg}};
}

untie %trans_btree;
untie %len_btree;

#
# load_trans(\%trans, \%len, $dbh, $data)
#
# Load the protein translations of every genome listed in a database into
# the caller's hashes.
#
#   $trans  hash ref; receives peg id => reference to the sequence string
#   $len    hash ref; receives peg id => length of that sequence
#   $dbh    database handle; the genome column of its genome table supplies
#           the list of genomes to read
#   $data   data directory; each genome's sequences are read from
#           $data/Organisms/<genome>/Features/peg/fasta
#
# Existing entries for a peg id are overwritten. A genome whose fasta file
# cannot be opened is reported with warn and skipped. Progress is printed
# to STDOUT for every 100th genome. Dies if the genome list cannot be
# retrieved.
#
sub load_trans
{
    my($trans, $len, $dbh, $data) = @_;

    my $glist = $dbh->selectcol_arrayref(qq(SELECT genome
					    FROM genome));
    if (!defined($glist))
    {
        # Without RaiseError a failed query returns undef; report it here
        # instead of failing on the array dereference below.
        my $err = $dbh->errstr;
        $err = 'unknown error' unless defined $err;
        die "Could not retrieve genome list: $err\n";
    }

    my $n = @$glist;
    for my $i (0 .. $#$glist)
    {
        my $genome = $glist->[$i];
        print "$i of $n: $genome\n" if $i % 100 == 0;

        # Lexical handle: closed automatically if we leave the loop early
        # and cannot collide with a global FA handle used elsewhere.
        my $fh;
        if (!open($fh, "<", "$data/Organisms/$genome/Features/peg/fasta"))
        {
            warn "No fasta for $genome in $data: $!\n";
            next;
        }

        # read_fasta_record returns an empty list at end of file, which
        # makes the list assignment false and ends the loop.
        while (my($peg, $seqp) = FIG::read_fasta_record($fh))
        {
            # Skip records without a usable id or sequence reference
            # rather than storing them under an empty key.
            next unless defined $peg && ref $seqp;

            $trans->{$peg} = $seqp;
            $len->{$peg} = length($$seqp);
        }
        close($fh);
    }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3