[Bio] / FigKernelScripts / FFB2_build_ff_indexes.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB2_build_ff_indexes.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Jun 23 21:16:51 2010 UTC (9 years, 4 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Rollup of figfam update fixes

#
# Build the Berkeley DB indexes used for the accelerated figfam code.
#
# function2index maps a function from fam.func.index to the associated index
# function2families maps a function to the families it is found in
# role2families maps a role to the families whose function includes that role
# peg2family maps a peg to the family it is in
# genome2families maps a genome to the families it is in
# family2function maps a figfam id to the function for the family
# family2pegs maps a figfam id to the pegs in the family.
# md52family maps a md5 signature to the family it is in
#   (listed for completeness; no statement in this script builds it)
#
use SeedUtils;
use FIG_Config;
use FIG;

# Command-line synopsis; printed when the required argument is missing.
my $usage = "usage: FFB2_build_ff_indexes FigfamRelDir\n";

# The single positional argument is the FIGfam release directory that holds
# the input flat files and receives the generated .db files.
my $ff_dir = shift;

# No argument at all is a usage error, not a "not a directory" error.
defined($ff_dir) or die $usage;

# Report the path that was actually given (the message previously
# interpolated an undefined $dir, so the offending path was never shown).
-d $ff_dir or die "FigFamRelDir $ff_dir not a directory\n";

# Scratch file shared by the index-building steps below; the process id in
# the name keeps concurrent runs from using the same file.
my $temp = "$FIG_Config::temp/ffbuild_tmp.$$";

# Build the two function-keyed indexes from fam.func.index.
my $func_index_file = "$ff_dir/fam.func.index";

# Reduce the index file to its unique (field 1, field 3) pairs in the scratch file.
my $extract_cmd = "cut -f1,3 $func_index_file | sort -u > $temp";
&FIG::run($extract_cmd);

# NOTE(review): the two array refs are presumably 0-based key and value
# column lists -- confirm against SeedUtils::create_berk_table.
# function2index.db is built from the reduced scratch file.
&SeedUtils::create_berk_table(
    $temp, [1], [0],
    "$ff_dir/function2index.db",
);

# function2families.db is built straight from fam.func.index; multiple
# values per key are requested.
&SeedUtils::create_berk_table(
    $func_index_file, [2], [1],
    "$ff_dir/function2families.db",
    -multiple_values => 1,
);

# Build role2families.db.
#
# Each line of family.functions of the form "FIGnnn<TAB>function" is expanded
# into one "role<TAB>family" line per role returned by
# SeedUtils::roles_of_function. The lines are piped through sort into the
# scratch file, which is then loaded as a multi-valued table.
open(my $fam_func_fh, "<", "$ff_dir/family.functions") or die "cannot open $ff_dir/family.functions: $!";
open(my $role_sort_fh, "|-", "sort > $temp") or die "Cannot open  $temp: $!";
while (defined(my $line = <$fam_func_fh>))
{
    # Strip the newline from the line just read. (A bare chomp acts on $_,
    # which this loop never assigns.)
    chomp($line);
    if ($line =~ /^(FIG\d+)\t(.*)/)
    {
	# Save $1 before calling out, since the callee may run its own matches.
	my $fam = $1;
	my @roles = &SeedUtils::roles_of_function($2);
	print $role_sort_fh "$_\t$fam\n" for @roles;
    }
}
close($fam_func_fh);

# Closing a pipe waits for the child; a false return means sort failed or
# exited non-zero, in which case the scratch file must not be indexed.
close($role_sort_fh)
    or die "sort into $temp failed: " . ($! ? $! : "exit status " . ($? >> 8)) . "\n";

&SeedUtils::create_berk_table($temp, [0], [1], "$ff_dir/role2families.db", -multiple_values => 1);


# Indexes loaded directly from the release's flat files, with no
# intermediate scratch file.
my $families_2c_file      = "$ff_dir/families.2c";
my $family_functions_file = "$ff_dir/family.functions";

# family2pegs.db: built from families.2c, multiple values per key requested.
&SeedUtils::create_berk_table(
    $families_2c_file, [0], [1],
    "$ff_dir/family2pegs.db",
    -multiple_values => 1,
);

# peg2family.db: same file with the two column lists swapped; the -sort
# option is passed through to create_berk_table.
&SeedUtils::create_berk_table(
    $families_2c_file, [1], [0],
    "$ff_dir/peg2family.db",
    -sort => 1,
);

# family2function.db: built from family.functions.
&SeedUtils::create_berk_table(
    $family_functions_file, [0], [1],
    "$ff_dir/family2function.db",
);

# Build genome2families.db.
#
# Each line of families.2c of the form "FIGnnn<TAB>fig|G.V..." yields one
# "G.V<TAB>FIGnnn" line, where G.V is the digits.digits run that follows
# "fig|". The lines are piped through sort into the scratch file, which is
# then loaded as a multi-valued table.
open(my $fam_peg_fh, "<", "$ff_dir/families.2c") or die "cannot open $ff_dir/families.2c: $!";
open(my $genome_sort_fh, "|-", "sort > $temp") or die "Cannot open  $temp: $!";
while (defined(my $row = <$fam_peg_fh>))
{
    if ($row =~ /^(FIG\d+)\tfig\|(\d+\.\d+)/)
    {
	print $genome_sort_fh "$2\t$1\n";
    }
}
close($fam_peg_fh);

# Closing a pipe waits for the child; a false return means sort failed or
# exited non-zero, in which case the scratch file must not be indexed.
close($genome_sort_fh)
    or die "sort into $temp failed: " . ($! ? $! : "exit status " . ($? >> 8)) . "\n";

&SeedUtils::create_berk_table($temp, [0], [1], "$ff_dir/genome2families.db", -multiple_values => 1);


# All indexes are built; remove the shared scratch file.
unlink($temp);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3