[Bio] / Clearinghouse / gen_seed_tables.pl Repository:
ViewVC logotype

View of /Clearinghouse/gen_seed_tables.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Apr 27 17:46:11 2005 UTC (14 years, 5 months ago) by olson
Branch: MAIN
CVS Tags: myrast_33, HEAD
Pull the db stuff out into Clearinghouse::ch_database. This module exports db_init(). It also initializes the
contents of CH_Config to have the database stuff from FIG_Config if it's not already set. This gives us
a good default behavior while allowing modification. For instance, on my laptop which has a mysql
database for the normal SEED, the CH_Config looks like this:

package CH_Config;

$dbms = "Pg";
$db = "clearinghouse";
$dbuser = "olson";
$dbpass = "";
$dbport = "10101";
$service_url = "http://localhost/FIG/clearinghouse_services.cgi";
$dir = "/Users/olson/chdir";

Do some misc cleanup on the services.

Add gen_seed_tables, which generates a postgres input file (suitable for loading with
'psql -f load_seed.sql') for the genomes in the current seed.

#
# Generate table data for the initial SEED data.
#
# We make up our own clearinghouse IDs here; the database will have to
# be manually synched. (Or we can do it).
#

use FIG;
use strict;
use Data::Dumper;

my $fig = new FIG;
my $db = $fig->db_handle();

#
# Initial IDs.
#

my $next_ch_id = 100;
my $next_acc_id = 100;

#
# Retrieve genome information.
#

my $ret = $db->SQL(qq(SELECT genome, gname, complete, restrictions, taxonomy
		      FROM genome));

open(CH, ">ch.dat");
open(ACC, ">acc.dat");
open(MD, ">md.dat");
open(G, ">genome_id.dat");

my $now = time;

for my $ent (@$ret)
{
    my($genome, $gs, $complete, $restrictions, $taxonomy) = @$ent;

    my $gdir = "$FIG_Config::organisms/$genome";
    my $project = &FIG::file_head("$gdir/PROJECT");
    chomp $project;
    my $md5 = &FIG::file_head("$gdir/MD5SUM");
    chomp $md5;

    my $ch = $next_ch_id++;
    my $acc = $next_acc_id++;

    print CH join("\t", $ch, $acc, "GENOME", '', "$gs imported from The SEED"), "\n";
    print ACC join("\t", $acc, "Initial Import", $now, "Initial import from The SEED", "The SEED"), "\n";

    my ($tax, $seq) = ($genome =~ /^(\d+)\.(\d+)/);

    defined($tax) or die;

    print G join("\t", $seq, $tax, ''), "\n";

    md($ch, 'genome_id', $genome);
    md($ch, 'genome_md5', $md5);
    md($ch, 'taxon_id', $tax);
    md($ch, 'project', $project);
    md($ch, 'taxonomy', $taxonomy);
}
    
close(CH);
close(ACC);
close(MD);
close(G);

#
# Now combine these into a postgres command file.
#

open(CMD, ">load_seed.sql");

write_load(\*CMD, "acc.dat", "accession", 'accession_id_seq', $next_acc_id - 1);
write_load(\*CMD, "ch.dat", "clearing_house", "clearing_house_id_seq", $next_ch_id - 1);
write_load(\*CMD, "md.dat", "meta_data");
write_load(\*CMD, "genome_id.dat", "genome_id");

close(CMD);

sub write_load
{
    my($fh, $file, $table, $serial, $value) = @_;
#    print $fh "DELETE FROM $table;\n";
    print $fh "COPY $table FROM STDIN;\n";
    open(my $r, "<$file");
    while (<$r>)
    {
	print $fh $_;
    }
    close($r);
    print $fh "\\.\n";
    if ($serial)
    {
	print $fh "SELECT setval('$serial', $value);\n";
    }
}


sub md
{
    my($id, $k, $v) = @_;
    $k =~ s/\t/\\t/g;
    $v =~ s/\t/\\t/g;
    $k =~ s/\n/\\n/g;
    $v =~ s/\n/\\n/g;
    print MD join("\t", $id, $k, $v), "\n";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3