[Bio] / FigKernelScripts / load_wikipedia.pl Repository:
ViewVC logotype

View of /FigKernelScripts/load_wikipedia.pl

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Aug 19 21:42:49 2009 UTC (10 years, 6 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Wikipedia cache loader.

#
# Load wikipedia links into the database.
#
# Expensive; run periodically.
#

use strict;
use warnings;
use FIG;
use IO::Handle;    # provides autoflush() on file handles for perls older than 5.14

# Overview:
#   1. Read every (genome, gname) row from the genome table.
#   2. Reduce each gname to its first two whitespace-separated tokens and
#      call $fig->wikipedia_link on that key, once per distinct key.
#   3. Write "key<TAB>link" lines to a temp file and hand that file to
#      reload_table to rebuild the genome_wikipedia_link table.

my $fig = FIG->new;

my $res = $fig->db_handle->SQL(qq(SELECT genome, gname FROM genome));

# foreach would quietly autovivify an undefined $res into an empty list, and
# the table would then be reloaded from an empty file. Stop instead.
defined($res) or die "genome query returned no result";

my $tmp = "$FIG_Config::temp/wiki.$$";
open(my $out, '>', $tmp) or die "cannot write $tmp: $!";
$out->autoflush(1);

# Keys already looked up, mapped to the link that was returned for them.
my %done;
for my $ent (@$res)
{
    # Only the name column is needed; the genome id in column 0 is unused.
    my $gname = $ent->[1];

    # The key is the first two tokens of the name joined by a single space.
    # The derivation is intentionally byte-identical to the historical one
    # (a one-word name yields "word " with a trailing space).
    # NOTE(review): readers of genome_wikipedia_link presumably build their
    # lookup key the same way -- confirm before normalising this.
    my @organism_tokens = split(/\s/, defined($gname) ? $gname : '');
    my $gs = join(" ", map { defined($_) ? $_ : '' } @organism_tokens[0..1]);

    if (exists($done{$gs}))
    {
        print "$gs already done\n";
        next;
    }

    print "$gs lookup\n";
    my $link = $fig->wikipedia_link($gs);
    $done{$gs} = $link;

    # An undefined link is written as an empty url column, as before.
    my $url = defined($link) ? $link : '';
    print {$out} "$gs\t$url\n" or die "cannot write to $tmp: $!";
}

# A failed close means the file may be incomplete; do not load it.
close($out) or die "cannot close $tmp: $!";

# Run the reload under eval so the temp file is removed even when it fails;
# the original error is re-thrown afterwards.
my $loaded = eval {
    $fig->reload_table('all', "genome_wikipedia_link",
                       "gname varchar(255), url varchar(255), "
                       . "PRIMARY KEY ( gname )",
                       { },
                       $tmp);
    1;
};
my $load_error = $@;

unlink($tmp);

die($load_error || "reload of genome_wikipedia_link failed\n") unless $loaded;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3