[Bio] / FigKernelScripts / update_kegg_data.pl Repository:
ViewVC logotype

View of /FigKernelScripts/update_kegg_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (download) (as text) (annotate)
Mon Feb 1 20:07:37 2010 UTC (9 years, 9 months ago) by dejongh
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.5: +0 -1 lines
removed comment

# script to copy and process KEGG updates

use strict;
use FIG_Config;
use File::Spec;
use File::Copy;
use File::Path;

# Command line: an optional leading -force flag followed by the path of the
# local KEGG mirror directory.

# flag to force deletion of old KEGG data
my $force = 0;
if (@ARGV && $ARGV[0] eq "-force") {
	$force = 1;
	shift @ARGV;
}

# The usage check runs after -force has been consumed, so that a lone
# "-force" with no directory is rejected here instead of leaving the mirror
# directory undefined.
unless (@ARGV) {
	die("Usage: update_kegg_data [-force] <kegg mirror directory>");
}

# KEGG mirror directory
my $kegg_mirror_dir = $ARGV[0];

# -d rather than -e: a plain file with this name is not a usable mirror.
unless (-d $kegg_mirror_dir) {
	die("directory does not exist: $kegg_mirror_dir");
}

# The destination of the update is $FIG_Config::kegg; without it there is
# nowhere to install the data.
unless (defined $FIG_Config::kegg) {
	die("FIG_Config::kegg is not defined");
}

# An existing KEGG directory is only replaced when -force was given.
if (-e $FIG_Config::kegg) {
	unless ($force) {
		die("$FIG_Config::kegg exists, move it out of the way first, or use -force flag");
	}

	# rmtree returns the number of files it removed, which is non-zero even
	# when part of the tree could not be deleted, so the return value is not
	# a success indicator. Check that the directory is really gone instead.
	rmtree([$FIG_Config::kegg]);
	if (-e $FIG_Config::kegg) {
		die("Couldn't delete $FIG_Config::kegg");
	}
}

# Create the empty directory skeleton that the copy steps below write into.
# Parents are listed before their children because mkdir does not create
# intermediate directories.
foreach my $subdir ('', 'ligand', 'ligand/compound', 'ligand/reaction',
                    'ligand/enzyme', 'pathway', 'pathway/map')
{
	my $dir = ($subdir eq '') ? $FIG_Config::kegg : "$FIG_Config::kegg/$subdir";

	# "or" (not "||") so that the die is tied to mkdir's result rather than
	# to the truth of the directory name.
	mkdir($dir) or die("Couldn't make $dir: $!");
}

# Copy the individual KEGG data files from the mirror into the matching
# location under $FIG_Config::kegg. Paths are relative to both roots.
my @kegg_files = qw(
	ligand/compound/compound
	ligand/enzyme/enzyme
	ligand/reaction/reaction
	ligand/reaction/reaction.lst
	ligand/reaction/reaction_mapformula.lst
	pathway/map_title.tab
	pathway/map/cpd_map.tab
	pathway/map/rn_map.tab
	pathway/map/ec_map.tab
);

foreach my $file (@kegg_files)
{
	print STDERR "Copying $kegg_mirror_dir/$file ... ";
	copy("$kegg_mirror_dir/$file", "$FIG_Config::kegg/$file") or die("failed: $!");

	# Progress goes to STDERR throughout the script; keep "done" on the same
	# stream as the message it completes.
	print STDERR "done\n";
}

# Copy the pathway map pages and images (names containing map<digits>.html
# or map<digits>.png) from the mirror into the new KEGG directory.
print STDERR "Copying maps ... ";
opendir(my $maps_dh, "$kegg_mirror_dir/pathway/map") or die("Couldn't open $kegg_mirror_dir/pathway/map: $!");
my @map_files = grep { /map\d+\.(html|png)/ } readdir $maps_dh;
closedir $maps_dh;

foreach my $file (@map_files)
{
	copy("$kegg_mirror_dir/pathway/map/$file", "$FIG_Config::kegg/pathway/map/$file") or die("Couldn't copy $file: $!");
}

print STDERR "done\n";

# Run the load_kegg program from $FIG_Config::bin and report how it ended.
# A failure is reported but does not stop the script: the step that follows
# only reads the map pages already copied under $FIG_Config::kegg.
print STDERR "Running load_kegg ... \n";
my $load_kegg_status = system("$FIG_Config::bin/load_kegg");
if ($load_kegg_status == 0) {
	print STDERR "... done\n";
}
elsif ($load_kegg_status == -1) {
	# system returns -1 when the program could not be started at all.
	print STDERR "... load_kegg could not be started: $!\n";
}
elsif ($load_kegg_status & 127) {
	# The low seven bits of the wait status hold the terminating signal.
	print STDERR "... load_kegg was killed by signal " . ($load_kegg_status & 127) . "\n";
}
else {
	# The exit code lives in the high byte of the wait status.
	print STDERR "... load_kegg failed with exit code " . ($load_kegg_status >> 8) . "\n";
}

# For every map<digits>.html page in the new pathway/map directory, scan its
# <area> tags and write four files next to it:
#   <map>_cpd.coord  compound id (C#####) and a bounding box, tab separated
#   <map>_ec.coord   EC number and the rectangle coordinates, tab separated
#   <map>_rn.coord   reaction id (R#####) and the rectangle coordinates
#   <map>.conf       shape, two coordinate pairs and a link to another map
print STDERR "Generating map coordinate files ... \n";
my $coord_map_dir = "$FIG_Config::kegg/pathway/map";

# Read the complete listing up front and close the handle; the loop below
# creates new files in this same directory.
opendir(my $newmaps_dh, $coord_map_dir) or die("Couldn't open $coord_map_dir: $!");
my @map_entries = readdir $newmaps_dh;
closedir $newmaps_dh;

foreach my $file (@map_entries)
{
	next unless $file =~ /(map\d+)\.html/;
	my $map = $1;
	print STDERR "\t Processing map $map ... \n";

	# A page that cannot be read is reported and skipped rather than
	# silently producing empty coordinate files.
	my $html_fh;
	unless (open($html_fh, '<', "$coord_map_dir/$map.html")) {
		print STDERR "\tCouldn't open $coord_map_dir/$map.html: $!\n";
		next;
	}

	open(my $ec_fh, '>', "$coord_map_dir/${map}_ec.coord")
		or die("Couldn't open $coord_map_dir/${map}_ec.coord: $!");
	open(my $rn_fh, '>', "$coord_map_dir/${map}_rn.coord")
		or die("Couldn't open $coord_map_dir/${map}_rn.coord: $!");
	open(my $cpd_fh, '>', "$coord_map_dir/${map}_cpd.coord")
		or die("Couldn't open $coord_map_dir/${map}_cpd.coord: $!");
	open(my $conf_fh, '>', "$coord_map_dir/${map}.conf")
		or die("Couldn't open $coord_map_dir/${map}.conf: $!");

	while (my $html_line = <$html_fh>) {
		next unless $html_line =~ /<area shape=(\S+)\s+coords=(\S+)\s+href=\S+\s+title=\"(.+)\"/;
		my ($shape, $coords, $title) = ($1, $2, $3);

		# Compounds: only the first C##### id in the title is used.
		if ($title =~ /(C\d{5})/) {
			my $compound = $1;
			if ($shape eq "circle") {
				# A circle is x,y,radius; store it as its bounding box.
				my ($x1, $y1, $radius) = split ",", $coords;
				my @fields = ($compound, $x1-$radius, $y1-$radius, $x1+$radius, $y1+$radius);
				print {$cpd_fh} join("\t", @fields), "\n";
			}
			elsif ($shape eq "rect") {
				my @fields = ($compound, split(",", $coords));
				print {$cpd_fh} join("\t", @fields), "\n";
			}
			else {
				print STDERR "\tIn $map, couldn't handle shape '$shape' for $compound\n";
			}
		}

		# Enzymes: the first EC number that follows whitespace and is
		# itself followed by a comma.
		if ($title =~ /\s(\d+\.\d+\.\d+\.\d+),/) {
			my $ec_number = $1;
			if ($shape eq "rect") {
				my @fields = ($ec_number, split(",", $coords));
				print {$ec_fh} join("\t", @fields), "\n";
			}
			else {
				print STDERR "\tIn $map, couldn't handle shape '$shape' for $ec_number\n";
			}
		}

		# Reactions: every R##### id in the title gets its own line.
		while ($title =~ /(R\d{5})/g) {
			my $reaction = $1;
			if ($shape eq "rect") {
				my @fields = ($reaction, split(",", $coords));
				print {$rn_fh} join("\t", @fields), "\n";
			}
			else {
				print STDERR "\tIn $map, couldn't handle shape '$shape' for $reaction\n";
			}
		}

		# Links to other maps. The whole title is used in the link path.
		# NOTE(review): this expects four comma-separated coordinates;
		# confirm that map links are never non-rectangular areas.
		if ($title =~ /map\d{5}/) {
			my ($x1, $y1, $x2, $y2) = split ",", $coords;
			print {$conf_fh} "$shape ($x1,$y1) ($x2,$y2)\t/kegg/pathway/map/$title.html", "\n";
		}
	}

	close $html_fh;

	# Buffered write errors only surface at close time, so check them.
	close $ec_fh   or die("Couldn't write $coord_map_dir/${map}_ec.coord: $!");
	close $rn_fh   or die("Couldn't write $coord_map_dir/${map}_rn.coord: $!");
	close $cpd_fh  or die("Couldn't write $coord_map_dir/${map}_cpd.coord: $!");
	close $conf_fh or die("Couldn't write $coord_map_dir/${map}.conf: $!");
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3