[Bio] / FigKernelScripts / update_taxonomies.pl Repository:
ViewVC logotype

View of /FigKernelScripts/update_taxonomies.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (as text) (annotate)
Wed Oct 25 21:03:45 2006 UTC (13 years, 3 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07
Changes since 1.7: +1 -1 lines
set longer gap between accessing NCBI

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use FIG;
# Refresh the TAXONOMY file of each organism directory found under Dir,
# using the lineage string that get_tax() scrapes from NCBI.
#
# Usage: update_taxonomies Dir [complete]
#   Dir       directory whose entries named <digits>.<digits> are treated
#             as organism directories; the leading digits are the taxid
#   complete  optional; when the second argument matches /complete/, only
#             ids returned by $fig->genomes("complete") are processed
#
# Progress and failures are reported on STDERR.
my $fig = FIG->new;

my $usage = "usage: update_taxonomies Dir [complete]";
my $dir = shift(@ARGV) or die $usage;

# Track "was the filter requested" explicitly. Testing the hash itself
# (defined(%genomes)) is a compile-time error on modern perls, and on old
# ones it could not tell "no filter" from "filter with zero genomes".
my $complete_only = (@ARGV > 0) && ($ARGV[0] =~ /complete/);
my %genomes;
if ($complete_only)
{
    %genomes = map { $_ => 1 } $fig->genomes("complete");
}

opendir(my $dh, $dir) || die "could not open $dir: $!";
my @orgs = sort { $a <=> $b }
           grep { ($_ =~ /^\d+\.\d+$/) && ((! $complete_only) || $genomes{$_}) }
           readdir($dh);
closedir($dh);

# taxid => lineage; several organism directories can share one taxid, so
# successful lookups are remembered to avoid refetching. Failed lookups
# are not cached and will be retried for the next directory.
my %taxonomy;

foreach my $org (@orgs)
{
    if ($org =~ /^(\d+)/)
    {
        my $taxid = $1;

        my $tax = $taxonomy{$taxid};
        if (! $tax)
        {
            if ($tax = get_tax($taxid))
            {
                $taxonomy{$taxid} = $tax;
            }
        }

        if ($tax)
        {
            print STDERR "setting $org to $tax\n";

            my $tax_file = "$dir/$org/TAXONOMY";

            # Read the current value directly instead of shelling out to
            # cat; a missing or unreadable file simply yields no lines.
            my @old;
            if (open(my $in, '<', $tax_file))
            {
                @old = <$in>;
                close($in);
            }

            # chomp (not chop): only strip a trailing newline, so a file
            # lacking one still compares equal to the fetched lineage.
            if (@old == 1)
            {
                chomp $old[0];
            }

            if ((@old == 1) && ($old[0] eq $tax))
            {
                print STDERR "    leaving existing value\n";
            }
            else
            {
                open(my $out, '>', $tax_file) || die "could not open $tax_file: $!";
                print $out "$tax\n";
                # Buffered write errors only surface at close.
                close($out) || die "could not write $tax_file: $!";
                # Permissions kept as in the original script.
                chmod(0777, $tax_file);
            }
        }
        else
        {
            print STDERR "could not handle $org\n";
        }
    }
}

# get_tax($taxid)
#
# Fetch the NCBI Taxonomy Browser page for $taxid with curl (written to a
# per-process temporary file under $FIG_Config::temp) and scrape it.
#
# Returns a string of the form "group1; group2; ...; name", where the
# groups are the link texts on the line following the "Lineage
# ( abbreviated )" heading and name is the text in parentheses in the
# page title.  Returns undef when curl fails, the download cannot be
# read, or the page does not have the expected layout.
#
# After a successful download the sub sleeps 5 seconds to space out
# requests to NCBI.
sub get_tax {
    my($taxid) = @_;
    my $tax;

    my $tmp2 = "$FIG_Config::temp/tmpO.$$";
    my $url  = "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=$taxid&lvl=3&p=17&p=20&p=37&p=38&keep=1&srchmode=5&unlock&lin=s";

    # List form of system: arguments go straight to curl, so neither the
    # temp path nor the URL is subject to shell quoting or expansion.
    if (system("curl", "-o", $tmp2, $url) == 0)
    {
        # Slurp the downloaded page; an unreadable file yields "" and
        # therefore no match below.
        my $out = "";
        if (open(my $fh, '<', $tmp2))
        {
            $out = join("", <$fh>);
            close($fh);
        }

        if ($out =~ /<title>Taxonomy browser \(([^<]+)\)<\/title/s)
        {
            my $gs = $1;
            my $start = quotemeta "<em>Lineage</em></a><em>( abbreviated )</em></dt>";
            if ($out =~ /$start\n([^\n]+)/s)
            {
                my $line = $1;
                # Each lineage group is the text of one <a>...</a> link.
                my @groups = ($line =~ />([^<>]+)<\/a/g);
                $tax = join("; ",(@groups,$gs));
            }
        }
        sleep 5;
    }
    unlink($tmp2);
    return $tax;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3