[Bio] / FortyEightMeta / SimDB.pm Repository:
ViewVC logotype

View of /FortyEightMeta/SimDB.pm

Parent Directory | Revision Log


Revision 1.2 - (download) (as text) (annotate)
Wed Jun 11 21:54:38 2008 UTC (11 years, 9 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_rel_2008_0919, mgrast_rel_2008_1110, mgrast_rel_2008_0917
Changes since 1.1: +1 -1 lines
fix default path

#
# module to use for scanning the mgrast database XML description.
#

package FortyEightMeta::SimDB;
use Data::Dumper;
use strict;
use XML::LibXML;
use FIG_Config;

use base 'Class::Accessor';

__PACKAGE__->mk_accessors(qw(doc file));

#
# Construct a SimDB object around a parsed database-description XML file.
#
# Arguments:
#   $class - class name to bless into.
#   $file  - path of the XML file.  When omitted or empty, the path is taken
#            from $FIG_Config::mgrast_database_def, and if that is unset
#            from "$FIG_Config::mgrast_data/databases.xml".
#
# Returns the new object (with 'doc' and 'file' fields), or undef if the
# parser hands back an undefined document.
#
sub new
{
    my($class, $file) = @_;

    if (!defined($file) || $file eq '')
    {
        # Must be '||' here: the low-precedence 'or' binds looser than '=',
        # which made the databases.xml fallback unreachable.
        $file = $FIG_Config::mgrast_database_def
            || "$FIG_Config::mgrast_data/databases.xml";
    }

    my $parser = XML::LibXML->new();
    my $doc = $parser->parse_file($file);

    # Kept as 'return undef' so callers always receive exactly one value.
    defined($doc) or return undef;

    my $self = {
        doc  => $doc,
        file => $file,
    };
    return bless $self, $class;
}

#
# Look up the tax_data entries recorded under one version of a database.
#
# Arguments:
#   $name - value of the database element's "name" attribute.
#   $vers - value of the db_version element's "version" attribute.
#
# Returns a list of hash refs, one per matching tax_data element, each
# mapping attribute name => attribute value.  The list is empty when
# nothing matches.
#
sub db_files_for_database_version
{
    my($self, $name, $vers) = @_;

    my @tax_nodes = $self->doc->findnodes("//database[\@name='$name']/db_version[\@version='$vers']/db_file/tax_data");

    my @records;
    foreach my $tax_node (@tax_nodes)
    {
        # Flatten the element's attributes into a plain hash.
        my %attrs;
        $attrs{ $_->getName() } = $_->getValue() foreach $tax_node->attributes();
        push @records, \%attrs;
    }
    return @records;
}

#
# Return the SEED peg.synonyms file info for the given database name & version.
#
# Arguments:
#   $name - value of the database element's "name" attribute.
#   $vers - value of the db_version element's "version" attribute.
#
# Returns the value of the first matching db_file "peg_synonyms" attribute,
# or undef when no such attribute exists.
#
sub get_pegsyn
{
    my($self, $name, $vers) = @_;

    my @n = $self->doc->findnodes("//database[\@name='$name']/db_version[\@version='$vers']/db_file/\@peg_synonyms");

    # Return an explicit undef on a miss.  Previously the sub fell off the
    # end of the 'if' and leaked the condition's (defined, false) value.
    # 'return undef' rather than a bare 'return' keeps the result a single
    # value in list context, matching the found case.
    return undef unless @n;

    return $n[0]->getValue();
}

#
# Identify the database that owns a fasta file and list its tax files.
#
# Arguments:
#   $fasta - value of a db_file element's "fasta" attribute.
#
# Returns a three element list
#   (database name, version string, [ { tax_data attrs }, ... ])
# where name and version are read from the grandparent and great-grandparent
# of the first matching tax_data element.  Returns an empty list when no
# tax_data element is found for the fasta file.
#
sub db_files_for_fasta_file
{
    my($self, $fasta) = @_;

    my @tax_nodes = $self->doc->findnodes("//db_file[\@fasta='$fasta']/tax_data");

    my @result;
    return @result unless @tax_nodes;

    # tax_data -> db_file -> db_version -> database
    my $version_node  = $tax_nodes[0]->parentNode->parentNode;
    my $database_node = $version_node->parentNode;
    push @result, $database_node->getAttribute('name'), $version_node->getAttribute('version');

    my @tax_files;
    push @result, \@tax_files;
    foreach my $tax_node (@tax_nodes)
    {
        my %attrs;
        $attrs{ $_->getName() } = $_->getValue() foreach $tax_node->attributes();
        push @tax_files, \%attrs;
    }

    return @result;
}

#
# Walk the XML and return the first db_version of each database.
#
# If version_info is passed, it is a hash { dbname => version };
# databases whose name is not a key of that hash are skipped, and for the
# others the db_version whose "version" attribute equals the requested
# version is used instead of the first one.
#
# Data is an array of hashes, each of which is
#  { name => dbname, version => version-string,
#    files => [ { attrs from db_file record } ],
#    ...any other attributes of the database element...
#  }
#
# A database for which no usable db_version is found is still returned,
# with an empty files list and no version key.
#
sub databases
{
    my($self, $version_info) = @_;

    my @found;

    DATABASE:
    foreach my $db_node ($self->{doc}->documentElement()->childNodes())
    {
        next DATABASE if $db_node->nodeType() != XML_ELEMENT_NODE;

        my $db_name = $db_node->getAttribute("name");
        next DATABASE if $version_info && !exists($version_info->{$db_name});

        # Seed the record with the (still empty) file list, then layer the
        # element's own attributes on top of it.
        my @files;
        my %record = (files => \@files);
        $record{ $_->getName() } = $_->getValue() foreach $db_node->attributes;
        push @found, \%record;

        # Select the db_version to report: the first one, or the first one
        # matching the requested version when version_info was supplied.
        my $chosen;
        foreach my $candidate ($db_node->childNodes())
        {
            next if $candidate->nodeType() != XML_ELEMENT_NODE;
            next if $candidate->nodeName ne 'db_version';
            next if $version_info
                and $version_info->{$db_name} ne $candidate->getAttribute('version');

            $chosen = $candidate;
            last;
        }
        next DATABASE unless $chosen;

        $record{version} = $chosen->getAttribute("version");

        foreach my $file_node ($chosen->childNodes())
        {
            next if $file_node->nodeType() != XML_ELEMENT_NODE;

            my %file_attrs;
            $file_attrs{ $_->getName() } = $_->getValue() foreach $file_node->attributes();
            push @files, \%file_attrs;
        }
    }

    return @found;
}

#
# List the analyses available for one version of a database.
#
# Arguments:
#   $dbname - value of the database element's "name" attribute.
#   $dbvers - value of the db_version element's "version" attribute.
#
# For every tax_data element found under that version, the element's "name"
# attribute is handed to get_db_info together with the database name.
# Returns the list of results, in document order, leaving out any result
# that is false.
#
sub get_analyses
{
    my($self, $dbname, $dbvers) = @_;

    my @tax_nodes = $self->doc->findnodes("//database[\@name='$dbname']/db_version[\@version='$dbvers']/db_file/tax_data");

    my @analyses =
        grep { $_ }
        map  { scalar $self->get_db_info($dbname, scalar $_->getAttribute('name')) }
        @tax_nodes;

    return @analyses;
}

#
# Pull the dbinfo information for the database name & analysis.
#
# Arguments:
#   $dbname - value of the database element's "name" attribute.
#   $what   - value of the db_info element's "name" attribute.
#
# Returns a hash ref built from the first matching db_info element: its
# attributes first, then one entry per child element (element name =>
# literal text), and finally db_name => $dbname.  When no db_info element
# matches, the hash ref holds only the db_name entry, so the result is
# always a true value.
#
sub get_db_info
{
    my($self, $dbname, $what) = @_;

    my @matches = $self->doc->findnodes("//database[\@name='$dbname']/db_info[\@name='$what']");

    my %info;
    if (@matches)
    {
        my $info_node = $matches[0];

        $info{ $_->getName() } = $_->getValue() foreach $info_node->attributes();

        # Child elements are applied after the attributes, so a child
        # overrides an attribute of the same name.
        foreach my $child ($info_node->childNodes())
        {
            next if $child->nodeType != XML_ELEMENT_NODE;
            $info{ $child->nodeName } = $child->to_literal;
        }
    }

    # Set last so it wins over any attribute or child called db_name.
    $info{db_name} = $dbname;
    return \%info;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3