[Bio] / FigKernelPackages / FIGV.pm Repository:
ViewVC logotype

View of /FigKernelPackages/FIGV.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.42 - (download) (as text) (annotate)
Sun Sep 16 21:51:21 2007 UTC (12 years, 6 months ago) by overbeek
Branch: MAIN
Changes since 1.41: +8 -0 lines
add code to search for mobile elements

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package FIGV;

use Carp;
use strict;
use FIG;
use FIG_Config;
use SFXlate;
use SproutFIG;
use Tracer;
use Data::Dumper;
use vars qw($AUTOLOAD);
use DB_File;
use FileHandle;

sub new {
    my($class,$org_dir,$low_level) = @_;

    my $fig;
    if ($low_level && ($low_level =~ /sprout/i))
    {
        $fig = new SproutFIG($FIG_Config::sproutDB, $FIG_Config::sproutData);
    }
    elsif (! $org_dir)
    {
	$fig = new FIG;
	return $fig;
    }
    else
    {
	$fig = new FIG;
    }

    my $self         = {};
    $self->{_fig}    = $fig;
    $self->{_orgdir} = $org_dir;

    $org_dir =~ /(\d+\.\d+)$/;
    $self->{_genome} = $1;
    return bless $self, $class;
}


# return the path of the organism directory
sub organism_directory {
  return $_[0]->{_orgdir};
}

sub is_complete
{
    return 1;
}

#
# Redirect any method invocations that we don't implement out to the
# underlying FIG object.
#
sub AUTOLOAD
{
    my($self, @args) = @_;

    if (ref($self) ne "FIGV") {
	confess "BAD FIGV object passed to AUTOLOAD";
    }

    no strict 'refs';

    my $meth = $AUTOLOAD;
    $meth =~ s/.*:://;
    my $fmeth = "FIG::$meth";

    my $fig = $self->{_fig};
#    my $args = Dumper(\@args);
    if (wantarray)
    {
	my @res = $fig->$meth(@args);
#	my @res = &$fmeth($self, @args);
#	warn "FIGV invoke $meth($args) returns\n", Dumper(\@res);
	return @res;
    }
    else
    {
	my $res = $fig->$meth(@args);
#	my $res = &$fmeth($self, @args);
#	warn "FIGV invoke $meth($args) returns\n", Dumper($res);
	return $res;
    }
}

sub FIG
{
    my($self) = @_;
    return $self;
}

sub sort_fids_by_taxonomy
{
    my($self,@fids) = @_;

    return map     { $_->[0] }
           sort    { $a->[1] cmp $b->[1] }
           map     { [$_,$self->taxonomy_of($self->genome_of($_))] }
           @fids;
}

sub get_basic_statistics
{
    my($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->get_basic_statistics($genome);
    }

    #
    # Check cache.
    #

    my $cache = "$newGdir/cache.basic_statistics";
    my $fh = new FileHandle($cache);
    if ($fh)
    {
	my $stats = {};
	while (<$fh>)
	{
	    chomp;
	    my($k, $v) = split(/\t/);
	    $stats->{$k} = $v;
	}
	close($fh);
	return $stats;
    }


    my $subsystem_data = $self->get_genome_subsystem_data($genome);
  
    my %sscount = map { $_->[0] => 1 } @$subsystem_data;
    my $nss=scalar(keys(%sscount));

    my $statistics = {
	num_subsystems => $nss,
	num_contigs    => scalar($self->all_contigs($genome)),
	num_basepairs  => $self->genome_szdna($genome),
	genome_name    => $self->genus_species($genome),
	genome_domain  => $self->genome_domain($genome),
	genome_pegs    => $self->genome_pegs($genome),
	genome_rnas    => $self->genome_rnas($genome),
	genome_version => $self->genome_version($genome)
	};

    $fh = new FileHandle(">$cache");
    if ($fh)
    {
	while (my($k, $v) = each %$statistics)
	{
	    print $fh join("\t", $k, $v), "\n";
	}
	close($fh);
    }
    
    return $statistics;
}


sub get_peg_statistics {
    my ($self, $genome) = @_;
    
    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};
    
    if ($genome ne $newG)
    {
	return $fig->get_peg_statistics($genome);
    }
    
    #
    # Check cache.
    #

    my $cache = "$newGdir/cache.peg_statistics";
    my $fh = new FileHandle($cache);
    if ($fh)
    {
	my $stats = {};
	while (<$fh>)
	{
	    chomp;
	    my($k, $v) = split(/\t/);
	    $stats->{$k} = $v;
	}
	close($fh);
	return $stats;
    }


    my $subsystem_data = $self->get_genome_subsystem_data($genome);
    my $assignment_data = $self->get_genome_assignment_data($genome);
    
    my $hypo_sub = 0;
    my $hypo_nosub = 0;
    my $nothypo_sub = 0;
    my $nothypo_nosub = 0;
    my %in = map { $_->[2] => 1 } @$subsystem_data;
    my $in = keys(%in);
    
    my %sscount = map { $_->[0] => 1 } @$subsystem_data;
    
    foreach $_ (@$assignment_data)
    {
	my($peg,$func) = @$_;
	my $is_hypo = &FIG::hypo($func);
	
	if    ($is_hypo && $in{$peg})           { $hypo_sub++ }
	elsif ($is_hypo && ! $in{$peg})         { $hypo_nosub++ }
	elsif ((! $is_hypo) && (! $in{$peg}))   { $nothypo_nosub++ }
	elsif ((! $is_hypo) && $in{$peg})       { $nothypo_sub++ }
    }
    my $tot = $hypo_sub + $nothypo_sub + $hypo_nosub + $nothypo_nosub;
    
    my ($fracHS, $fracNHS, $fracHNS, $fracNHNS);
    
    if ($tot == 0) {
	$fracHS = sprintf "%.2f", 0.0;
	$fracNHS = sprintf "%.2f", 0.0;
	$fracHNS = sprintf "%.2f", 0.0;
	$fracNHNS = sprintf "%.2f", 0.0;
    } else {
	$fracHS = sprintf "%.2f", $hypo_sub / $tot * 100;
	$fracNHS = sprintf "%.2f", $nothypo_sub / $tot * 100;
	$fracHNS = sprintf "%.2f", $hypo_nosub / $tot * 100;
	$fracNHNS = sprintf "%.2f", $nothypo_nosub / $tot * 100;
    }
    
    my $statistics = {
	hypothetical_in_subsystem => $hypo_sub,
	hypothetical_not_in_subsystem => $hypo_nosub,
	non_hypothetical_in_subsystem => $nothypo_sub,
	non_hypothetical_not_in_subsystem => $nothypo_nosub,
	hypothetical_in_subsystem_percent => $fracHS,
	hypothetical_not_in_subsystem_percent => $fracHNS,
	non_hypothetical_in_subsystem_percent => $fracNHS,
	non_hypothetical_not_in_subsystem_percent => $fracNHNS
	};

    $fh = new FileHandle(">$cache");
    if ($fh)
    {
	while (my($k, $v) = each %$statistics)
	{
	    print $fh join("\t", $k, $v), "\n";
	}
	close($fh);
    }
    
    return $statistics;
}

#
# To retrieve a subsystem in FIGV, we create the subsystem as normal via $fig->get_subsystem,
# then insert the row for the virtual org dir we are processing.
#

sub get_subsystem
{
    my($self,$ssa) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    my $ss = $fig->get_subsystem($ssa);
    return undef unless $ss;

    $self->load_ss_data();

    my $bindings = $self->{_ss_bindings}->{$ssa};
    my $variant = $self->{_ss_variants}->{$ssa};

#    warn "Adding virtual genome " . Dumper(\%bindings);
    $ss->add_virtual_genome($self->genus_species(), $newG, $variant, $bindings);

    return $ss;
}

sub active_subsystems
{
    my($self, $genome, $all) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome  ne $newG)
    {
	return $fig->active_subsystems($genome, $all);
    }

    $self->load_ss_data();

    my $slist = {};

    if ($self->{_ss_variants})
    {
	%{$slist} = %{$self->{_ss_variants}};
    }

    if (not $all)
    {
	for my $ss (keys %$slist)
	{
	    my $var = $slist->{$ss};
	    delete $slist->{$ss} if $var eq 0 or $var eq -1;
	}
    }
    return $slist;
}

sub peg_to_subsystems
{
    my($self, $peg) = @_;

    my $variant = $self->{_ss_variants};
    my %sub = map { $_ => 1 }
              grep { $variant->{$_} !~ /^(-1)|0$/ }
              map { $_->[0] }
              $self->peg_to_roles_in_subsystems($peg);
    return sort keys(%sub);
}

sub peg_to_roles_in_subsystems
{
    my($self,$peg) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($peg !~ /fig\|$newG\.peg/)
    {
	return $fig->peg_to_roles_in_subsystems($peg);
    }

    $self->load_ss_data();

    my $ret  = $self->{_ss_peg_index}->{$peg};

    return $ret ? @$ret : ();
}

sub subsystems_for_peg
{
    my($self,$peg) = @_;
    return $self->peg_to_roles_in_subsystems($peg);
}

sub genomes {
    my($self,$complete) = @_;
    my $fig = $self->{_fig};

    return ($self->{_genome},$fig->genomes($complete));
}

sub genus_species {
    my($self,$genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (($genome eq $newG) && open(GENOME,"<$newGdir/GENOME"))
    {
	my $x = <GENOME>;
	close(GENOME);
	chop $x;
	return $x;
    }
    else
    {
	return $fig->genus_species($genome);
    }
}

sub get_genome_assignment_data {
    my($self,$genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome eq $newG) 
    {
	my @fnfiles = <$newGdir/proposed*functions>;
	if (@fnfiles == 0)
	{
	    @fnfiles = <$newGdir/assigned*functions>;
	}

	if (@fnfiles == 0)
	{
	    return [];
	}
	my %assign = map { ( $_ =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t(\S.*\S)/) ? ($1 => $2) : () }
		             `cat @fnfiles`;
	return [map { [$_,$assign{$_}] } sort { &FIG::by_fig_id($a,$b) } keys(%assign)];
    }
    else
    {
	return $fig->get_genome_assignment_data($genome);
    }
}

sub org_of {
    my($self,$peg) = @_;

    if ($peg =~ /^fig\|(\d+\.\d+)\.peg\.\d+/)
    {
	return $self->genus_species($1);
    }
    return "";
}

sub get_genome_subsystem_data {
    my($self,$genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome eq $newG) 
    {
	return [map { ($_ =~ /^(\S[^\t]+\S)\t(\S[^\t]*\S)\t(\S+)/) ? [$1,$2,$3] : () }
		`cat $newGdir/Subsystems/bindings`];
    }
    else
    {
	return $fig->get_genome_subsystem_data($genome);
    }
}

sub orgname_of_orgid {
    my($self,$genome) = @_;

    return $self->genus_species($genome);
}

sub orgid_of_orgname {
    my($self,$genome_name) = @_;

    my @genomes = $self->genomes('complete');
    my $i;
    for ($i=0; ($i < @genomes) && ($genome_name ne $self->genus_species($genomes[$i])); $i++) {}
    return ($i == @genomes) ? undef : $genomes[$i];
}

sub genus_species_domain {
    my($self,$genome) = @_;

    return [$self->genus_species($genome),$self->genome_domain($genome)];
}

sub protein_subsystem_to_roles {
    my ($self,$peg,$subsystem) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (&FIG::genome_of($peg) ne $newG)
    {
	return $fig->protein_subsystem_to_roles($peg,$subsystem);
    }
    else
    {
	my @roles = map { (($_ =~ /^([^\t]+)\t([^\t]+)\t(\S+)$/) && ($1 eq $subsystem) && ($3 eq $peg)) ?
			  $2 : () } `cat $newGdir/Subsystems/bindings`;
	my %roles = map { $_ => 1 } @roles;
	return [sort keys(%roles)];
    }
}

sub contig_lengths {
    my ($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->contig_lengths($genome);
    }
    else
    {
	my $contig_lengths = $self->{_contig_len_index};

	if (!tied(%$contig_lengths))
	{
	     $contig_lengths = {};
	     if (open(CONTIGS,"<$newGdir/contigs"))
	     {
		 $/ = "\n>";
		 while (defined(my $x = <CONTIGS>))
		 {
		     chomp $x;
		     if ($x =~ />?(\S+)[^\n]*\n(.*)/s)
		     {
			 my $id = $1;
			 my $seq = $2;
			 $seq =~ s/\s//gs;
			 $contig_lengths->{$id} = length($seq);
		     }
		 }
		 close(CONTIGS);
		 $/ = "\n";
	     }
	}
	return $contig_lengths;
    }
}

sub contig_ln {
    my ($self, $genome, $contig) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->contig_ln($genome, $contig);
    }
    else
    {
	my $contig_len = $self->{_contig_len_index};

	if (tied(%$contig_len))
	{
	    return $contig_len->{$contig};
	}
	
	if (open(CONTIGS,"<$newGdir/contigs"))
	{
	    local $/ = "\n>";
	    while (defined(my $x = <CONTIGS>))
	    {
		chomp $x;
		if ($x =~ />?(\S+)[^\n]*\n(.*)/s)
		{
		    my $id = $1;
		    my $seq = $2;
		    $seq =~ s/\s//gs;
		    if ($id eq $contig)
		    {
			return length($seq);
		    }
		}
	    }
	    close(CONTIGS);
	}
    }
}

sub contigs_of
{
    my ($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->contigs_of($genome);
    }
    else
    {
	my @out;
	$self->load_contigs_index();

	my $contigs = $self->{_contigs_index};
	if (tied(%$contigs))
	{
	    return keys %$contigs;
	}
	
	if (open(CONTIGS,"<$newGdir/contigs"))
	{
	    local $/ = "\n>";
	    while (defined(my $x = <CONTIGS>))
	    {
		chomp $x;
		if ($x =~ />?(\S+)[^\n]*\n(.*)/s)
		{
		    my $id = $1;
		    push(@out, $id);
		}
	    }
	    close(CONTIGS);
	}
	return @out;
    }
}

=head3 dna_seq

usage: $seq = dna_seq($genome,@locations)

Returns the concatenated subsequences described by the list of locations.  Each location
must be of the form

    Contig_Beg_End

where Contig must be the ID of a contig for genome $genome.  If Beg > End the location
describes a stretch of the complementary strand.

=cut
#: Return Type $;
sub dna_seq {
    my($self,$genome,@locations) = @_;
 
    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->dna_seq($genome, @locations);
    }

    my $contigs = $self->{_contigs_index};
    if (!tied %$contigs)
    {
	$contigs = {};
	if (open(CONTIGS,"<$newGdir/contigs"))
	{
	    local $/ = "\n>";
	    while (defined(my $x = <CONTIGS>))
	    {
		chomp $x;
		if ($x =~ />?(\S+)[^\n]*\n(.*)/s)
		{
		    my $id = $1;
		    my $seq = $2;
		    $seq =~ s/\s//gs;
		    $contigs->{$id} = $seq;
		}
	    }
	    close(CONTIGS);
	}
    }

    my(@pieces,$loc,$contig,$beg,$end,$ln,$rdbH);

    @locations = map { split(/,/,$_) } @locations;
    @pieces = ();
    foreach $loc (@locations)
    {
        if ($loc =~ /^(\S+)_(\d+)_(\d+)$/)
        {
            ($contig,$beg,$end) = ($1,$2,$3);
	    my $seq = $contigs->{$contig};
	    
            $ln = length($seq);

            if (! $ln) {
                print STDERR "$genome/$contig: could not get length\n";
                return "";
            }

            if (&FIG::between(1,$beg,$ln) && &FIG::between(1,$end,$ln))
            {
                if ($beg < $end)
                {
                    push(@pieces, substr($seq, $beg - 1, ($end - $beg) + 1));
                }
                else
                {
                    push(@pieces, &FIG::reverse_comp(substr($seq, $end - 1, ($beg - $end) + 1)));
                }
            }
        }
    }
    return lc(join("",@pieces));
}

sub genome_szdna {
    my ($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->genome_szdna($genome);
    }
    else
    {
	my $contig_lens = $self->contig_lengths($genome);
	my $tot = 0;
	while ( my($contig,$len) = each %$contig_lens)
	{
	    $tot += $len;
	}
	return $tot;
    }
}

sub genome_version {
    my ($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->genome_version($genome);
    }
    else
    {
	return "$genome.0";
    }
}

sub genome_pegs {
    my ($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->genome_pegs($genome);
    }
    else
    {
	my @tmp = $self->all_features($genome,"peg");
	my $n = @tmp;
	return $n;
    }
}

sub genome_rnas {
    my ($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->genome_rnas($genome);
    }
    else
    {
	my @tmp = $self->all_features($genome,"rna");
	my $n = @tmp;
	return $n;
    }
}

sub genome_domain {
    my ($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->genome_domain($genome);
    }
    else
    {
	my $tax = $self->taxonomy_of($genome);
	return ($tax =~ /^([^ \t;]+)/) ? $1 : "unknown";
    }
}

sub genes_in_region {
    my($self,$genome,$contig,$beg,$end) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->genes_in_region($genome,$contig,$beg,$end);
    }
    else
    {
	&load_tbl($self);
	my $tblH = $self->{_tbl};
	my $maxV = 0;
	my $minV = 1000000000;
	my $genes = [];
	while ( my($fid,$tuple) = each %$tblH)
	{
	    if (($tuple->[0]->[0] =~ /^(\S+)_(\d+)_\d+$/) && ($1 eq $contig))
	    {
		my $beg1 = $2;
		my $last = @{$tuple->[0]} - 1;
		if (($tuple->[0]->[$last] =~ /^(\S+)_\d+_(\d+)$/) && ($1 eq $contig))
		{
		    my $end1 = $2;
		    if (&overlaps($beg,$end,$beg1,$end1))
		    {
			$minV = &FIG::min($minV,$beg1,$end1);
			$maxV = &FIG::max($maxV,$beg1,$end1);
			push(@$genes,$fid);
		    }
		}
	    }
	}
	return ($genes,$minV,$maxV);
    }
}

sub overlaps {
    my($b1,$e1,$b2,$e2) = @_;

    if ($b1 > $e1) { ($b1,$e1) = ($e1,$b1) }
    if ($b2 > $e2) { ($b2,$e2) = ($e2,$b2) }
    return &FIG::between($b1,$b2,$e1) || &FIG::between($b2,$b1,$e2);
}

sub all_contigs {
    my($self,$genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->all_contigs($genome);
    }
    else
    {
	&load_tbl($self);
	my %contigs;
	my $tblH = $self->{_tbl};
	while ( my($fid,$tuple) = each %$tblH)
	{
	    if ($tuple->[0]->[0] =~ /^(\S+)_\d+_\d+$/)
	    {
		$contigs{$1} = 1;
	    }
	}
	return keys(%contigs);
    }
}

sub all_features {
    my($self,$genome,$type) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->all_features($genome,$type);
    }
    else
    {
	&load_tbl($self);
	my $tblH = $self->{_tbl};
	return sort { &FIG::by_fig_id($a,$b) } 
	       grep { ($_ =~ /^fig\|\d+\.\d+\.([^\.]+)/) && ($1 eq $type) } 
	       keys(%$tblH);
    }
}

sub all_features_detailed_fast {
    my($self,$genome, $regmin, $regmax, $contig) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->all_features_detailed_fast($genome, $regmin, $regmax, $contig);
    }
    else
    {
	&load_tbl($self);
	my $tblH = $self->{_tbl};
	my $feat_details = [];
	while ( my($fid,$tuple) = each %$tblH)
	{
	    if ($fid =~ /^fig\|\d+\.\d+\.(\S+)\.\d+/)
	    {
		my $type = $1;
		if ($tuple->[0]->[0] =~ /^(\S+)_(\d+)_(\d+)$/)
		{
		    my($ctg, $beg, $end) = ($1, $2, $3);
		    next if (defined($contig) and $contig ne $ctg);

		    my($min,$max);
		    if ($beg < $end)
		    {
			$min = $beg;
			$max = $end;
		    }
		    else
		    {
			$min = $end;
			$max = $beg;
		    }

		    if (not defined($regmin) or not defined($regmax) or
			($min < $regmin and $regmin < $max) or ($min < $regmax and $regmax < $max) or ($min > $regmin and $max < $regmax))
		    {
			push(@$feat_details,[$fid,$tuple->[0]->[0],join(",",@{$tuple->[1]}),$type,$min,$max,$self->function_of($fid),'master','']);
		    }
		}
	    }
	}
	return $feat_details;
    }
}
	
sub compute_clusters {
    # Get the parameters.
    my ($self, $pegList, $subsystem, $distance) = @_;
    if (! defined $distance) {
        $distance = 5000;
    }

    my($peg,%by_contig);
    foreach $peg (@$pegList)
    {
        my $loc;
        if ($loc = $self->feature_location($peg))
        {
            my ($contig,$beg,$end) = &FIG::boundaries_of($loc);
            my $genome = &FIG::genome_of($peg);
            push(@{$by_contig{"$genome\t$contig"}},[($beg+$end)/2,$peg]);
        }
    }

    my @clusters = ();
    foreach my $tuple (keys(%by_contig))
    {
        my $x = $by_contig{$tuple};
        my @pegs = sort { $a->[0] <=> $b->[0] } @$x;
        while ($x = shift @pegs)
        {
            my $clust = [$x->[1]];
            while ((@pegs > 0) && (abs($pegs[0]->[0] - $x->[0]) <= $distance))
            {
                $x = shift @pegs;
                push(@$clust,$x->[1]);
            }

            if (@$clust > 1)
            {
                push(@clusters,$clust);
            }
        }
    }
    return sort { @$b <=> @$a }  @clusters;
}

sub boundaries_of {
    my($self,@args) = @_;
    
    my $fig     = $self->{_fig};
    return $fig->bundaries_of(@args);
}
    


sub feature_location {
    my($self,$fid) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (($fid =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_tbl($self);
	if (my $x = $self->{_tbl}->{$fid})
	{
	    return join(",",@{$x->[0]});
	}
	else
	{
	    return undef;
	}
    }
    else
    {
	return scalar $fig->feature_location($fid);
    }
}

sub function_of {
    my($self,$fid) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (($fid =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_functions($self);
	return $self->{_functions}->{$fid};
    }
    else
    {
	return scalar $fig->function_of($fid);
    }
}

sub feature_aliases {
    my($self,$fid) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    my @aliases;
    if (($fid =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_tbl($self);
	if (my $x = $self->{_tbl}->{$fid})
	{
	    @aliases = @{$x->[1]};
	}
	else
	{
	    @aliases = ();
	}
    }
    else
    {
	@aliases = $fig->feature_aliases($fid);
    }
    return wantarray() ? @aliases : join(",",@aliases);
}

sub feature_annotations {
    my($self,$fid,$rawtime) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    my @annotations;
    if (($fid =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_ann($self);
	if (my $x = $self->{_ann}->{$fid})
	{
	    @annotations = @{$x};
	}
	else
	{
	    @annotations = ();
	}

	if ($rawtime)
	{
	    return @annotations;
	}
	else
	{
	    return map { $_->[1] = localtime($_->[1]); $_ } @annotations;
	}
    }
    else
    {
	return $fig->feature_annotations($fid);
    }
}

sub get_translation {
    my($self,$peg) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (($peg =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_pseq($self);
	return $self->{_pseq}->{$peg};
    }
    else
    {
	return $fig->get_translation($peg);
    }
}

sub translation_length
{
    my($self, $peg) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (($peg =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_pseq($self);
	return length($self->{_pseq}->{$peg});
    }
    else
    {
	return $fig->translation_length($peg);
    }
}

sub translatable
{
    my($self, $peg) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (($peg =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_pseq($self);
	return length($self->{_pseq}->{$peg}) > 0 ? 1 : 0;;
    }
    else
    {
	return $fig->translatable($peg);
    }
}

sub is_real_feature
{
    my($self, $fid) = @_;

    if ($self->is_virtual_feature($fid))
    {
	return 1;
    }
    else
    {
	return $self->{_fig}->is_real_feature($fid);
    }
    
}

sub pegs_of
{
    my($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->pegs_of($genome);
    }

    $self->load_tbl();
    return grep { /\.peg\./ } keys %{$self->{_tbl}};
}


sub rnas_of
{
    my($self, $genome) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($genome ne $newG)
    {
	return $fig->pegs_of($genome);
    }

    $self->load_tbl();
    return grep { /\.rna\./ } keys %{$self->{_tbl}};
}

sub is_virtual_feature
{
    my($self, $peg) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (($peg =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_pseq($self);
	return 1;
    }
    else
    {
	return 0;
    }
}

####################################################
#
# Following are some MG-RAST specific features. FIGV seems a good a place as any to include them.#
#
#

=head3 taxa_to_seed_ids

Given a prefix of a taxonomy string, return the list of metagenome fids that
mapped to SEED organisms in that taxonomy.

=cut

sub taxa_to_seed_ids
{
    my($self, $tax) = @_;

    my $btree_file = "$self->{_orgdir}/taxa_summary_by_blast.btree";
    my %btree;

    my $btree_tie = tie %btree, 'DB_File', $btree_file, O_RDONLY, 0666, $DB_BTREE;

    if (!$btree_tie)
    {
	warn "Cannot tie $btree_file: $!\n";
	return undef;
    }

    my $key = $tax;
    my ($res, $value);
    my @vals;
    my %seen;

    for ($res = $btree_tie->seq($key, $value, R_CURSOR);
	 $res == 0 and $key =~ /^$tax/;
	 $res = $btree_tie->seq($key, $value, R_NEXT))
    {
	print "$key\n";
	push(@vals, map { [$key, $_] } grep { not $seen{$_}++ } split(/$;/, $value));
    }
    return @vals;
}

sub load_pseq {
    my($self) = @_;

    if ($self->{_pseq}) { return };

    my $newGdir = $self->{_orgdir};
    my $pseq     = {};
    if (open(FASTA,"<$newGdir/Features/peg/fasta"))
    {
	local $/ = "\n>";
	my $x;
	while (defined($x = <FASTA>))
	{
	    chomp $x;
	    if ($x =~ />?(\S+)[^\n]*\n(.*)/s)
	    {
		my $peg = $1;
		my $seq = $2;
		$seq =~ s/\s//gs;
		$pseq->{$peg} = $seq;
	    }
	}
	close(FASTA);
    }
    $self->{_pseq} = $pseq;
}

sub load_ss_data
{
    my($self) = @_;

    return if defined($self->{_ss_bindings});

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    open(S, "<$newGdir/Subsystems/bindings") or die "Cannot open $newGdir/Subsystems/bindings: $!";

    my $peg_index;
    my $bindings;
    my $genome_index;
    my %seen;
    while (<S>)
    {
	chomp;
	my($sname, $role, $peg) = split(/\t/);

	my($genome) = ($peg =~ /fig\|(\d+\.\d+)/);

	# my $genome = &FIG::genome_of($peg);
	
	push(@{$bindings->{$sname}->{$role}}, $peg);
	push(@{$peg_index->{$peg}}, [$sname, $role]);

	if (!$seen{$genome, $sname, $role})
	{
	    push(@{$genome_index->{$genome}}, [$sname, $role]);
	}
	    
    }
    close(S);

    open(S, "<$newGdir/Subsystems/subsystems") or die "Cannot open $newGdir/Subsystems/subsystems: $!";
    my $variant;
    while (<S>)
    {
	chomp;
	my($sname, $v) = split(/\t/);
	$variant->{$sname} = $v;
    }
    close(S);

    $self->{_ss_bindings} = $bindings;
    $self->{_ss_variants} = $variant;
    $self->{_ss_peg_index} = $peg_index;
    $self->{_ss_genome_index} = $genome_index;
}

sub load_tbl {
    my($self) = @_;

    if ($self->{_tbl}) { return };

    my $newGdir = $self->{_orgdir};
    my $tbl     = {};
    foreach my $x (`cat $newGdir/Features/*/tbl`)
    {
	if ($x =~ /^(\S+)\t(\S+)(\t(\S.*\S))?/)
	{
	    my $fid = $1;
	    my $loc = [split(/,/,$2)];
	    my $aliases = $4 ? [split(/\t/,$4)] : [];
	    $tbl->{$fid} = [$loc,$aliases];
	}
    }
    $self->{_tbl} = $tbl;
}

sub load_functions {
    my($self) = @_;

    if ($self->{_functions}) { return };

    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    my $functions  = {};
    foreach my $x (`cat $newGdir/*functions`)
    {
	if (($x =~ /^(fig\|(\d+\.\d+)\.\S+)\t(\S[^\t]*\S)/) && ($2 eq $newG))
	{
	    $functions->{$1} = $3;
	}
    }
    $self->{_functions} = $functions;
}


sub bbhs
{
    my($self,$peg,$cutoff) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    $self->load_bbh_index();

    $cutoff = 1.0e-10 unless defined($cutoff);

    my $tie = $self->{_bbh_tie};

    my @bbhs = $tie->get_dup($peg);

    # @bbhs now a list of comma-separated tuples (id2, score, bitscore)
#    print Dumper(\@bbhs);

    @bbhs = grep { $_->[1] < $cutoff } map { [split(/,/)] } @bbhs;

    if (not ($peg =~ /^fig\|(\d+\.\d+)/ && ($1 eq $newG)))
    {
	#
	# If this isn't one of our pegs, we retrieve the bbhs from the underlying
	# SEED and merge in the bbhs that we have here.
	#

	my @fbbhs = $fig->bbhs($peg, $cutoff);
#	print Dumper(\@fbbhs);
	push(@bbhs, @fbbhs);
    }
    return sort { $a->[1] <=> $b->[1] } @bbhs;
}    

sub sims
{
    my($self,$pegarg,$max,$maxP,$select, $max_expand, $filters) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};
    $max     = $max ? $max : 10000;
    $maxP    = $maxP ? $maxP : 1.0e-5;
    $select     = $select ? $select : "all";

    my $prefix = "_sims";
    my $flip_prefix = "_flips";

    if ($select eq 'raw')
    {
	$prefix = "_raw_sims";
	$flip_prefix = "_raw_flips";
    }

    #
    # Partition pegs into one set that is part of this organism
    # and another set that is not.
    #
    my @pegs = ref($pegarg) ? @$pegarg : ($pegarg);
    my(@part, @not_part, %part);
    my %sims;
    
    for my $peg  (@pegs)
    {
	if ($peg =~ /^fig\|(\d+\.\d+)/ and $1 eq $newG)
	{
	    push(@part, $peg);
	    $part{$peg}++;
	}
	else
	{
	    push(@not_part, $peg);
	    $sims{$peg} = [];
	}
    }

    #
    # Retrieve a batch of the sims for the not-part pegs, and partition
    # into %sims.
    #
    for my $sim ($fig->sims(\@not_part, $max, $maxP, $select, $max_expand, $filters))
    {
	push(@{$sims{$sim->id1}}, $sim);
    }

    my @out;
    my $start_len;

    for my $peg (@pegs)
    {
	$start_len = @out;
	if (not $part{$peg})
	{
	    my @flips = $self->retrieve_sims($peg, 	$flip_prefix, $maxP, $select);
	    
	    @flips = sort { $b->bsc <=> $a->bsc } @flips;
	    
	    # my @old = $fig->sims($peg,$max,$maxP,$select);
	    
	    my @old = sort { $b->bsc <=> $a->bsc } @{$sims{$peg}};

	    # my @merged = ();
	    my $i1 = 0;
	    my $i2 = 0;
	    while (($i1 < @flips) || ($i2 < @old))
	    {
		if (($i1 == @flips) || (($i2 < @old) && ($flips[$i1]->[11] < $old[$i2]->[11])))
		{
		    # push(@merged,$old[$i2]);
		    push(@out,$old[$i2]);
		    $i2++;
		}
		else
		{
		    # push(@merged,$flips[$i1]);
		    push(@out,$flips[$i1]);
		    $i1++;
		}
	    }
	}
	else
	{
	    my @sims = $self->retrieve_sims($peg, $prefix, $maxP, $select);
	    push(@out, @sims);
	}

	if (@out - $start_len > $max)
	{
	    $#out = $start_len + $max - 1;
	}
    }
    
    return @out;
}

sub retrieve_sims
{
    my($self, $peg, $prefix, $maxP, $select) = @_;
    
    $self->load_sims_index();

    my $info = $self->{"${prefix}_index"}->{$peg};

    if ($info !~ /^(\d+),(\d+)$/)
    {
	return;
    }
    my($seek, $len) = ($1, $2);

    my $sims_txt = &FIG::read_block($self->{"${prefix}_fh"}, $seek, $len);

    my @sims = map { bless $_, 'Sim' } grep { $_->[10] <= $maxP } map { [split(/\t/)] } @$sims_txt;

    if ($select =~ /^figx?$/)
    {
	@sims = grep { $_->[1] =~ /^fig/ } @sims;
    }

    return @sims;
}

sub sims_old {
    my($self,$peg,$max,$maxP,$select) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};
    $max     = $max ? $max : 10000;
    $maxP    = $maxP ? $maxP : 1.0e-5;
    $select     = $select ? $select : "all";

    if (($peg =~ /^fig\|(\d+\.\d+)/) && ($1 eq $newG))
    {
	&load_sims($self);
	my @sims = ();
	my $raw_sims = $self->{_sims}->{$peg};
	if ($raw_sims)
	{
	    foreach my $sim ( grep { $_->[10] <= $maxP } @$raw_sims )
	    {
		my $id2 = $sim->id2;
		my $id1 = $sim->id1;
		my @relevant = ();

		my @maps_to = $fig->mapped_prot_ids( $id2 );
		my $ref_len = $maps_to[0]->[1];

		@maps_to = grep { $_->[0] !~ /^xxx\d+/ } @maps_to;
		
		if ( $select =~ /^figx?$/ )          # Only fig
		{
		    @relevant = grep { $_->[0] =~ /^fig/ } @maps_to;
		}
		else                                 # All
		{
		    @relevant = @maps_to;
		}

		my $seen = {};
		foreach my $x ( @relevant )
		{
		    my ( $x_id, $x_ln ) = @$x;
		    
		    next if $seen->{$x_id};
		    $seen->{$x_id} = 1;

		    my $delta2  = $ref_len - $x_ln;   # Coordinate shift
		    my $sim1    = [ @$sim ];                  # Make a copy
		    $sim1->[1]  = $x_id;
		    $sim1->[8] -= $delta2;
		    $sim1->[9] -= $delta2;
		    bless( $sim1, "Sim" );
		    push( @sims, $sim1 );
		}
	    }
	}

	if (@sims > $max) { $#sims = $max-1; }
	return @sims;
    }
    else
    {
	return $fig->sims($peg,$max,$maxP,$select);
    }
}


sub coupled_to
{
    my($self,$peg) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($peg =~ /^fig\|$newG\.peg/)
    {
	
	$self->load_coupling_index();
	
	my $tie = $self->{_pch_tie};

	if (not defined($tie))
	{
	    return;
	}
	
	my @dat = $tie->get_dup($peg);
	
	return map { [split(/$;/, $_)] } @dat;
    }
    else
    {
	return $fig->coupled_to($peg);
    }

}

sub coupling_evidence
{
    my($self,$peg1, $peg2) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($peg1 =~ /^fig\|$newG\.peg/)
    {
	$self->load_coupling_index();
	
	my $tie = $self->{_pch_ev_tie};
	
	if (not defined($tie))
	{
	    return;
	}

	my @dat = $tie->get_dup("$peg1$;$peg2");

	my @a;
	return map { @a = split(/$;/, $_); [@a[0,1,4]] } @dat;
    }
    else
    {
	return $fig->coupling_evidence($peg1, $peg2);
    }

}

sub coupling_and_evidence
{
    my($self,$peg1) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($peg1 =~ /^fig\|$newG\.peg/)
    {
	$self->load_coupling_index();
	
	my $tie = $self->{_pch_tie};
	my $evtie = $self->{_pch_ev_tie};

	if (not(defined($tie) and defined($evtie)))
	{
	    return;
	}

	my @out;
	my @coupled_to = $tie->get_dup($peg1);
	for my $ent (@coupled_to)
	{
	    my ($peg2, $score) = split(/$;/, $ent);
	    
	    my @ev = $evtie->get_dup("$peg1$;$peg2");

	    my $l = [];
	    for my $event (@ev)
	    {
		my($peg3, $peg4, $iden3, $iden4, $rep) = split(/$;/, $event);
		push(@$l, [$peg3, $peg4]);
	    }
	    push(@out, [$score, $peg2, $l]);
	}
	return @out;
    }
    else
    {
	return $fig->coupling_and_evidence($peg1);
    }

}

sub load_coupling_index
{
    my($self) = @_;

    return if defined($self->{_pch_index});

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    my $pch_btree = "$newGdir/pchs.btree";
    my $ev_btree = "$newGdir/pchs.evidence.btree";

    my $pch_index = {};
    my $ev_index = {};

    my $tied = tie %$pch_index, 'DB_File', $pch_btree, O_RDONLY, 0666, $DB_BTREE;
    my $evtied = tie %$ev_index, 'DB_File', $ev_btree, O_RDONLY, 0666, $DB_BTREE;

    #
    # Set these even if failed so we don't keep trying to open and failing.
    #
    $self->{_pch_index} = $pch_index;
    $self->{_pch_tie} = $tied;
    $self->{_pch_ev_index} = $ev_index;
    $self->{_pch_ev_tie} = $evtied;
    
    if (not $tied)
    {
	warn "Cannot tie pch index $pch_btree: $!\n";
    }
}

sub load_bbh_index
{
    my($self) = @_;

    return if defined($self->{_bbh_index});

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    my $bbh_file = "$newGdir/bbhs";
    my $bbh_index_file = "$bbh_file.index";
    my $bbh_index = {};

    my $tied = tie %$bbh_index, 'DB_File', $bbh_index_file, O_RDONLY, 0666, $DB_BTREE;

    #
    # Set these even if failed so we don't keep trying to open and failing.
    #
    $self->{_bbh_index} = $bbh_index;
    $self->{_bbh_tie} = $tied;
    
    if (not $tied)
    {
	warn "Cannot tie bbh index $bbh_index_file: $!\n";
    }
}

sub load_contigs_index
{
    my($self) = @_;

    return if defined($self->{_contigs_index});

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    my $contig_index_file = "$newGdir/contigs.btree";
    my $contig_index = {};
    my $contig_len_index_file = "$newGdir/contig_len.btree";
    my $contig_len_index = {};

    my $tied = tie %$contig_index, 'DB_File', $contig_index_file, O_RDONLY, 0666, $DB_BTREE;
    if (not $tied)
    {
	warn "Cannot tie contig index $contig_index_file: $!\n";
    }

    my $ltied = tie %$contig_len_index, 'DB_File', $contig_len_index_file, O_RDONLY, 0666, $DB_BTREE;
    if (not $ltied)
    {
	warn "Cannot tie contig length index $contig_len_index_file: $!\n";
    }

    #
    # Set these even if failed so we don't keep trying to open and failing.
    #
    $self->{_contigs_index} = $contig_index;
    $self->{_contigs_tie} = $tied;
    $self->{_contig_len_index} = $contig_len_index;
    $self->{_contig_len_tie} = $ltied;
    
}

sub load_sims_index
{
    my($self) = @_;

    return if defined($self->{_sims_index});

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    my $sims_file = "$newGdir/expanded_similarities";
    my $sims_index_file = "$sims_file.index";
    my $sims_index = {};

    my $flips_file = "$newGdir/expanded_similarities.flips";
    my $flips_index_file = "$flips_file.index";
    my $flips_index = {};

    my $raw_sims_file = "$newGdir/similarities";
    my $raw_sims_index_file = "$raw_sims_file.index";
    my $raw_sims_index = {};

    my $raw_flips_file = "$newGdir/similarities.flips";
    my $raw_flips_index_file = "$raw_flips_file.index";
    my $raw_flips_index = {};

    $self->open_sims($sims_index, $sims_file, $sims_index_file, "_sims");
    $self->open_sims($flips_index, $flips_file, $flips_index_file, "_flips");
    $self->open_sims($raw_sims_index, $raw_sims_file, $raw_sims_index_file, "_raw_sims");
    $self->open_sims($raw_flips_index, $raw_flips_file, $raw_flips_index_file, "_raw_flips");
}

#
# Open a sims file, tie it to a hash, and store the info into the $self obj.
#
sub open_sims
{
    my($self, $hash, $sims_file, $index_file, $prefix) = @_;

    my $tied = tie %$hash, 'DB_File', $index_file, O_RDONLY, 0666, $DB_BTREE;

    #
    # Set these even if failed so we don't keep trying to open and failing.
    #
    $self->{"${prefix}_index"} = $hash;
    $self->{"${prefix}_tie"} = $tied;
    
    if (not $tied)
    {
	warn "Cannot tie sims index $index_file: $!\n";
    }

    #
    # open the sims file as well.
    #

    $self->{"${prefix}_fh"} = new FileHandle("<$sims_file");

    if (!$self->{"${prefix}_fh"})
    {
	warn "Cannot open sims file $sims_file: $!\n";
    }
}

sub load_sims {
    my($self) = @_;

    if ($self->{_sims}) { return };

    my $newGdir = $self->{_orgdir};
    
    my $sims     = {};
    foreach my $x (`cat $newGdir/similarities`)
    {
	chop; $x;
	if ($x =~ /^(\S+)/)
	{
	    push(@{$sims->{$1}},bless([split(/\t/,$x)],'Sim'));
	}
    }
    $self->{_sims} = $sims;
}

sub get_attributes {
    my($self,$peg) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if (($peg =~ /^fig\|(\d+\.\d+)\.peg\.\d+$/) && ($1 eq $newG))
    {
	&load_attr($self);
	if (my $x = $self->{_attr}->{$peg})
	{
	    return @$x;
	}
	else
	{
	    return ();
	}
    }
    else
    {
	return $fig->get_attributes($peg);
    }
}

sub load_attr {
    my($self) = @_;

    if ($self->{_attr}) { return };

    my $newGdir = $self->{_orgdir};
    my $attr     = {};
    foreach my $x (`cat $newGdir/evidence.codes`)
    {
	if ($x =~ /^(\S+)\t(\S+)/)
	{
	    push(@{$attr->{$1}},[$1,"evidence_code",$2,""]);
	}
    }
    $self->{_attr} = $attr;
}
    
sub load_ann {
    my($self) = @_;

    if ($self->{_ann}) { return };

    my $newGdir = $self->{_orgdir};
    my $ann     = {};
    if (open(ANN,"<$newGdir/annotations"))
    {
	$/ = "\n//\n";
	while (defined(my $x = <ANN>))
	{
	    chomp $x;
	    if ($x =~ /^(\S+)\n([^\n]+)\n([^\n]+)\n(.*)/s)
	    {
		push(@{$ann->{$1}},[$1,$2,$3,"$4\n"]);
	    }
	}
	$/ = "\n";
	close(ANN);
    }
    $self->{_ann} = $ann;
}

sub taxonomy_of {
    my($self,$genome) = @_;
    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};

    if ($newG ne $genome)
    {
	return $fig->taxonomy_of($genome);
    }

    my $tax;
    if (open(TAX,"<$newGdir/TAXONOMY") && ($tax = <TAX>))
    {
	chop $tax;
	return $tax;
    }
    else
    {
	return "unknown";
    }
}

sub build_tree_of_complete {
    my($self,$min_for_label) = @_;
    return $self->build_tree_of_all($min_for_label, "complete");
}

sub build_tree_of_all {
    my($self, $min_for_label, $complete)=@_;
    my(@last,@tax,$i,$prefix,$lev,$genome,$tax);

    $min_for_label = $min_for_label ? $min_for_label : 10;
    open(TMP,">/tmp/tree$$") || die "could not open /tmp/tree$$";
    print TMP "1. root\n";

    @last = ();


    foreach $genome (grep { ! $self->is_environmental($_) } $self->sort_genomes_by_taxonomy($self->genomes($complete)))
    {
        $tax = $self->taxonomy_of($genome);
        @tax = split(/\s*;\s*/,$tax);
        push(@tax,$genome);
        for ($i=0; ((@last > $i) && (@tax > $i) && ($last[$i] eq $tax[$i])); $i++) {}
        while ($i < @tax)
        {
            $lev = $i+2;
            $prefix = " " x (4 * ($lev-1));
            print TMP "$prefix$lev\. $tax[$i]\n";
            $i++;
        }
        @last = @tax;
    }
    close(TMP);
    my $tree = &tree_utilities::build_tree_from_outline("/tmp/tree$$");
    $tree->[0] = 'All';
    &FIG::limit_labels($tree,$min_for_label);
    unlink("/tmp/tree$$");
    return ($tree,&tree_utilities::tips_of_tree($tree));
}

sub sort_genomes_by_taxonomy {
    my($self,@genomes) = @_;

    return map     { $_->[0] }
           sort    { $a->[1] cmp $b->[1] }
           map     { [$_,$self->taxonomy_of($_)] }
           @genomes;
}
    
sub taxonomic_groups_of_complete {
    my($self,$min_for_labels) = @_;

    my($tree,undef) = $self->build_tree_of_complete($min_for_labels);
    return &FIG::taxonomic_groups($tree);
}

=head2 Search Database

Searches the database for objects that match the query string in some way.

Returns a list of results if the query is ambiguous or an unique identifier
otherwise.

=cut

sub search_database {
    # get parameters
    my ($self, $query, $options) = @_;

    my $fig     = $self->{_fig};
    my $newG    = $self->{_genome};
    my $newGdir = $self->{_orgdir};
    
    #
    # Check for local ids and genome.
    #

    
    if ($query eq $newG or lc($query) eq lc($self->genus_species($newG)))
    {
	return { type => 'organism', result => $newG };
    }

    if ($query =~ /^fig\|(\d+\.\d+)\.peg/ and $1 eq $newG)
    {
	return { type => 'feature', result => $query };
    }

    #
    # Match on functions
    #

    &load_functions($self);

    my @protlist;
    my $fn = $self->{_functions};
    for my $id (keys %$fn)
    {
	if ($fn->{$id} =~ /$query/i)
	{
	    push @protlist, [$id, $fn->{$id}, $newG];
	}
    }

    #
    # Pull FIG lookups.
    #

    my $res = $fig->search_database($query, $options);

    #
    # And integrate our protein list
    #

    my $res_prot;
    if (ref($res) eq 'ARRAY')
    {
	for my $ent (@$res)
	{
	    if ($ent->{type} eq 'proteins')
	    {
		$res_prot = $ent->{result};
	    }
	}
	if (!$res_prot)
	{
	    $res_prot = {type => 'proteins', result => \@protlist}
	}
	else
	{
	    push(@$res_prot, @protlist);
	}
    }
    
    return $res;
	
}


=head3 model_directory

C<< FIG->model_directory($organism); >>

Returns the model directory of an organism.

=over 4

=item $organism

The seed-taxonomy id of the organism, e.g. 83333.1. If you pass the
string 'All', you will get the directory for the global model.

=back

=cut

sub model_directory {
  my ($self, $organism) = @_;

  my $directory;

  if ($organism eq $self->{_genome})
  {
      $directory = $self->{_orgdir} . "/Models";
      $directory .= "/$organism" if defined($organism);
  }
  elsif(!defined $organism && defined $self->{_genome})
  {
      $directory = $self->{_orgdir} . "/Models";
  }
  else {
      $directory = $self->{_fig}->model_directory($organism);
  }

  return $directory;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3