[Bio] / FigKernelPackages / FIGM.pm Repository:
ViewVC logotype

View of /FigKernelPackages/FIGM.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Thu Jun 26 21:11:11 2008 UTC (11 years, 8 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2008_07_21, rast_2008_0924, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_0625, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_08_07
Add annotation support to FIGV. Add FIGM.

# -*- perl -*-
#########################################################################
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#########################################################################

package FIGM;

use FIGV;
use Carp;
use strict;
use FIG;
use FIG_Config;
use SFXlate;
use SproutFIG;
use Tracer;
use Data::Dumper;
use vars qw($AUTOLOAD);
use DB_File;
use FileHandle;

#
# Create a new FIGM.
# Since creating a FIGV is only a data structure manipulation
# we go ahead and create one for each orgdir listed. We need
# to poke at the orgdir to find the genome id that it represents
# anyway.
#
sub new {
    my($class, $fig, @org_dirs) = @_;

    if (!ref($fig))
    {
	$fig = new FIG;
    }

    my $self         = {};
    $self->{_fig}    = $fig;
    $self->{_org_dirs} =  [@org_dirs];
    $self->{_figv_cache} = {};

    bless $self, $class;

    for my $dir (@org_dirs)
    {
	my $figv = new FIGV($dir, undef, $fig);
	if ($figv)
	{
	    $self->{_figv_cache}->{$figv->genome_id()} = $figv;
	}
    }

    return $self;
}

sub is_complete
{
    return 1;
}

#
# Redirect any method invocations that we don't implement out to the
# underlying FIG object.
#
sub AUTOLOAD
{
    my($self, @args) = @_;

    if (ref($self) ne "FIGM") {
	confess "BAD FIGM object passed to AUTOLOAD";
    }

    no strict 'refs';

    my $meth = $AUTOLOAD;
    $meth =~ s/.*:://;
    my $fmeth = "FIG::$meth";

    my $fig = $self->{_fig};
#    my $args = Dumper(\@args);
    if (wantarray)
    {
	my @res = $fig->$meth(@args);
#	warn "FIGV invoke $meth($args) returns\n", Dumper(\@res);
	return @res;
    }
    else
    {
	my $res = $fig->$meth(@args);
#	warn "FIGV invoke $meth($args) returns\n", Dumper($res);
	return $res;
    }
}

sub FIG
{
    my($self) = @_;
    return $self;
}

sub find_figv
{
    my($self, $genome) = @_;

    my $figv = $self->{_figv_cache}->{$genome};
    if (ref($figv))
    {
	return $figv;
    }
    else
    {
	return $self->{_fig};
    }
}

sub find_figv_for_fid
{
    my($self, $fid) = @_;
    if ($fid =~ /^fig\|(\d+.\d+)\./)
    {
	return $self->find_figv($1);
    }
    else
    {
	return $self->{_fig};
    }
}

sub sort_fids_by_taxonomy
{
    my($self,@fids) = @_;

    return map     { $_->[0] }
           sort    { $a->[1] cmp $b->[1] }
           map     { [$_,$self->taxonomy_of($self->genome_of($_))] }
           @fids;
}

sub genomes
{
    my($self, $complete) = @_;

    my $fig = $self->{_fig};
    my @base = $fig->genomes($complete);

    return @base, keys %{$self->{_figv_cache}};
}

sub get_basic_statistics
{
    my($self, $genome) = @_;

    my $figv = $self->find_figv($genome);

    return $figv->get_basic_statistics($genome);
}


sub get_peg_statistics {
    my ($self, $genome) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->get_peg_statistics($genome);
}

#
# To retrieve a subsystem in FIGV, we create the subsystem as normal via $fig->get_subsystem,
# then insert the row for the virtual org dir we are processing.
#
# The FIGM solution needs work.
#

sub get_subsystem
{
    my($self,$ssa) = @_;

    my $fig     = $self->{_fig};

    my $ss = $fig->get_subsystem($ssa);
    return $ss;
}

sub active_subsystems
{
    my($self, $genome, $all) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->active_subsystems($genome, $all);
}

sub genus_species {
    my($self,$genome) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->genus_species($genome);
}

sub get_genome_assignment_data {
    my($self,$genome) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->get_genome_assignment_data($genome);
}

sub org_of {
    my($self,$peg) = @_;

    if ($peg =~ /^fig\|(\d+\.\d+)\.peg\.\d+/)
    {
	return $self->genus_species($1);
    }
    return "";
}

sub get_genome_subsystem_data {
    my($self,$genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->get_genome_subsystem_data($genome);
}

sub get_genome_subsystem_count
{
    my($self,$genome) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->get_genome_subsystem_count($genome);
}

sub orgname_of_orgid {
    my($self,$genome) = @_;

    return $self->genus_species($genome);
}

sub orgid_of_orgname {
    my($self,$genome_name) = @_;

    my @genomes = $self->genomes('complete');
    my $i;
    for ($i=0; ($i < @genomes) && ($genome_name ne $self->genus_species($genomes[$i])); $i++) {}
    return ($i == @genomes) ? undef : $genomes[$i];
}

sub genus_species_domain {
    my($self,$genome) = @_;

    return [$self->genus_species($genome),$self->genome_domain($genome)];
}

sub protein_subsystem_to_roles {
die;
}

sub contig_lengths {
    my ($self, $genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->contig_lengths($genome);
}

sub contig_ln {
    my ($self, $genome, $contig) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->contig_ln($genome, $contig);
}

sub contigs_of
{
    my ($self, $genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->contigs_of($genome);
}

=head3 dna_seq

usage: $seq = dna_seq($genome,@locations)

Returns the concatenated subsequences described by the list of locations.  Each location
must be of the form

    Contig_Beg_End

where Contig must be the ID of a contig for genome $genome.  If Beg > End the location
describes a stretch of the complementary strand.

=cut
#: Return Type $;
sub dna_seq {
    my($self,$genome,@locations) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->dna_seq($genome, @locations);
}

sub genome_szdna {
    my ($self, $genome) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->genome_szdna($genome);
}

sub genome_version {
    my ($self, $genome) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->genome_version($genome);
}

sub genome_pegs {
    my ($self, $genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->genome_pegs($genome);
}

sub genome_rnas {
    my ($self, $genome) = @_;

    my $figv = $self->find_figv($genome);
    return $figv->genome_rnas($genome);
}

sub genome_domain {
    my ($self, $genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->genome_domain($genome);
}

sub genes_in_region {
    my($self,$genome,$contig,$beg,$end) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->genes_in_region($genome,$contig,$beg,$end);
}

sub overlaps {
    my($b1,$e1,$b2,$e2) = @_;

    if ($b1 > $e1) { ($b1,$e1) = ($e1,$b1) }
    if ($b2 > $e2) { ($b2,$e2) = ($e2,$b2) }
    return &FIG::between($b1,$b2,$e1) || &FIG::between($b2,$b1,$e2);
}

sub all_contigs {
    my($self,$genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->all_contigs($genome);
}

sub all_features {
    my($self,$genome,$type) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->all_features($genome,$type);
}

sub all_features_detailed_fast {
    my($self,$genome, $regmin, $regmax, $contig) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->all_features_detailed_fast($genome, $regmin, $regmax, $contig);
}

sub compute_clusters {
    # Get the parameters.
    my ($self, $pegList, $subsystem, $distance) = @_;
    if (! defined $distance) {
        $distance = 5000;
    }

    my($peg,%by_contig);
    foreach $peg (@$pegList)
    {
        my $loc;
        if ($loc = $self->feature_location($peg))
        {
            my ($contig,$beg,$end) = &FIG::boundaries_of($loc);
            my $genome = &FIG::genome_of($peg);
            push(@{$by_contig{"$genome\t$contig"}},[($beg+$end)/2,$peg]);
        }
    }

    my @clusters = ();
    foreach my $tuple (keys(%by_contig))
    {
        my $x = $by_contig{$tuple};
        my @pegs = sort { $a->[0] <=> $b->[0] } @$x;
        while ($x = shift @pegs)
        {
            my $clust = [$x->[1]];
            while ((@pegs > 0) && (abs($pegs[0]->[0] - $x->[0]) <= $distance))
            {
                $x = shift @pegs;
                push(@$clust,$x->[1]);
            }

            if (@$clust > 1)
            {
                push(@clusters,$clust);
            }
        }
    }
    return sort { @$b <=> @$a }  @clusters;
}

sub boundaries_of {
    my($self,@args) = @_;

    my $fig     = $self->{_fig};
    return $fig->boundaries_of(@args);
}

sub feature_location {
    my($self,$fid) = @_;

    my $figv = $self->find_figv_for_fid($fid);
    return scalar $figv->feature_location($fid);
}

sub function_of {
    my($self,$fid) = @_;

    my $fig     = $self->{_fig};

    my $figv = $self->find_figv_for_fid($fid);
    return $figv->function_of($fid);
}

sub assign_function
{
    my($self,$fid, $user, $function, $confidence) = @_;

    my $fig     = $self->{_fig};

    my $figv = $self->find_figv_for_fid($fid);
    return $figv->assign_function($fid, $user, $function, $confidence);
}

sub add_annotation
{
    my($self, $feature_id,$user,$annotation, $time_made) = @_;

    my $fig     = $self->{_fig};

    my $figv = $self->find_figv_for_fid($feature_id);
    return $figv->add_annotation($feature_id,$user,$annotation, $time_made);
}

sub feature_aliases {
    my($self,$fid) = @_;
    my $figv = $self->find_figv_for_fid($fid);
    return $figv->feature_aliases($fid);
}

sub feature_annotations {
    my($self,$fid,$rawtime) = @_;
    my $figv = $self->find_figv_for_fid($fid);
    return $figv->feature_annotations($fid);
}

sub get_translation {
    my($self,$peg) = @_;
    my $figv = $self->find_figv_for_fid($peg);
    return $figv->get_translation($peg);
}

sub translation_length
{
    my($self, $peg) = @_;
    my $figv = $self->find_figv_for_fid($peg);
    return $figv->translation_length($peg);
}

sub translatable
{
    my($self, $peg) = @_;
    my $figv = $self->find_figv_for_fid($peg);
    return $figv->translatable($peg);
}

sub pick_gene_boundaries {
    return &FIG::pick_gene_boundaries(@_);
}

sub call_start {
    return &FIG::call_start(@_);
}

sub is_real_feature
{
    my($self, $fid) = @_;

    my $figv = $self->find_figv_for_fid($fid);
    return $figv->is_real_feature($fid);

}

sub pegs_of
{
    my($self, $genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->pegs_of($genome);
}


sub rnas_of
{
    my($self, $genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->pegs_of($genome);
}

sub is_virtual_feature
{
    my($self, $peg) = @_;
    my $figv = $self->find_figv_for_fid($peg);

    return ref($figv) =~ /FIGV/ ? 1 : 0;
}

sub bbhs
{
    my($self,$peg,$cutoff) = @_;

    my $figv = $self->find_figv_for_fid($peg);

    return $figv->bbhs($peg, $cutoff);
}

sub sims
{
    my($self,$pegarg,$max,$maxP,$select, $max_expand, $filters) = @_;

    my $figv = $self->find_figv_for_fid($pegarg);
    return $figv->sims($pegarg, $max, $maxP, $select, $max_expand, $filters);
}

sub coupled_to
{
    my($self,$peg) = @_;

    my $figv = $self->find_figv_for_fid($peg);
    return $figv->coupled_to($peg);
}

sub coupling_evidence
{
    my($self,$peg1, $peg2) = @_;
    my $figv = $self->find_figv_for_fid($peg1);
    return $figv->coupling_evidence($peg1, $peg2);
}

sub coupling_and_evidence
{
    my($self,$peg1) = @_;
    my $figv = $self->find_figv_for_fid($peg1);
    return $figv->coupling_and_evidence($peg1);

}

sub in_pch_pin_with
{
    my($self, $peg1, $diverse) = @_;

    my @all = $self->in_pch_pin_with_and_evidence($peg1);

    if ($diverse)
    {
	return map { $_->[0] } grep { $_->[1] == 1 } @all;
    }
    else
    {
	return map { $_->[0] } @all;
    }
}

sub in_pch_pin_with_and_evidence
{
    my($self,$peg1) = @_;

    my $figv = $self->find_figv_for_fid($peg1);
    return $figv->in_pch_pin_with_and_evidence($peg1);
}

sub get_attributes {
    my($self, $id, $attr) = @_;

    my $fig     = $self->{_fig};

    if ($id =~ /^fig\|(\d+\.\d+)\.peg\.\d+$/ or $id =~ /^(\d+\.\d+)/)
    {
	my $figv = $self->find_figv($1);
	return $figv->get_attributes($id, $attr);
    }
    else
    {
	return $fig->get_attributes($id, $attr);
    }
}

sub taxonomy_of {
    my($self,$genome) = @_;
    my $figv = $self->find_figv($genome);
    return $figv->taxonomy_of($genome);
}

sub build_tree_of_complete {
    my($self,$min_for_label) = @_;
    return $self->build_tree_of_all($min_for_label, "complete");
}

sub build_tree_of_all {
    my($self, $min_for_label, $complete)=@_;
    my(@last,@tax,$i,$prefix,$lev,$genome,$tax);

    $min_for_label = $min_for_label ? $min_for_label : 10;
    open(TMP,">/tmp/tree$$") || die "could not open /tmp/tree$$";
    print TMP "1. root\n";

    @last = ();


    foreach $genome (grep { ! $self->is_environmental($_) } $self->sort_genomes_by_taxonomy($self->genomes($complete)))
    {
        $tax = $self->taxonomy_of($genome);
        @tax = split(/\s*;\s*/,$tax);
        push(@tax,$genome);
        for ($i=0; ((@last > $i) && (@tax > $i) && ($last[$i] eq $tax[$i])); $i++) {}
        while ($i < @tax)
        {
            $lev = $i+2;
            $prefix = " " x (4 * ($lev-1));
            print TMP "$prefix$lev\. $tax[$i]\n";
            $i++;
        }
        @last = @tax;
    }
    close(TMP);
    my $tree = &tree_utilities::build_tree_from_outline("/tmp/tree$$");
    $tree->[0] = 'All';
    &FIG::limit_labels($tree,$min_for_label);
    unlink("/tmp/tree$$");
    return ($tree,&tree_utilities::tips_of_tree($tree));
}

sub sort_genomes_by_taxonomy {
    my($self,@genomes) = @_;

    return map     { $_->[0] }
           sort    { $a->[1] cmp $b->[1] }
           map     { [$_,$self->taxonomy_of($_)] }
           @genomes;
}

sub taxonomic_groups_of_complete {
    my($self,$min_for_labels) = @_;

    my($tree,undef) = $self->build_tree_of_complete($min_for_labels);
    return &FIG::taxonomic_groups($tree);
}

=head2 Search Database

Searches the database for objects that match the query string in some way.

Returns a list of results if the query is ambiguous or an unique identifier
otherwise.

=cut


=head3 model_directory

    FIG->model_directory($organism);

Returns the model directory of an organism.

=over 4

=item $organism

The seed-taxonomy id of the organism, e.g. 83333.1. If you pass the
string 'All', you will get the directory for the global model.

=back

=cut

sub model_directory {
  my ($self, $organism) = @_;

  my $figv = $self->find_figv($organism);
  my $directory = $figv->model_directory($organism);
  return $directory;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3