[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

View of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Sat Feb 17 17:14:11 2007 UTC (13 years, 1 month ago) by overbeek
Branch: MAIN
Changes since 1.2: +102 -0 lines
add possible_frameshift logic to FIGO.pm

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package FIGO;

use strict;
use FIG;
use FIG_Config;
use SFXlate;
use SproutFIG;
use Tracer;
use Data::Dumper;
use FigFams;
use gjoparseblast;

sub new {
    my($class,$low_level) = @_;

    my $fig;
    if ($low_level && ($low_level =~ /sprout/i))
    {
        $fig = new SproutFIG($FIG_Config::sproutDB, $FIG_Config::sproutData);
    }
    else
    {
	$fig = new FIG;
    }

    my $self = {};
    $self->{_fig} = $fig;
    $self->{_tmp_dir} = $FIG_Config::temp;
    return bless $self, $class;
}

sub genomes {
    my($self,@constraints) = @_;
    my $fig = $self->{_fig};

    my %constraints = map { $_ => 1 } @constraints;
    my @genomes = ();

    if ($constraints{complete})
    {
	@genomes = $fig->genomes('complete');
    }
    else
    {
	@genomes = $fig->genomes;
    }

    if ($constraints{prokaryotic})
    {
	@genomes = grep { $fig->is_prokaryotic($_) } @genomes;
    }
    
    if ($constraints{eukaryotic})
    {
	@genomes = grep { $fig->is_eukaryotic($_) } @genomes;
    }
    
    if ($constraints{bacterial})
    {
	@genomes = grep { $fig->is_bacterial($_) } @genomes;
    }
    
    if ($constraints{archaeal})
    {
	@genomes = grep { $fig->is_archaeal($_) } @genomes;
    }
    
    if ($constraints{nmpdr})
    {
	@genomes = grep { $fig->is_NMPDR_genome($_) } @genomes;
    }
    
    return map { &GenomeO::new('GenomeO',$self,$_) } @genomes;
}

sub subsystems {
    my($self) = @_;
    my $fig = $self->{_fig};

    return map { &SubsystemO::new('SubsystemO',$self,$_) } $fig->all_subsystems;
}

sub functional_roles {
    my($self) = @_;
    my $fig = $self->{_fig};

    my @functional_roles = ();
    
    return @functional_roles;
}

sub all_figfams {
    my($self) = @_;
    my $fig = $self->{_fig};
    my $fams = new FigFams($fig);
    return map { &FigFamO::new('FigFamO',$self,$_) } $fams->all_families;
}

sub family_containing {
    my($self,$seq) = @_;

    my $fig = $self->{_fig};
    my $fams = new FigFams($fig);
    my($fam,$sims) = $fams->place_in_family($seq);
    if ($fam)
    {
	return (&FigFamO::new('FigFamO',$self,$fam->family_id),$sims);
    }
    else
    {
	return undef;
    }
}

package GenomeO;

use Data::Dumper;

sub new {
    my($class,$figO,$genomeId) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $genomeId;
    return bless $self, $class;
}    

sub id {
    my($self) = @_;

    return $self->{_id};
}

sub genus_species {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    return $fig->genus_species($self->{_id});
}

sub contigs_of {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    return map { &ContigO::new('ContigO',$figO,$self->id,$_) } $fig->contigs_of($self->id);
}

sub features_of {
    my($self,$type) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};

    return map { &FeatureO::new('FeatureO',$figO,$_) } $fig->all_features($self->id,$type);
}

sub display {
    my($self) = @_;

    print join("\t",("Genome",$self->id,$self->genus_species)),"\n";
}

package ContigO;

use Data::Dumper;

sub new {
    my($class,$figO,$genomeId,$contigId) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $contigId;
    $self->{_genome} = $genomeId;
    return bless $self, $class;
}    

sub id {
    my($self) = @_;

    return $self->{_id};
}

sub genome {
    my($self) = @_;

    return $self->{_genome};
}

sub contig_length {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    my $contig_lengths = $fig->contig_lengths($self->genome);
    return $contig_lengths->{$self->id};
}

sub dna_seq {
    my($self,$beg,$end) = @_;
    
    my $fig = $self->{_figO}->{_fig};
    my $max = $self->contig_length;
    if (($beg && (&FIG::between(1,$beg,$max))) &&
	($end && (&FIG::between(1,$end,$max))))
    {
	return $fig->dna_seq($self->genome,join("_",($self->id,$beg,$end)));
    }
    else
    {
	return undef;
    }
}

sub display {
    my($self) = @_;

    print join("ContigO",$self->genome,$self->id,$self->contig_length),"\n";
}

package FeatureO;

use Data::Dumper;

sub new {
    my($class,$figO,$fid) = @_;

    ($fid =~ /^fig\|\d+\.\d+\.[^\.]+\.\d+$/) || return undef;
    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $fid;
    return bless $self, $class;
}    

sub id {
    my($self) = @_;

    return $self->{_id};
}

sub genome {
    my($self) = @_;

    $self->id =~ /^fig\|(\d+\.\d+)/;
    return $1;
}

sub type {
    my($self) = @_;

    $self->id =~ /^fig\|\d+\.\d+\.([^\.]+)/;
    return $1;
}

sub location {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    return scalar $fig->feature_location($self->id);
}

sub dna_seq {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    my $fid = $self->id;
    my @loc = $fig->feature_location($fid);
    return $fig->dna_seq(&FIG::genome_of($fid),@loc);
}

sub prot_seq {
    my($self) = @_;

    ($self->type eq "peg") || return undef;
    my $fig = $self->{_figO}->{_fig};
    my $fid = $self->id;
    return $fig->get_translation($fid);
}

sub function_of {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    my $fid = $self->id;
    return scalar $fig->function_of($fid);
}

sub coupled_to {
    my($self) = @_;

    ($self->type eq "peg") || return undef;
    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my $peg1 = $self->id;
    my @coupled = ();
    foreach my $tuple ($fig->coupled_to($peg1))
    {
	my($peg2,$sc) = @$tuple;
	push(@coupled, &CouplingO::new('CouplingO',$figO,$peg1,$peg2,$sc));
    }
    return @coupled;
}

sub annotations {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    
    return map { &AnnotationO::new('AnnotationO',@$_) } $fig->feature_annotations($self->id,1);
}

sub possibly_truncated {
    my($self) = @_;
    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    
    return $fig->possibly_truncated($self->id);
}

sub possible_frameshift {
    my($self) = @_;
    my $figO = $self->{_figO};
    my($tmp_dir) = $figO->{_tmp_dir};

    if (! $self->possibly_truncated)
    {
	my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);
	if (my $sim = shift @sims)
	{
	    my $peg2 = $sim->id2;
	    my $ln1  = $sim->ln1;
	    my $ln2  = $sim->ln2;
	    my $b2   = $sim->b2;
	    my $e2   = $sim->e2;
	    my $adjL = 100 + (($b2-1) * 3);
	    my $adjR = 100 + (($ln2 - $e2) * 3);
	    if ($ln2 > (1.2 * $ln1))
	    {
		my $loc = $self->location;
		if ($loc =~ /^(\S+)_(\d+)_(\d+)/)
		{
		    my $contig = $1;
		    my $beg    = $2;
		    my $end = $3;
		    my $contigO = new ContigO($figO,$self->genome,$contig);
		    my $begA = &max(1,$beg - $adjL);
		    my $endA = &min($end+$adjR,$contigO->contig_length);
		    my $dna  = $contigO->dna_seq($begA,$endA);
		    open(TMP,">$tmp_dir/tmp_dna") || die "couild not open tmp_dna";
		    print TMP ">dna\n$dna\n";
		    close(TMP);

		    my $peg2O = new FeatureO($figO,$peg2);
		    my $prot  = $peg2O->prot_seq;
		    open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";
		    print TMP ">tmp_prot\n$prot\n";
		    close(TMP);
		    &run("formatdb -i $tmp_dir/tmp_dna -pF");
		    open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")
			|| die "could not blast";

		    my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);
		    my @hsps       = sort { $a->[0] <=> $b->[0] }
			             map { [$_->[9],$_->[10],$_->[12],$_->[13]] } 
			             grep { $_->[1] < 1.0e-50 } 
			             @{$db_seq_out->[6]};
		    my @prot = map { [$_->[0],$_->[1]] } @hsps;
		    my @dna  = map { [$_->[2],$_->[3]] } @hsps;
		    if (&covers(\@prot,length($prot),3) && &covers(\@dna,3*length($prot),9))
		    {
			return 1;
		    }
		}
	    }
	}
    }
    return 0;
}

sub run {
    my($cmd) = @_;
    (system($cmd) == 0) || Confess("FAILED: $cmd");
}

sub max {
    my($x,$y) = @_;
    return ($x < $y) ? $y : $x;
}

sub min {
    my($x,$y) = @_;
    return ($x < $y) ? $x : $y;
}

sub covers {
    my($hsps,$ln,$diff) = @_;

    my $hsp1 = shift @$hsps;
    my $hsp2;
    while ($hsp1 && ($hsp2 = shift @$hsps) && ($hsp1 = &merge($hsp1,$hsp2,$diff))) {}
    return ($hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));
}

sub merge {
    my($hsp1,$hsp2,$diff) = @_;

    my($b1,$e1) = @$hsp1;
    my($b2,$e2) = @$hsp2;
    return (($e2 > $e1) && (abs($b2-$e1) <= $diff)) ? [$b1,$e2] : undef;
}

use Sim;
sub sims {
    my($self,%args) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};

    my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;
    my $all    = $args{-all}    ? $args{-all}    : "fig";
    my $max    = $args{-max}    ? $args{-max}    : 10000;

    return $fig->sims($self->id,$max,$cutoff,$all);
}

sub bbhs {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};

    my @bbhs  = $fig->bbhs($self->id);
    return map { my($peg2,$sc,$bs) = @$_; bless({ _peg1 => $self->id, 
						  _peg2 => $peg2, 
						  _psc => $sc,
						  _bit_score => $bs 
						},'BBHO') } @bbhs;
}

sub display {
    my($self) = @_;

    print join("\t",$self->id,$self->location,$self->function_of),"\n",
          $self->dna_seq,"\n",
          $self->prot_seq,"\n";
}

package BBHO;

sub new {
    my($class,$peg1,$peg2,$sc,$normalized_bitscore) = @_;

    my $self = {};
    $self->{_peg1}      = $peg1;
    $self->{_peg2}      = $peg2;
    $self->{_psc}       = $sc;
    $self->{_bit_score} = $normalized_bitscore

}    

sub peg1 {
    my($self) = @_;

    return $self->{_peg1};
}

sub peg2 {
    my($self) = @_;

    return $self->{_peg2};
}

sub psc {
    my($self) = @_;

    return $self->{_psc};
}

sub norm_bitscore {
    my($self) = @_;

    return $self->{_bit_score};
}

package AnnotationO;

sub new {
    my($class,$fid,$timestamp,$who,$text) = @_;

    my $self = {};
    $self->{_fid} = $fid;
    $self->{_timestamp} = $timestamp;
    $self->{_who} = $who;
    $self->{_text} = $text;
    return bless $self, $class;
}    

sub fid {
    my($self) = @_;

    return $self->{_fid};
}

sub timestamp {
    my($self,$convert) = @_;

    if ($convert) 
    {
	return scalar localtime($self->{_timestamp});
    }
    else
    {
	return $self->{_timestamp};
    }
}

sub made_by {
    my($self) = @_;

    my $who = $self->{_who};
    $who =~ s/^master://i;
    return $who;
}

sub text {
    my($self) = @_;

    my $text = $self->{_text};
    return $text;
}

sub display {
    my($self) = @_;

    print join("\t",($self->fid,$self->timestamp(1),$self->made_by)),"\n",$self->text,"\n";
}

package CouplingO;

use Data::Dumper;

sub new {
    my($class,$figO,$peg1,$peg2,$sc) = @_;

    ($peg1 =~ /^fig\|\d+\.\d+\.peg\.\d+$/) || return undef;
    ($peg2 =~ /^fig\|\d+\.\d+\.peg\.\d+$/) || return undef;
    my $self = {};
    $self->{_figO} = $figO;
    $self->{_peg1} = $peg1;
    $self->{_peg2} = $peg2;
    $self->{_sc}   = $sc;
    return bless $self, $class;
}    

sub peg1 {
    my($self) = @_;

    return $self->{_peg1};
}

sub peg2 {
    my($self) = @_;

    return $self->{_peg2};
}

sub sc {
    my($self) = @_;

    return $self->{_sc};
}

sub evidence {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my @ev = ();
    foreach my $tuple ($fig->coupling_evidence($self->peg1,$self->peg2))
    {
	my($peg3,$peg4,$rep) = @$tuple;
	push(@ev,[&FeatureO::new('FeatureO',$figO,$peg3),
		  &FeatureO::new('FeatureO',$figO,$peg4),
		  $rep]);
    }
    return @ev;
}

sub display {
    my($self) = @_;

    print join("\t",($self->peg1,$self->peg2,$self->sc)),"\n";
}

package SubsystemO;

use Data::Dumper;
use Subsystem;

sub new {
    my($class,$figO,$name) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $name;

    return bless $self, $class;
}    

sub id {
    my($self) = @_;

    return $self->{_id};
}

sub usable {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    return $fig->usable_subsystem($self->id);
}

sub genomes {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return map { &GenomeO::new('GenomeO',$figO,$_) } $subO->get_genomes;
}

sub roles {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return map { &FunctionalRoleO::new('FunctionalRoleO',$figO,$_) }  $subO->get_roles($self->id);
}

sub curator {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return $subO->get_curator;
}

sub variant {
    my($self,$genome) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return $subO->get_variant_code_for_genome($genome->id);
}
    
sub pegs_in_cell {
    my($self,$genome,$role) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return $subO->get_pegs_from_cell($genome->id,$role->id);
}

package FunctionalRoleO;

use Data::Dumper;

sub new {
    my($class,$figO,$fr) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $fr;
    return bless $self, $class;
}    

sub id {
    my($self) = @_;

    return $self->{_id};
}

package FigFamO;

use FigFams;
use FigFam;

sub new {
    my($class,$figO,$id) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $id;
    return bless $self, $class;
}    

sub id {
    my($self) = @_;

    return $self->{_id};
}

sub function {
    my($self) = @_;

    my $fig  = $self->{_figO}->{_fig};
    my $famO = $self->{_famO};
    if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
    
    return $famO->family_function;
}

sub members {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my $famO = $self->{_famO};
    if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
    
    return map { &FigFamO::new('FigFamO',$figO,$_) } $famO->list_members;
}

sub rep_seqs {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my $famO = $self->{_famO};
    if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
    
    return $famO->representatives;
}

sub should_be_member {
    my($self,$seq) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my $famO = $self->{_famO};
    if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
    
    return $famO->should_be_member($seq);
}



sub display {
    my($self) = @_;

    print join("\t",($self->id,$self->function)),"\n";
}



package Attribute;

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3