[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

View of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (download) (as text) (annotate)
Mon Feb 26 21:06:58 2007 UTC (13 years, 1 month ago) by overbeek
Branch: MAIN
Changes since 1.8: +183 -63 lines
More POD documentation

########################################################################
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
########################################################################

=head1 Overview

This module is a set of packages encapsulating the SEED's core methods
using an "OOP-like" style. 

There are several modules clearly related to "individual genomes:"
FIGO, GenomeO, ContigO, FeatureO (and I<maybe> AnnotationO).

There are also modules that deal with complex relationships between
pairs or sets of features in one, two, or more genomes,
rather than any particular single genome:
BBHO, CouplingO, SubsystemO, FunctionalRoleO, FigFamO.

Finally, the methods in "Attribute" might in principle attach
"atributes" to any type of object. 
(Likewise, in principle one might like to attach an "annotation"
to any type of object

Four of the modules dealing with "genomes" have a reasonable clear
"implied heirarchy:"

=over 4

    FIGO > GenomeO > ContigO > FeatureO

=back

However, inheritance is B<NOT> implemented using the C<@ISA> mechanism,
because some methods deal with "pairwise" or "setwise" relations between objects
or other more complex relationships that do not naturally fit into any heirarchy ---
which would get us into the whole quagmire of "multiple inheritance."

We have chosen to sidestep the entire issue of inheritance via an I<ad hoc> mechanism:
If a "child" object needs access to its "ancestors'" methods,
we pass it references to its "ancestors" using subroutine arguments.
This is admittedly ugly, clumsy, and potentially error-prone ---
but it has the advantage that, unlike multiple inheritance, 
we understand how to do it...

MODULE DEPENDENCIES: FIG, FIG_Config, FigFams, SFXlate, SproutFIG, Tracer,
    gjoparseblast, Data::Dumper.

=cut

########################################################################
package FIGO;
########################################################################
use strict;
use FIG;
use FIG_Config;
use SFXlate;
use SproutFIG;
use Tracer;
use Data::Dumper;
use FigFams;
use gjoparseblast;

=head1 FIGO

The effective "base class" containing a few "top-level" methods.

=cut


=head3 new

Constructs a new FIGO object.

=over 4 

=item USAGE: 

C<< my $figo = FIGO->new();           #...Subclass defaults to FIG >>

C<< my $figo = FIGO->new('SPROUT');   #...Subclass is a SPROUT object >>

=back

=cut

sub new {
    my($class,$low_level) = @_;

    my $fig;
    if ($low_level && ($low_level =~ /sprout/i))
    {
        $fig = new SproutFIG($FIG_Config::sproutDB, $FIG_Config::sproutData);
    }
    else
    {
	$fig = new FIG;
    }

    my $self = {};
    $self->{_fig} = $fig;
    $self->{_tmp_dir} = $FIG_Config::temp;
    return bless $self, $class;
}



=head3 genomes

Returns a list of Taxonomy-IDs, possibly constrained by selection criteria.
(Default: Empty constraint returns all Tax-IDs in the SEED or SPROUT.)

=over 4

=item USAGE:

C<< my @tax_ids = $figo->genomes(); >>

C<< my @tax_ids = $figo->genomes( @constraints ); >>

=item @constraints

    One or more element of: complete, prokaryotic, eukaryotic, bacterial, archaeal, nmpdr.

=item RETURNS: List of Tax-IDs.

=item EXAMPLE:

    L<Display all complete, prokaryotic genomes>

=back

=cut

sub genomes {
    my($self,@constraints) = @_;
    my $fig = $self->{_fig};

    my %constraints = map { $_ => 1 } @constraints;
    my @genomes = ();

    if ($constraints{complete})
    {
	@genomes = $fig->genomes('complete');
    }
    else
    {
	@genomes = $fig->genomes;
    }

    if ($constraints{prokaryotic})
    {
	@genomes = grep { $fig->is_prokaryotic($_) } @genomes;
    }
    
    if ($constraints{eukaryotic})
    {
	@genomes = grep { $fig->is_eukaryotic($_) } @genomes;
    }
    
    if ($constraints{bacterial})
    {
	@genomes = grep { $fig->is_bacterial($_) } @genomes;
    }
    
    if ($constraints{archaeal})
    {
	@genomes = grep { $fig->is_archaeal($_) } @genomes;
    }
    
    if ($constraints{nmpdr})
    {
	@genomes = grep { $fig->is_NMPDR_genome($_) } @genomes;
    }
    
    return map { &GenomeO::new('GenomeO',$self,$_) } @genomes;
}



=head3 subsystems

=over 4

=item RETURNS:

    List of all subsystems.

=item EXAMPLE:

    L<Accessing Subsystem data>

=back

=cut

sub subsystems {
    my($self) = @_;
    my $fig = $self->{_fig};

    return map { &SubsystemO::new('SubsystemO',$self,$_) } $fig->all_subsystems;
}


=head3 functional_roles

(Not yet implemented)

=over 

=item RETURNS:

=item EXAMPLE:

=back

=cut

sub functional_roles {
    my($self) = @_;
    my $fig = $self->{_fig};

    my @functional_roles = ();
    
    return @functional_roles;
}



=head3 all_figfams

Returns a list of all FIGfam objects.

=over 4

=item USAGE:

    C<< foreach $fam ($figO->all_figfams) { #...Do something } >>

=item RETURNS:

    List of FIGfam Objects

=item EXAMPLE:

    L<Accessing FIGfams>

=back

=cut

sub all_figfams {
    my($self) = @_;
    my $fig = $self->{_fig};
    my $fams = new FigFams($fig);
    return map { &FigFamO::new('FigFamO',$self,$_) } $fams->all_families;
}



=head3 family_containing

=over 4

=item USAGE:

    C<< my ($fam, $sims) = $figO->family_containing($seq); >>

=item $seq:

    A protein translation string.

=item RETURNS:
 
      $fam:  A FIGfam Object.

      $sims: A set of similarity objects.

=item EXAMPLE: L<Placing a sequence into a FIGfam>

=back

=cut

sub family_containing {
    my($self,$seq) = @_;

    my $fig = $self->{_fig};
    my $fams = new FigFams($fig);
    my($fam,$sims) = $fams->place_in_family($seq);
    if ($fam)
    {
	return (&FigFamO::new('FigFamO',$self,$fam->family_id),$sims);
    }
    else
    {
	return undef;
    }
}


########################################################################
package GenomeO;
########################################################################
use Data::Dumper;

=head1 GenomeO

=cut

=head3 new

Constructor of GenomeO objects.

=over 4
    
=item USAGE:

    C<< my $org = GenomeO->new($figo, $tax_id); >>

=item RETURNS:

    A new GenomeO object.

=back

=cut

sub new {
    my($class,$figO,$genomeId) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $genomeId;
    return bless $self, $class;
}    



=head3 id

=over 4

=item USAGE:

    C<< my $tax_id = $org->id(); >>

=item RETURNS:

    Taxonomy-ID of GenomeO object.

=back

=cut

sub id {
    my($self) = @_;

    return $self->{_id};
}



=head3 genus_species

=over 4

=item USAGE:

    C<< $gs = $genome->genus_species(); >>

=item RETURNS:

    Genus-species-strain string

=back

=cut

sub genus_species {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    return $fig->genus_species($self->{_id});
}


=head3 contigs_of

=over 4

=item RETURNS:

    List of C<contig> objects contained in a C<GenomeO> object.

=item EXAMPLE:

    L<Show how to access contigs and extract sequence>

=back

=cut

sub contigs_of {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    return map { &ContigO::new('ContigO',$figO,$self->id,$_) } $fig->contigs_of($self->id);
}



=head3 features_of

=cut

sub features_of {
    my($self,$type) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};

    return map { &FeatureO::new('FeatureO',$figO,$_) } $fig->all_features($self->id,$type);
}


=head3 display

Prints the genus, species, and strain information about a genome to STDOUT.

=over 4

=item USAGE:

    C<< $genome->display(); >>

=item RETURNS:

    (Void)

=back

=cut

sub display {
    my($self) = @_;

    print join("\t",("Genome",$self->id,$self->genus_species)),"\n";
}



########################################################################
package ContigO;
########################################################################
use Data::Dumper;

=head1 ContigO

Methods for working with DNA sequence objects.

=cut

=head3 new

Contig constructor.

=over 4

=item USAGE:   

    C<< $contig = ContigO->new( $figO, $genomeId, $contigId); >>

=item $figO:

    Parent FIGO object.

=item $genomeId:

    Taxon-ID for the genome the contig is from.

=item $contigId:

    Identifier for the contig

=item RETURNS:

    A "ContigO" object.

=back

=cut

sub new {
    my($class,$figO,$genomeId,$contigId) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $contigId;
    $self->{_genome} = $genomeId;
    return bless $self, $class;
}    



=head3 id

=over 4

=item RETURNS:

    Sequence ID string of "ContigO" object

=back

=cut

sub id {
    my($self) = @_;

    return $self->{_id};
}


=head3 genome

=over 4

=item USAGE:
    
    C<< my $tax_id = $contig->genome(); >>

=item RETURNS:

    Tax-ID of the GenomeO object containing the contig object.

=back

=cut

sub genome {
    my($self) = @_;

    return $self->{_genome};
}



=head3 contig_length

=over 4

=item USAGE:

    C<< my $len = $contig->contig_length(); >>

=item RETURNS:

    Length of contig's DNA sequence.

=back

=cut

sub contig_length {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    my $contig_lengths = $fig->contig_lengths($self->genome);
    return $contig_lengths->{$self->id};
}


=head3 dna_seq

=over 4

=item USAGE:

    C<< my $seq = $contig->dna_seq(beg, $end); >>

=item $beg:

    Begining point of DNA subsequence

=item $end:

    End point of DNA subsequence

=item RETURNS:

    string of DNA sequence running from $beg to $end
    (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)

=back

=cut

sub dna_seq {
    my($self,$beg,$end) = @_;
    
    my $fig = $self->{_figO}->{_fig};
    my $max = $self->contig_length;
    if (($beg && (&FIG::between(1,$beg,$max))) &&
	($end && (&FIG::between(1,$end,$max))))
    {
	return $fig->dna_seq($self->genome,join("_",($self->id,$beg,$end)));
    }
    else
    {
	return undef;
    }
}


=head3 display

Prints summary information about a "ContigO" object to STDOUT:

Genus, species, strain

Contig ID

Contig length

=over 4

=item RETURNS:

    (Void)

=back

=cut

sub display {
    my($self) = @_;

    print join("ContigO",$self->genome,$self->id,$self->contig_length),"\n";
}



########################################################################
package FeatureO;
########################################################################
use Data::Dumper;

=head1 FeatureO

Methods for working with features on "ContigO" objects.

=cut

=head3 new

Constructor of "FeatureO" objects

=cut

sub new {
    my($class,$figO,$fid) = @_;

    ($fid =~ /^fig\|\d+\.\d+\.[^\.]+\.\d+$/) || return undef;
    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $fid;
    return bless $self, $class;
}    


=head3 id

=cut

sub id {
    my($self) = @_;

    return $self->{_id};
}



=head3 genome

=cut

sub genome {
    my($self) = @_;

    $self->id =~ /^fig\|(\d+\.\d+)/;
    return $1;
}



=head3 type

=cut

sub type {
    my($self) = @_;

    $self->id =~ /^fig\|\d+\.\d+\.([^\.]+)/;
    return $1;
}




=head3 location

=cut

sub location {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    return scalar $fig->feature_location($self->id);
}



=head3 dna_seq

=cut

sub dna_seq {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    my $fid = $self->id;
    my @loc = $fig->feature_location($fid);
    return $fig->dna_seq(&FIG::genome_of($fid),@loc);
}



=head3 prot_seq

=cut

sub prot_seq {
    my($self) = @_;

    ($self->type eq "peg") || return undef;
    my $fig = $self->{_figO}->{_fig};
    my $fid = $self->id;
    return $fig->get_translation($fid);
}



=head3 function_of

=cut

sub function_of {
    my($self) = @_;

    my $fig = $self->{_figO}->{_fig};
    my $fid = $self->id;
    return scalar $fig->function_of($fid);
}



=head3 coupled_to

=cut

sub coupled_to {
    my($self) = @_;

    ($self->type eq "peg") || return undef;
    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my $peg1 = $self->id;
    my @coupled = ();
    foreach my $tuple ($fig->coupled_to($peg1))
    {
	my($peg2,$sc) = @$tuple;
	push(@coupled, &CouplingO::new('CouplingO',$figO,$peg1,$peg2,$sc));
    }
    return @coupled;
}



=head3 annotations

=cut

sub annotations {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    
    return map { &AnnotationO::new('AnnotationO',@$_) } $fig->feature_annotations($self->id,1);
}

sub in_subsystems {
    my($self) = @_;
    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    
    return map { new SubsystemO($figO,$_) } $fig->peg_to_subsystems($self->id);
}


=head3 possibly_truncated

=cut

sub possibly_truncated {
    my($self) = @_;
    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    
    return $fig->possibly_truncated($self->id);
}



=head3 possible_frameshift

=cut

sub possible_frameshift {
    my($self) = @_;
    my $figO = $self->{_figO};
    my($tmp_dir) = $figO->{_tmp_dir};

    if (! $self->possibly_truncated)
    {
	my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);
	if (my $sim = shift @sims)
	{
	    my $peg2 = $sim->id2;
	    my $ln1  = $sim->ln1;
	    my $ln2  = $sim->ln2;
	    my $b2   = $sim->b2;
	    my $e2   = $sim->e2;
	    my $adjL = 100 + (($b2-1) * 3);
	    my $adjR = 100 + (($ln2 - $e2) * 3);
	    if ($ln2 > (1.2 * $ln1))
	    {
		my $loc = $self->location;
		if ($loc =~ /^(\S+)_(\d+)_(\d+)/)
		{
		    my $contig = $1;
		    my $beg    = $2;
		    my $end = $3;
		    my $contigO = new ContigO($figO,$self->genome,$contig);
		    my $begA = &max(1,$beg - $adjL);
		    my $endA = &min($end+$adjR,$contigO->contig_length);
		    my $dna  = $contigO->dna_seq($begA,$endA);
		    open(TMP,">$tmp_dir/tmp_dna") || die "couild not open tmp_dna";
		    print TMP ">dna\n$dna\n";
		    close(TMP);

		    my $peg2O = new FeatureO($figO,$peg2);
		    my $prot  = $peg2O->prot_seq;
		    open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";
		    print TMP ">tmp_prot\n$prot\n";
		    close(TMP);
		    &run("formatdb -i $tmp_dir/tmp_dna -pF");
		    open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")
			|| die "could not blast";

		    my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);
		    my @hsps       = sort { $a->[0] <=> $b->[0] }
			             map { [$_->[9],$_->[10],$_->[12],$_->[13]] } 
			             grep { $_->[1] < 1.0e-50 } 
			             @{$db_seq_out->[6]};
		    my @prot = map { [$_->[0],$_->[1]] } @hsps;
		    my @dna  = map { [$_->[2],$_->[3]] } @hsps;
		    if (&covers(\@prot,length($prot),3) && &covers(\@dna,3*length($prot),9))
		    {
			return 1;
		    }
		}
	    }
	}
    }
    return 0;
}



=head3 run

=sub

cut run {
    my($cmd) = @_;
    (system($cmd) == 0) || Confess("FAILED: $cmd");
}



=head3 max

=cut

sub max {
    my($x,$y) = @_;
    return ($x < $y) ? $y : $x;
}



=head3 min

=cut

sub min {
    my($x,$y) = @_;
    return ($x < $y) ? $x : $y;
}



=head3 covers

=cut

sub covers {
    my($hsps,$ln,$diff) = @_;

    my $hsp1 = shift @$hsps;
    my $hsp2;
    while ($hsp1 && ($hsp2 = shift @$hsps) && ($hsp1 = &merge($hsp1,$hsp2,$diff))) {}
    return ($hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));
}



=head3 merge

=cut

sub merge {
    my($hsp1,$hsp2,$diff) = @_;

    my($b1,$e1) = @$hsp1;
    my($b2,$e2) = @$hsp2;
    return (($e2 > $e1) && (abs($b2-$e1) <= $diff)) ? [$b1,$e2] : undef;
}



=head3 sims

=cut

use Sim;
sub sims {
    my($self,%args) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};

    my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;
    my $all    = $args{-all}    ? $args{-all}    : "fig";
    my $max    = $args{-max}    ? $args{-max}    : 10000;

    return $fig->sims($self->id,$max,$cutoff,$all);
}



=head3 bbhs

=cut

sub bbhs {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};

    my @bbhs  = $fig->bbhs($self->id);
    return map { my($peg2,$sc,$bs) = @$_; bless({ _figO => $figO,
	                                          _peg1 => $self->id, 
						  _peg2 => $peg2, 
						  _psc => $sc,
						  _bit_score => $bs 
						},'BBHO') } @bbhs;
}

=head3 display

=cut

sub display {
    my($self) = @_;

    print join("\t",$self->id,$self->location,$self->function_of),"\n",
          $self->dna_seq,"\n",
          $self->prot_seq,"\n";
}



########################################################################
package BBHO;
########################################################################

=head1 BBHO

=cut


=head3 new

=cut

sub new {
    my($class,$figO,$peg1,$peg2,$sc,$normalized_bitscore) = @_;

    my $self = {};
    $self->{_figO}      = $figO;
    $self->{_peg1}      = $peg1;
    $self->{_peg2}      = $peg2;
    $self->{_psc}       = $sc;
    $self->{_bit_score} = $normalized_bitscore

}    


=head3 peg1

=cut

sub peg1 {
    my($self) = @_;

    my $figO = $self->{_figO};
    return new FeatureO($figO,$self->{_peg1});
}

=head3 peg2

=cut

sub peg2 {
    my($self) = @_;

    my $figO = $self->{_figO};
    return new FeatureO($figO,$self->{_peg2});
}



=head3 psc

=cut

sub psc {
    my($self) = @_;

    return $self->{_psc};
}



=head3 norm_bitscore

=cut

sub norm_bitscore {
    my($self) = @_;

    return $self->{_bit_score};
}



########################################################################
package AnnotationO;
########################################################################

=head1 AnnotationO

=cut



=head3 new

=cut

sub new {
    my($class,$fid,$timestamp,$who,$text) = @_;

    my $self = {};
    $self->{_fid} = $fid;
    $self->{_timestamp} = $timestamp;
    $self->{_who} = $who;
    $self->{_text} = $text;
    return bless $self, $class;
}    



=head3 fid

=cut

sub fid {
    my($self) = @_;

    return $self->{_fid};
}



=head3 timestamp

=cut

sub timestamp {
    my($self,$convert) = @_;

    if ($convert) 
    {
	return scalar localtime($self->{_timestamp});
    }
    else
    {
	return $self->{_timestamp};
    }
}



=head3 made_by

=cut

sub made_by {
    my($self) = @_;

    my $who = $self->{_who};
    $who =~ s/^master://i;
    return $who;
}



=head3 text

=cut

sub text {
    my($self) = @_;

    my $text = $self->{_text};
    return $text;
}


=head3 display

=cut

sub display {
    my($self) = @_;

    print join("\t",($self->fid,$self->timestamp(1),$self->made_by)),"\n",$self->text,"\n";
}



########################################################################
package CouplingO;
########################################################################
use Data::Dumper;

=head3 new

=cut

sub new {
    my($class,$figO,$peg1,$peg2,$sc) = @_;

    ($peg1 =~ /^fig\|\d+\.\d+\.peg\.\d+$/) || return undef;
    ($peg2 =~ /^fig\|\d+\.\d+\.peg\.\d+$/) || return undef;
    my $self = {};
    $self->{_figO} = $figO;
    $self->{_peg1} = $peg1;
    $self->{_peg2} = $peg2;
    $self->{_sc}   = $sc;
    return bless $self, $class;
}    



=head3 peg1

=cut

sub peg1 {
    my($self) = @_;

    my $figO = $self->{_figO};
    return new FeatureO($figO,$self->{_peg1});
}



=head3 peg1

=cut

sub peg2 {
    my($self) = @_;

    my $figO = $self->{_figO};
    return new FeatureO($figO,$self->{_peg2});
}



=head3 sc

=cut

sub sc {
    my($self) = @_;

    return $self->{_sc};
}



=head3 evidence

=cut

sub evidence {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my @ev = ();
    foreach my $tuple ($fig->coupling_evidence($self->peg1,$self->peg2))
    {
	my($peg3,$peg4,$rep) = @$tuple;
	push(@ev,[&FeatureO::new('FeatureO',$figO,$peg3),
		  &FeatureO::new('FeatureO',$figO,$peg4),
		  $rep]);
    }
    return @ev;
}



=head3 display

=cut

sub display {
    my($self) = @_;

    print join("\t",($self->peg1,$self->peg2,$self->sc)),"\n";
}



########################################################################
package SubsystemO;
########################################################################
use Data::Dumper;
use Subsystem;

=head1 SubsystemO

=cut



=head3 new

=cut

sub new {
    my($class,$figO,$name) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $name;

    return bless $self, $class;
}    



=head3 id

=cut

sub id {
    my($self) = @_;

    return $self->{_id};
}



=head3 usable

=cut 

sub usable {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    return $fig->usable_subsystem($self->id);
}



=head3 genomes

=cut

sub genomes {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return map { &GenomeO::new('GenomeO',$figO,$_) } $subO->get_genomes;
}



=head3 roles

=cut

sub roles {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return map { &FunctionalRoleO::new('FunctionalRoleO',$figO,$_) }  $subO->get_roles($self->id);
}



=head3 curator

=cut

sub curator {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return $subO->get_curator;
}




=head3 variant

=cut

sub variant {
    my($self,$genome) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return $subO->get_variant_code_for_genome($genome->id);
}



=head3 pegs_in_cell

=cut

sub pegs_in_cell {
    my($self,$genome,$role) = @_;

    my $figO = $self->{_figO};
    my $subO = $self->{_subO};
    if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }

    return $subO->get_pegs_from_cell($genome->id,$role->id);
}



########################################################################
package FunctionalRoleO;
########################################################################
use Data::Dumper;

=head1 FunctionalRoleO

Methods for accessing the functional roles of features.

=cut


=head3 new

=cut

sub new {
    my($class,$figO,$fr) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $fr;
    return bless $self, $class;
}    



=head3 id

=cut

sub id {
    my($self) = @_;

    return $self->{_id};
}



########################################################################
package FigFamO;
########################################################################
use FigFams;
use FigFam;


=head1 FigFamO

=cut


=head3 new

=cut

sub new {
    my($class,$figO,$id) = @_;

    my $self = {};
    $self->{_figO} = $figO;
    $self->{_id} = $id;
    return bless $self, $class;
}    



=head3 id

=cut

sub id {
    my($self) = @_;

    return $self->{_id};
}


=head3 function

=cut

sub function {
    my($self) = @_;

    my $fig  = $self->{_figO}->{_fig};
    my $famO = $self->{_famO};
    if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
    
    return $famO->family_function;
}



=head3 members

=cut

sub members {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my $famO = $self->{_famO};
    if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
    
    return map { &FigFamO::new('FigFamO',$figO,$_) } $famO->list_members;
}



=head3 rep_seqs

=cut

sub rep_seqs {
    my($self) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my $famO = $self->{_famO};
    if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
    
    return $famO->representatives;
}



=head3 should_be_member

=cut

sub should_be_member {
    my($self,$seq) = @_;

    my $figO = $self->{_figO};
    my $fig  = $figO->{_fig};
    my $famO = $self->{_famO};
    if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
    
    return $famO->should_be_member($seq);
}



=head3 display

=cut

sub display {
    my($self) = @_;

    print join("\t",($self->id,$self->function)),"\n";
}



########################################################################
package Attribute;
########################################################################
=head1 Attribute

(Note yet implemented.)

=cut

1;
__END__

=head1 Examples

=head3 Display all complete, prokaryotic genomes

use FIGO;
my $figO = new FIGO;

foreach $genome ($figO->genomes('complete','prokaryotic'))
{
    $genome->display;
}

#---------------------------------------------

use FIG;
my $fig = new FIG;

foreach $genome (grep { $fig->is_prokaryotic($_) } $fig->genomes('complete'))
{
    print join("\t",("Genome",$genome,$fig->genus_species($genome))),"\n";
}

###############################################

=head3 Show how to access contigs and extract sequence

use FIGO;
my $figO = new FIGO;

$genomeId = '83333.1';
my $genome = new GenomeO($figO,$genomeId);

foreach $contig ($genome->contigs_of)
{
    $tag1 = $contig->dna_seq(1,10);
    $tag2 = $contig->dna_seq(10,1);
    print join("\t",($tag1,$tag2,$contig->id,$contig->contig_length)),"\n";
}

#---------------------------------------------

use FIG;
my $fig = new FIG;

$genomeId = '83333.1';

$contig_lengths = $fig->contig_lengths($genomeId);

foreach $contig ($fig->contigs_of($genomeId))
{
    $tag1 = $fig->dna_seq($genomeId,join("_",($contig,1,10)));
    $tag2 = $fig->dna_seq($genomeId,join("_",($contig,10,1)));
    print join("\t",($tag1,$tag2,$contig,$contig_lengths->{$contig})),"\n";
}

###############################################

### accessing data related to features

use FIGO;
my $figO = new FIGO;

my $genome = new GenomeO($figO,"83333.1");
my $peg  = "fig|83333.1.peg.4";
my $pegO = new FeatureO($figO,$peg);

print join("\t",$pegO->id,$pegO->location,$pegO->function_of),"\n",
      $pegO->dna_seq,"\n",
      $pegO->prot_seq,"\n";

foreach $fidO ($genome->features_of('rna'))
{
    print join("\t",$fidO->id,$fidO->location,$fidO->function_of),"\n";
}

#---------------------------------------------


use FIG;
my $fig = new FIG;

my $genome = "83333.1";
my $peg  = "fig|83333.1.peg.4";

print join("\t",$peg,scalar $fig->feature_location($peg),scalar $fig->function_of($peg)),"\n",
      $fig->dna_seq($genome,$fig->feature_location($peg)),"\n",
      $fig->get_translation($peg),"\n";

foreach $fid ($fig->all_features($genome,'rna'))
{
    print join("\t",$fid,scalar $fig->feature_location($fid),scalar $fig->function_of($fid)),"\n";
}

###############################################

### accessing similarities

use FIGO;
my $figO = new FIGO;

$peg  = "fig|83333.1.peg.4";
$pegO = new FeatureO($figO,$peg);

@sims = $pegO->sims;  # use sims( -all => 1, -max => 10000, -cutoff => 1.0e-20) to all
                      # sims (including non-FIG sequences
foreach $sim (@sims)
{
    $peg2  = $sim->id2;
    $pegO2 = new FeatureO($figO,$peg2);
    $func  = $pegO2->function_of;
    $sc    = $sim->psc;
    print join("\t",($peg2,$sc,$func)),"\n";
}

#---------------------------------------------


use FIG;
my $fig = new FIG;

$peg  = "fig|83333.1.peg.4";

@sims = $fig->sims($peg,1000,1.0e-5,"fig");
foreach $sim (@sims)
{
    $peg2  = $sim->id2;
    $func  = $fig->function_of($peg2);
    $sc    = $sim->psc;
    print join("\t",($peg2,$sc,$func)),"\n";
}

###############################################

### accessing BBHs

use FIGO;
my $figO = new FIGO;

$peg  = "fig|83333.1.peg.4";
$pegO = new FeatureO($figO,$peg);

@bbhs = $pegO->bbhs;
foreach $bbh (@bbhs)
{
    $peg2  = $bbh->peg2;
    $pegO2 = new FeatureO($figO,$peg2);
    $func  = $pegO2->function_of;
    $sc    = $bbh->psc;
    print join("\t",($peg2,$sc,$func)),"\n";
}

#---------------------------------------------

use FIG;
my $fig = new FIG;

$peg  = "fig|83333.1.peg.4";

@bbhs = $fig->bbhs($peg);
foreach $bbh (@bbhs)
{
    ($peg2,$sc,$bit_score) = @$bbh;
    $func  = $fig->function_of($peg2);
    print join("\t",($peg2,$sc,$func)),"\n";
}

###############################################

### accessing annotations

use FIGO;
my $figO = new FIGO;

$peg  = "fig|83333.1.peg.4";
$pegO = new FeatureO($figO,$peg);

@annotations = $pegO->annotations;

foreach $ann (@annotations)
{
    print join("\n",$ann->fid,$ann->timestamp(1),$ann->made_by,$ann->text),"\n\n";
}

#---------------------------------------------

use FIG;
my $fig = new FIG;

$peg = "fig|83333.1.peg.4";
@annotations = $fig->feature_annotations($peg);
foreach $_ (@annotations)
{
    (undef,$ts,$who,$text) = @$_;
    $who =~ s/master://i;
    print "$ts\n$who\n$text\n\n";
}

###############################################

### accessing coupling data


use FIGO;
my $figO = new FIGO;

my $peg  = "fig|83333.1.peg.4";
my $pegO = new FeatureO($figO,$peg);
foreach $coupled ($pegO->coupled_to)
{
    print join("\t",($coupled->peg1,$coupled->peg2,$coupled->sc)),"\n";
    foreach $tuple ($coupled->evidence)
    {
	my($peg3O,$peg4O,$rep) = @$tuple;
	print "\t",join("\t",($peg3O->id,$peg4O->id,$rep)),"\n";
    }
    print "\n";
}

#---------------------------------------------


use FIG;
my $fig = new FIG;

my $peg1  = "fig|83333.1.peg.4";
foreach $coupled ($fig->coupled_to($peg1))
{
    ($peg2,$sc) = @$coupled;
    print join("\t",($peg1,$peg2,$sc)),"\n";
    foreach $tuple ($fig->coupling_evidence($peg1,$peg2))
    {
	my($peg3,$peg4,$rep) = @$tuple;
	print "\t",join("\t",($peg3,$peg4,$rep)),"\n";
    }
    print "\n";
}

###############################################

=head3 Accessing Subsystem data

use FIGO;
my $figO = new FIGO;

foreach $sub ($figO->subsystems)
{
    if ($sub->usable)
    {
	print join("\t",($sub->id,$sub->curator)),"\n";

	print "\tRoles\n";
	@roles = $sub->roles;
	foreach $role (@roles)
	{
	    print "\t\t",join("\t",($role->id)),"\n";
	}
	
	print "\tGenomes\n";
	foreach $genome ($sub->genomes)
	{
	    print "\t\t",join("\t",($sub->variant($genome),
				    $genome->id,
				    $genome->genus_species)),"\n";
	    @pegs = ();
	    foreach $role (@roles)
	    {
		push(@pegs,$sub->pegs_in_cell($genome,$role));
	    }
	    print "\t\t\t",join(",",@pegs),"\n";
	}
    }
}

#---------------------------------------------

use FIG;
my $fig = new FIG;

foreach $sub (grep { $fig->usable_subsystem($_) } $fig->all_subsystems)
{
    $subO = new Subsystem($sub,$fig);
    $curator = $subO->get_curator;
    print join("\t",($sub,$curator)),"\n";

    print "\tRoles\n";
    @roles = $subO->get_roles;
    foreach $role (@roles)
    {
	print "\t\t",join("\t",($role)),"\n";
    }
	
    print "\tGenomes\n";
    foreach $genome ($subO->get_genomes)
    {
	print "\t\t",join("\t",($subO->get_variant_code_for_genome($genome),
	                        $genome,
	                        $fig->genus_species($genome))),"\n";
	foreach $role (@roles)
	{
	    push(@pegs,$subO->get_pegs_from_cell($genome,$role));
	}
	print "\t\t\t",join(",",@pegs),"\n";
    }
    print "\n";
}

###############################################

=head3 Accessing FIGfams

use FIGO;
my $figO = new FIGO;

foreach $fam ($figO->all_figfams)
{
    print join("\t",($fam->id,$fam->function)),"\n";
    foreach $pegO ($fam->members)
    {
	$peg = $pegO->id;
	print "\t$peg\n";
    }
}

#---------------------------------------------

use FIG;
use FigFam;
use FigFams;

my $fig = new FIG;
my $figfams = new FigFams($fig);

foreach $fam ($figfams->all_families)
{
    my $figfam = new FigFam($fig,$fam);
    print join("\t",($fam,$figfam->family_function)),"\n";
    foreach $peg ($figfam->list_members)
    {
	print "\t$peg\n";
    }
}

###############################################

=head3 Placing a sequence into a FIGfam

use FIGO;
my $figO = new FIGO;

$seq = "MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILS
AFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTH
IAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETV
AIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ
LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNA
MDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRA
LRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
RKLMMNHQ";
$seq =~ s/\n//gs;

my($fam,$sims) = $figO->family_containing($seq);
    
if ($fam)
{
    print join("\t",($fam->id,$fam->function)),"\n";
    print &Dumper($sims);
}
else
{
    print "Could not place it in a family\n";
}

#---------------------------------------------

use FIG;
use FigFam;
use FigFams;

my $fig = new FIG;
my $figfams = new FigFams($fig);

$seq = "MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILS
AFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTH
IAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETV
AIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ
LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNA
MDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRA
LRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
RKLMMNHQ";
$seq =~ s/\n//gs;

my($fam,$sims) = $figfams->place_in_family($seq);
    
if ($fam)
{
    print join("\t",($fam->family_id,$fam->family_function)),"\n";
    print &Dumper($sims);
}
else
{
    print "Could not place it in a family\n";
}

###############################################

=head3 Getting representative sequences for a FIGfam

use FIGO;
my $figO = new FIGO;

$fam         = "FIG102446";
my $famO     = &FigFamO::new('FigFamO',$figO,$fam);
my @rep_seqs = $famO->rep_seqs;

foreach $seq (@rep_seqs)
{
    print ">query\n$seq\n";
}

#---------------------------------------------

use FIG;
use FigFam;
use FigFams;

my $fig = new FIG;

$fam         = "FIG102446";
my $famO     = new FigFam($fig,$fam);
my @rep_seqs = $famO->representatives;

foreach $seq (@rep_seqs)
{
    print ">query\n$seq\n";
}


###############################################


=head3 Testing for membership in FIGfam

use FIGO;
my $figO = new FIGO;

$seq = "MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILS
AFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTH
IAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETV
AIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ
LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNA
MDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRA
LRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
RKLMMNHQ";
$seq =~ s/\n//gs;

$fam                  = "FIG102446";
my $famO              = &FigFamO::new('FigFamO',$figO,$fam);
my($should_be, $sims) = $famO->should_be_member($seq);

if ($should_be)
{
    print join("\t",($famO->id,$famO->function)),"\n";
    print &Dumper($sims);
}
else
{
    print "Sequence should not be added to family\n";
}

#---------------------------------------------

use FIG;
use FigFam;
use FigFams;

my $fig = new FIG;

$seq = "MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILS
AFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTH
IAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETV
AIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ
LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNA
MDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRA
LRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
RKLMMNHQ";
$seq =~ s/\n//gs;

$fam                  = "FIG102446";
my $famO              = new FigFam($fig,$fam);
my($should_be, $sims) = $famO->should_be_member($seq);

if ($should_be)
{
    print join("\t",($famO->family_id,$famO->family_function)),"\n";
    print &Dumper($sims);
}
else
{
    print "Sequence should not be added to family\n";
}

=cut


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3