[Bio] / FigKernelPackages / ParalogResolution.pm Repository:
ViewVC logotype

View of /FigKernelPackages/ParalogResolution.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Mon Jun 22 19:35:59 2009 UTC (10 years, 11 months ago) by overbeek
Branch: MAIN
added stuff

# -*- perl -*-
########################################################################
# Copyright (c) 2003-2009 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
########################################################################

package ParalogResolution;

use strict;
use FIG;

use Data::Dumper;
use Carp;

sub context_tags {
    my($pegs,$dist) = @_;

    my $fig = new FIG;
    my($i,%before,%after,%funcs);
    foreach my $peg (@$pegs)
    {
	my $loc = $fig->feature_location($peg);
	if ($loc)
	{
	    my($contig,$beg,$end) = $fig->boundaries_of($loc);
	    if ($contig && $beg && $end)
	    {
		my $min = &FIG::min($beg,$end) - $dist;
		my $max = &FIG::max($beg,$end) + $dist;
		my $feat;
		($feat,undef,undef) = $fig->genes_in_region(&FIG::genome_of($peg),$contig,$min,$max);
		my(@before,@after,$seen_peg);
		for ($i=0; ($i < @$feat); $i++)
		{
		    my $fid = $feat->[$i];
		    if (&FIG::ftype($fid) eq 'peg')
		    {
			if ($fid eq $peg) 
			{
			    $seen_peg = 1;
			    next;
			}
			my $func = $fig->function_of($fid,"",1);
			$funcs{$func}++;

			if ((($beg < $end) && (! $seen_peg)) || 
			    (($beg > $end) && $seen_peg))
			{
			    push(@before,[$fid,$func]);
			}
			else
			{
			    push(@after,[$fid,$func]);
			}
		    }
		}
		$before{$peg} = [($beg < $end) ? @before : reverse @before];
		$after{$peg}  = [($beg < $end) ? @after  : reverse @after];
	    }
	}
    }
    my @hits = sort { $funcs{$b} <=> $funcs{$a} } 
               grep { $funcs{$_} > 1 } 
               keys(%funcs);

    my %val;
    my %val2func;
    for ($i=0; ($i < @hits); $i++)
    {
	$val{$hits[$i]} = $i+1;
	$val2func{$i+1} = $hits[$i];
    }
    my $tags = {};
    foreach my $peg (@$pegs)
    {
	my $left   = &process_neigh($peg,\%before,\%val);
	my $right  = &process_neigh($peg,\%after,\%val);
	$tags->{$peg} = $left . "::" . $right;
    }
    return ($tags,[map { [$_,$val2func{$_}] } sort { $a <=> $b } keys(%val2func)]);
}

sub process_neigh {
    my($peg,$neigh,$val) = @_;

    my @pieces = ();
    my $tuples = $neigh->{$peg};
    foreach my $tuple (@$tuples)
    {
	my($peg1,$func1) = @$tuple;
	my $x = $val->{$func1};
	if ($x)
	{
	    push(@pieces,$x);
	}
    }
    return join(" ",@pieces);
}
    
sub reference_set_for_paralogs {
    my($genomes,$roles,$keep_multifunctional,$max_sc) = @_;

    if (! $max_sc) { $max_sc = 1.0e-5 }
    my %ref_genomes = map { $_ => 1 } @$genomes;
    my %ref_roles = map { $_ => 1 } @$roles;
    my $fig    = new FIG;
    my %pegs;
    foreach my $role (@$roles)
    {
	foreach my $peg ($fig->prots_for_role($role))
	{
	    if ($ref_genomes{&FIG::genome_of($peg)} && (! $fig->screwed_up($peg)))
	    {
		my $func = $fig->function_of($peg,"",1);
		if ($ref_roles{$func} || ($keep_multifunctional && &role_in($func,$role)))
		{
#		    print STDERR "got $peg\n";
		    $pegs{$peg} = $func;
		}
	    }
	}
    }
    my @seqs = map { my $peg = $_; 
		     my $gs = $fig->genus_species(&FIG::genome_of($peg));
		     my $func_of_peg = $pegs{$peg};
		     [$peg,"$func_of_peg \[$gs\]",$fig->get_translation($peg)] 
		   } 
                   keys(%pegs);

#   Now add paralogs from given genomes
    my @to_add = ();
    foreach my $tuple (@seqs)
    {
	my($peg,$comment,$seq) = @$tuple;
	my $genome = &FIG::genome_of($peg);
	if (! -s "$FIG_Config::organisms/$genome/Features/peg/fasta.psq")
	{
	    system "$FIG_Config::ext_bin/formatdb -p T -i $FIG_Config::organisms/$genome/Features/peg/fasta";
	}
	my @blastout = &FIG::blastitP($peg,$seq,"$FIG_Config::organisms/$genome/Features/peg/fasta",$max_sc,"-FF");

	foreach my $sim (@blastout)
	{
	    if (! $pegs{$sim->id2})
	    {
		$pegs{$sim->id2} = $fig->function_of($sim->id2);
		my $gs = $fig->genus_species(&FIG::genome_of($sim->id2));
		push(@to_add,[$sim->id2,"$pegs{$sim->id2} \[$gs\]",$fig->get_translation($sim->id2)]);
	    }
	}
    }
    push(@seqs,@to_add);
    if (@seqs < 2) { return undef }
    return &align_with_muscle(\@seqs);
}

sub role_in {
    my($func,$role) = @_;
    
    my @roles_of_function = &FIG::roles_of_function($func);
    my $i;
    for ($i=0; ($i < @roles_of_function) && ($role ne $roles_of_function[$i]); $i++) {}
    return ($i < @roles_of_function);
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3