[Bio] / FigKernelPackages / ParalogResolution.pm Repository:
ViewVC logotype

View of /FigKernelPackages/ParalogResolution.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Wed Jul 1 00:08:52 2009 UTC (10 years, 4 months ago) by golsen
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, mgrast_dev_04052011, mgrast_dev_02222011
Changes since 1.4: +86 -32 lines
Modifications to support line-drawing character set in tree printer plots.

# -*- perl -*-
########################################################################
# Copyright (c) 2003-2009 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
########################################################################

package ParalogResolution;

use strict;
use FIG;

use Data::Dumper;
use Carp;
use gjoalignment;

sub context_tags {
    my($pegs,$dist) = @_;

    my $fig = new FIG;
    my($i,%before,%after,%funcs);
    foreach my $peg (@$pegs)
    {
	my $loc = $fig->feature_location($peg);
	if ($loc)
	{
	    my($contig,$beg,$end) = $fig->boundaries_of($loc);
	    if ($contig && $beg && $end)
	    {
		my $min = &FIG::min($beg,$end) - $dist;
		my $max = &FIG::max($beg,$end) + $dist;
		my $feat;
		($feat,undef,undef) = $fig->genes_in_region(&FIG::genome_of($peg),$contig,$min,$max);
		my(@before,@after,$seen_peg);
		for ($i=0; ($i < @$feat); $i++)
		{
		    my $fid = $feat->[$i];
		    if (&FIG::ftype($fid) eq 'peg')
		    {
			if ($fid eq $peg) 
			{
			    $seen_peg = 1;
			    next;
			}
			my $func = $fig->function_of($fid,"",1);
			$funcs{$func}++;

			if ((($beg < $end) && (! $seen_peg)) || 
			    (($beg > $end) && $seen_peg))
			{
			    push(@before,[$fid,$func]);
			}
			else
			{
			    push(@after,[$fid,$func]);
			}
		    }
		}
		$before{$peg} = [($beg < $end) ? @before : reverse @before];
		$after{$peg}  = [($beg < $end) ? @after  : reverse @after];
	    }
	}
    }
    my @hits = sort { $funcs{$b} <=> $funcs{$a} } 
               grep { $funcs{$_} > 1 } 
               keys(%funcs);

    my %val;
    my %val2func;
    for ($i=0; ($i < @hits); $i++)
    {
	$val{$hits[$i]} = $i+1;
	$val2func{$i+1} = $hits[$i];
    }
    my $tags = {};
    foreach my $peg (@$pegs)
    {
	my $left   = &process_neigh($peg,\%before,\%val);
	my $right  = &process_neigh($peg,\%after,\%val);
	$tags->{$peg} = $left . "::" . $right;
    }
    return ($tags,[map { [$_,$val2func{$_}] } sort { $a <=> $b } keys(%val2func)]);
}

sub process_neigh {
    my($peg,$neigh,$val) = @_;

    my @pieces = ();
    my $tuples = $neigh->{$peg};
    foreach my $tuple (@$tuples)
    {
	my($peg1,$func1) = @$tuple;
	my $x = $val->{$func1};
	if ($x)
	{
	    push(@pieces,$x);
	}
    }
    return join(" ",@pieces);
}


sub reference_set_for_paralogs {
    my( $genomes, $roles, $parms ) = @_;

    $parms ||= {};

    my $keep_multifunctional = $parms->{'keep_multifunctional'};
    if (! defined($keep_multifunctional)) { $keep_multifunctional = 1 }

    my $max_sc               = $parms->{'max_sc'};
    if (! defined($max_sc))  { $max_sc = 1.0e-5 }

    my $min_cov              = $parms->{'min_cov'};
    if (! defined($min_cov)) { $min_cov = 0.7 }

    if (! $max_sc) { $max_sc = 1.0e-5 }
    my %ref_genomes = map { $_ => 1 } @$genomes;
    my %ref_roles = map { $_ => 1 } @$roles;
    my $fig    = new FIG;
    my %pegs;
    foreach my $role ( @$roles )
    {
	foreach my $peg ($fig->prots_for_role($role))
	{
	    if ( $ref_genomes{ &FIG::genome_of( $peg ) } && ( ! $fig->screwed_up( $peg ) ) )
	    {
		my $func = $fig->function_of($peg,"",1);
		#  Original version could be confused by comments
		if ( 0 )
		{
		    if ($ref_roles{$func} || ($keep_multifunctional && &role_in($func,$role)))
		    {
#		        print STDERR "got $peg\n";
		        $pegs{$peg} = $func;
		    }
		}
                else
                {
                    my $funcF = $func;
                    $funcF =~ s/ ##? .*$//;
                    $pegs{ $peg } = $func if ! ( $funcF =~ / \/ / );
                }
            }
        }
    }
    my @seqs = map { my $peg = $_; 
		     my $gs = $fig->genus_species(&FIG::genome_of($peg));
		     my $func_of_peg = $pegs{$peg};
		     [$peg,"$func_of_peg \[$gs\]",$fig->get_translation($peg)] 
		   } 
                   keys(%pegs);

#   Now add paralogs from given genomes
#
#  Original code does not add paralogs in genomes that do not already have a
#  gene annotated with a requested role.

    my @to_add = ();
    if ( 0 )
    {
        foreach my $tuple (@seqs)
        {
            my($peg,$comment,$seq) = @$tuple;
            my $genome = &FIG::genome_of($peg);
            if (! -s "$FIG_Config::organisms/$genome/Features/peg/fasta.psq")
            {
                system "$FIG_Config::ext_bin/formatdb -p T -i $FIG_Config::organisms/$genome/Features/peg/fasta";
            }
            my @blastout = &FIG::blastitP($peg,$seq,"$FIG_Config::organisms/$genome/Features/peg/fasta",$max_sc,"-FF");
    
            foreach my $sim (@blastout)
            {
                if ((! $pegs{$sim->id2}) && $fig->is_real_feature($sim->id2) &&
                    ((($sim->e2 + 1 - $sim->b2) /$sim->ln2) >= $min_cov))
                {
                    $pegs{$sim->id2} = $fig->function_of($sim->id2);
                    my $gs = $fig->genus_species(&FIG::genome_of($sim->id2));
                    push(@to_add,[$sim->id2,"$pegs{$sim->id2} \[$gs\]",$fig->get_translation($sim->id2)]);
                }
            }
        }
    }
    else
    {
        foreach my $genome ( @$genomes )
        {
            #  Verify the blast db
            my $fasta = "$FIG_Config::organisms/$genome/Features/peg/fasta";
            if ( ! -s "$fasta.psq" )
            {
                next if ! -s $fasta;
                system "$FIG_Config::ext_bin/formatdb -p T -i $fasta";
                next if ! -s "$fasta.psq";
            }

            my $gs = $fig->genus_species( $genome );
            my %seq_done;
            foreach my $tuple ( @seqs )
            {
                my( $peg, $comment, $seq ) = @$tuple;
                next if $seq_done{ $seq }++;
                foreach my $sim ( &FIG::blastitP( $peg, $seq, $fasta, $max_sc, "-FF" ) )
                {
                    my $id2 = $sim->id2;
                    if ( ( ! $pegs{ $id2 } )
                      && $fig->is_real_feature( $id2 )
                      && ( ( ( $sim->e2 + 1 - $sim->b2 ) / $sim->ln2 ) >= $min_cov )
                       )
                    {
                        $pegs{ $id2 } = $fig->function_of( $id2 ) || 'undefined';
                        push( @to_add, [ $id2, "$pegs{$id2} \[$gs\]", $fig->get_translation($id2) ] );
                    }
                }
            }
        }
    }
    push( @seqs, @to_add );

    ( @seqs > 1 ) ? &gjoalignment::align_with_muscle( \@seqs )  # align, or align & tree
                  : wantarray ? () : undef;
}


sub role_in {
    my($func,$role) = @_;
    
    my @roles_of_function = &FIG::roles_of_function($func);
    my $i;
    for ($i=0; ($i < @roles_of_function) && ($role ne $roles_of_function[$i]); $i++) {}
    return ($i < @roles_of_function);
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3