[Bio] / FigKernelScripts / compute_potential_fusions.pl Repository:
ViewVC logotype

View of /FigKernelScripts/compute_potential_fusions.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (download) (as text) (annotate)
Tue Jan 10 16:35:55 2006 UTC (14 years, 1 month ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.11: +10 -1 lines
fix to fusions

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use strict;
use FIG;
my $fig = new FIG;

# usage: compute_potential_fusions PEG1 PEG2... < pegs  [ NOTE: You need to give it STDIN, even as /dev/null ]
my($peg);

foreach $peg (@ARGV)
{
    &compute($fig,$peg);
}

while (defined($_ = <STDIN>))
{
    if ($_ =~ /^(fig\|\d+\.\d+\.peg\.\d+)/)
    {
	&compute($fig,$1);
    }
}

sub compute {
    my($fig,$peg) = @_;
    my($fusion,$sim);

#   print STDERR "processing peg $peg\n";
    my $genome1 = &FIG::genome_of($peg);

    if (! $fig->possibly_truncated($peg))
    {
	my $fusions = {};
	my @sims = grep { ! $fig->possibly_truncated($_->id2) } $fig->sims($peg,500,1.0e-20,"fig");

	my($genome2,%by_genome);
	undef %by_genome;
	foreach $sim (@sims)
	{
	    $genome2 = &FIG::genome_of($sim->id2);
	    push(@{$by_genome{$genome2}},$sim);
	}

	foreach $genome2 (keys(%by_genome))
	{
#	    print STDERR "    checking $genome2\n";
	    my $sims1 = $by_genome{$genome2};

	    my $sim = $sims1->[0];
	    my $b1  = $sim->b1;
	    my $e1  = $sim->e1;
	    my $ln1 = $sim->ln1;
	    my $b2  = $sim->b2;
	    my $e2  = $sim->e2;
	    my $ln2 = $sim->ln2;
	    
	    if (&covers($b1,$e1,$ln1) && &just_portion($b2,$e2,$ln2) && 
		((@$sims1 == 1) || &big_gap($sims1->[0]->psc,$sims1->[1]->psc)))
	    {
#	        print STDERR "        checking in fusion",$fig->genus_species($fig->genome_of($sim->id2)),"\n";
		my @sims2 = grep { ! $fig->possibly_truncated($_->id2) }
		            grep { &FIG::genome_of($_->id2) eq $genome1 }
	                    $fig->sims($sim->id2,500,1.0e-20,"fig");

		if (@sims2 > 0)
		{
		    &check_fusion($fig,$fusions,$sim->id2,\@sims2);
		}
	    }
	    elsif (&covers($b2,$e2,$ln2) && &just_portion($b1,$e1,$ln1))
	    {
#	        print STDERR "        checking fusion",$fig->genome_of($sim->id2),"\n";
		if (@$sims1 > 0)
		{
		    &check_fusion($fig,$fusions,$peg,$sims1);
		}
	    }
	}

	foreach $fusion (keys(%$fusions))
	{
	    &display_fusion($fig,$fusions->{$fusion});
	}
    }
 }

sub big_gap {
    my($psc1,$psc2) = @_;

    return &FIG::neg_log($psc1) > (&FIG::neg_log($psc2) + 10);
}

sub covers {
    my($b,$e,$ln) = @_;

    return (($e - $b) > (0.75 * $ln));
}

sub just_portion {
    my($b,$e,$ln) = @_;

    return (($b > 100) || ($e < ($ln - 100)));
}

sub check_fusion {
    my($fig,$fusions,$peg,$sims) = @_;
    my($sim,@pieces,$sum_covered,$i,$last,$key);

#   print STDERR &Dumper(["checking",$peg,$sims]);
    my $ln = $sims->[0]->ln1;
    foreach $sim (@$sims)
    {
	my $b1  = $sim->b1;
	my $e1  = $sim->e1;
	my $ln1 = $sim->ln1;
	my $b2  = $sim->b2;
	my $e2  = $sim->e2;
	my $ln2 = $sim->ln2;

	if (&covers($b2,$e2,$ln2) && &just_portion($b1,$e1,$ln1))
	{
	    push(@pieces,[$b1,$e1,$sim->id2,$b2,$e2,$ln2]);
	}
    }
    @pieces = sort { $a->[0] <=> $b->[0] } @pieces;
#    print STDERR &Dumper(["pieces",\@pieces]);

    $sum_covered = 0;
    my @good_pieces = ();
    for ($i = 0,$last = -1; ($i < @pieces); $i++)
    {
	if ($pieces[$i]->[0] > ($last + 1))
	{
	    push(@good_pieces,$pieces[$i]);
	    $sum_covered += ($pieces[$i]->[1] - $pieces[$i]->[0]) + 1;

	}
	$last = $pieces[$i]->[1];
    }

#   print STDERR "ln=$ln covered=$sum_covered\n";
    if (($sum_covered > (0.75 * $ln)) && (@good_pieces > 1) && &all_good($fig,$peg,\@good_pieces))
    {
	$key = join("\t",($peg,map { $_->[2] } @good_pieces));
#	print STDERR "GOT: $key\n";
	if (! $fusions->{$key})
	{
	    $fusions->{$key} = [$peg,$ln,@good_pieces];
	}
    }
}
	
sub all_good {
    my($fig,$peg1,$good_pieces) = @_;

    my $genome1 = &FIG::genome_of($peg1);
    my($i,$j);
    for ($i=0; ($i < @$good_pieces); $i++)
    {
	for ($j = $i+1; ($j < @$good_pieces); $j++)
	{
	    if ($good_pieces->[$i]->[2] eq $good_pieces->[$j]->[2]) { return 0 }
	}
    }

    foreach my $piece (@$good_pieces)
    {
	my $peg2 = $piece->[2];
	my @sims = grep { &FIG::genome_of($_->id2) eq $genome1 }
	           $fig->sims($peg2,500,1.0e-20,"fig");
	if ((@sims == 0) || ($sims[0]->id2 ne $peg1))
	{
	    return 0;
	}
    }
    return 1;
}

sub display_fusion {
    my($fig,$fusion) = @_;
    
    my $peg = shift @$fusion;
    my $ln  = shift @$fusion;
    print "$peg\t$ln\t",join("\t", map { join(",",@$_) } @$fusion),"\n";
}
    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3