[Bio] / FigKernelScripts / possible_projections.pl Repository:
ViewVC logotype

View of /FigKernelScripts/possible_projections.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Thu Mar 1 20:50:45 2012 UTC (7 years, 8 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, mgrast_version_3_2, rast_rel_2014_0912, HEAD
for veronika projections

use strict;
use FIG;
my $fig = new FIG;
use Data::Dumper;
use SeedEnv;

my $usage = "usage:: possible_projections CorrDir anno.data > new.assignments";

my($corrD,$anno_data);
(
 ($corrD       = shift @ARGV) && (-d $corrD) &&
 ($anno_data   = shift @ARGV)
)
    || die $usage;

my %func_of;
open(ANNO,"<$anno_data") || die "could not open $anno_data";
my %genomes;
while (defined($_ = <ANNO>))
{
    chop;
    my($role,$peg,$func) = split(/\t/,$_);
    &set_funcs_of_md5($fig,$peg,$func,\%func_of);
    my $g = &SeedUtils::genome_of($peg);
    $genomes{$g}->{$role}->{$peg} = $func;
}
close(ANNO);

opendir(CORR,$corrD) || die "could not open $corrD";
foreach $_ (grep { $_ !~ /^\./ } readdir(CORR))
{
    if ($_ =~ /^(\d+\.\d+)-(\d+\.\d+)$/)
    {
	my($g1,$g2) = ($1,$2);
	if ($genomes{$g1})
	{
	    &process($fig,$corrD,\%genomes,$g1,$g2,0,\%func_of);
	}

	if ($genomes{$g2})
	{
	    &process($fig,$corrD,\%genomes,$g1,$g2,1,\%func_of);
	}
    }
}

sub set_funcs_of_md5 {
    my($fig,$peg,$func,$func_of) = @_;

    if ($func_of{$peg} eq $func) { return }

    my $md5    = $fig->md5_of_peg($peg);
    my @pegs   = $fig->pegs_with_md5($md5);
    foreach my $peg2 (@pegs)
    {
	my $curr = &function_of($fig,$peg2,$func_of);
	if (($curr ne $func) && $fig->is_real_feature($peg2))
	{
	    print "$peg2\t$func\n";
	    $func_of->{$peg2} = $func;
	}
    }
}

sub function_of {
    my($fig,$peg,$func_of) = @_;

    if ($func_of{$peg})  { return $func_of{$peg} }
    return scalar $fig->function_of($peg);
}

sub process {
    my($figs,$corrD,$genomes,$g1,$g2,$flipped,$func_of) = @_;

    my $roleH = $genomes->{$g1};
    if (! $roleH) { return }
    my @roles = keys(%$roleH);
    my %from_pegs;
    foreach my $role (@roles)
    {
	my $pegH = $roleH->{$role};
	foreach my $peg (keys(%$pegH))
	{
	    $from_pegs{$peg} = $role;
	}
    }

    my @mapped = map { $from_pegs{$_->[0]} ? [$_->[0],$_->[1]] : [$_->[1],$_->[0]] }
                 grep {($from_pegs{$_->[0]} || $from_pegs{$_->[1]}) && &solid($_) }
                 map { chop; [split(/\t/,$_)] }
                 `cat $corrD/$g1-$g2`;

    my %mapped_roles = map { ($from_pegs{$_->[0]} => 1) } @mapped;
    if (keys(%mapped_roles) == @roles)
    {
	foreach my $pair (@mapped)
	{
	    my($from,$to) = @$pair;
	    &set_funcs_of_md5($fig,$to,&function_of($fig,$from,$func_of),$func_of);
	}
    }
}

sub one_hits {
    my($roleH,$hits) = @_;
    
    foreach my $peg (keys(%$roleH))
    {
	if ($hits->{$peg})
	{
	    return 1;
	}
    }
    return 0;
}

sub solid {
    my($x) = @_;    
# $x is a correspondence entry (see http://pubseed.theseed.org/sapling/server.cgi?pod=ServerThing#Gene_Correspondence_List)
    if (($x->[8] eq "<=>") &&     # bi-directional best-hit
	($x->[9] >= 60)    &&     # require 60% identity
	((($x->[12]+1 - $x->[11]) / $x->[13]) >= 0.7) &&   # require coverage of 70% on protein1
	((($x->[15]+1 - $x->[14]) / $x->[16]) >= 0.7) &&   # require coverage of 70% on protein2
	($x->[2] >= 4))           # at least 4 genes in context
    {
	return 1;
    }
#   if ($x->[2] >= 2) { print STDERR &Dumper($x) }
    return 0;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3