[Bio] / FigKernelScripts / expand_sims.pl Repository:
ViewVC logotype

View of /FigKernelScripts/expand_sims.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Mon Feb 18 21:31:38 2008 UTC (11 years, 9 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.1: +3 -1 lines
*** empty log message ***

#
# Expand a sims file.
#
# Usage: expand_sims peg.synonmyms sims expanded_sims
#
# Writes expanded_sims, expanded_sims.index, expanded_sims.flips, expanded_sims.flips.index
#

use strict;
use FIG;
use FIGV;
use DB_File;

my $orgdir;
while ((@ARGV > 0) && ($ARGV[0] =~ /^-/))
{
    my $arg = shift @ARGV;
    if ($arg =~ /^-orgdir/)
    {
	$orgdir = shift @ARGV;
    }
    else
    {
	die "Unknown option $arg\n";
    }
}

@ARGV == 3 or die "Usage: expand_sims [-orgdir organism-dir] peg.synonyms sims expanded_sims";

my $peg_syn = shift;
my $sims = shift;
my $exp_sims = shift;

my $fig;
if ($orgdir)
{
    $fig = new FIGV($orgdir);
}
else
{
    $fig = new FIG;
}
    
#
# See if there are preprocessed indexes available.
#
   
my $pegsyn_to = "$peg_syn.index.t";
my $pegsyn_from = "$peg_syn.index.f";

my %peg_mapping;
my(%ps_to, %ps_from);
my $get_mapping;
if (-f $pegsyn_to and -f $pegsyn_from)
{

    my $tie = tie %ps_to, 'DB_File', $pegsyn_to, O_RDONLY, 0666, $DB_BTREE;

    $tie or &fatal("cannot tie $pegsyn_to: $!");

    my $tie = tie %ps_from, 'DB_File', $pegsyn_from, O_RDONLY, 0666, $DB_BTREE;

    $tie or &fatal("cannot tie $pegsyn_from: $!");
    $get_mapping = \&get_tied_mapping;
}
else
{
    warn "Start processing syns ($pegsyn_to $pegsyn_from)\n";
    my($to, $to_len, $from);
    while (defined($_ = <SYNS>))
    {
	if (($to, $to_len, $from) =  /^([^,]+),(\d+)\t(\S+)/)
	{
	    my @from = map { [ split(/,/, $_) ] } split(/;/,$from);
	    if (@from > 0)
	    {
		$peg_mapping{$to} = [$to_len, \@from];
	    }
	}
    }
    warn "finished with syns\n";

    $get_mapping = sub { $peg_mapping{$_[0]} };
}
close(SYNS);

#
# Now perform the expansion.
#

open(SIMS, "<$sims") or &fatal("Cannot open similarities file $sims: $!\n");
open(EXP, ">$exp_sims") or &fatal("Cannot open expanded similarities file $exp_sims for writing: $!\n");

while (<SIMS>)
{
    chomp;
    my @s = split(/\t/, $_);
    
    my $id2 = $s[1];
    my $id1 = $s[0];
    my @relevant = ();

    #
    # Find contig / location info
    #
    
    my $loc1 = $fig->feature_location($id1);
    my($contig1, $beg1, $end1) = $loc1 =~ /(.*)_(\d+)_(\d+)$/;
    ($beg1, $end1) = ($end1, $beg1) if $end1 < $beg1;

    my ($genome1) = $id1 =~ /^fig\|(\d+\.\d+)/;
    
#    my @maps_to = $fig->mapped_prot_ids( $id2 );

    my $mapping = &$get_mapping($id2);
#    my $mapping = $peg_mapping{$id2};
    if ($mapping)
    {
	my($ref_len, $pairs) = @$mapping;
	
	my @maps_to = grep { $_->[0] !~ /^xxx\d+/ } @$pairs;
	
	my $seen = {};
	foreach my $x ( @maps_to )
	{
	    next if @$x == 0;
	    my ( $x_id, $x_ln ) = @$x;
	    
	    #
	    # Find contig / location info
	    #

	    my @loc;
	    
	    if  ($x_id =~ /^fig\|(\d+\.\d+)/)
	    {
		my $genome2 = $1;
		my $loc2 = $fig->feature_location($x_id);
		my($contig2, $beg2, $end2) = $loc2 =~ /(.*)_(\d+)_(\d+)$/;
		($beg2, $end2) = ($end2, $beg2) if $end2 < $beg2;
		@loc = ($genome1, $contig1, $beg1, $end1, $genome2, $contig2, $beg2, $end2);
	    }
			    
	    next if $seen->{$x_id};
	    $seen->{$x_id} = 1;
	    
	    my $delta2  = $ref_len - $x_ln; # Coordinate shift
	    my $sim1    = [ @s ]; # Make a copy
	    $sim1->[1]  = $x_id;
	    $sim1->[8] -= $delta2;
	    $sim1->[9] -= $delta2;
	    
	    print EXP join("\t", @$sim1, @loc), "\n";
	}
    }
    else
    {
	my @loc;
	if  ($id2 =~ /^fig\|(\d+\.\d+)/)
	{
	    my $genome2 = $1;
	    my $loc2 = $fig->feature_location($id2);
	    my($contig2, $beg2, $end2) = $loc2 =~ /(.*)_(\d+)_(\d+)$/;
	    ($beg2, $end2) = ($end2, $beg2) if $end2 < $beg2;
	    @loc = ($genome1, $contig1, $beg1, $end1, $genome2, $contig2, $beg2, $end2);
	}
	print EXP join("\t", @s, @loc), "\n";
    }
}
close(SIMS);
close(EXP);
undef %peg_mapping;

#
# And index.
#
-f $exp_sims or die "expanded sims file $exp_sims does not exist\n";
my $index_file = "$exp_sims.index";

index_sims($exp_sims, $index_file);

#
# Flip and index the flips.
#

my $flipped_sims = "$exp_sims.flips";
my $flipped_sims_index = "$exp_sims.flips.index";
my $rc = system("$FIG_Config::bin/flip_sims", $exp_sims, $flipped_sims);
if ($rc != 0)
{
    &fatal("Error flipping $exp_sims to $flipped_sims\n");
}

index_sims($flipped_sims, $flipped_sims_index);


exit(0);

#
# Retrieve mapping data thru hashes.
#

sub get_tied_mapping
{
    my($peg) = @_;

    my $dat = $ps_to{$peg};
    my($to_len, $from) = split(/:/, $dat, 2);
    my @from = map { [ split(/,/, $_) ] } split(/;/,$from);
    if (@from > 0)
    {
	return [$to_len, \@from];
    }
    else
    {
	return ();
    }
}


#
# Use the C index_sims_file app to create a berkeley db index
# of the sims file.
#

sub index_sims
{
    my($sims, $index_file) = @_;

    my $path = &FIG::find_fig_executable("index_sims_file");

    open(IDX, "$path 0 < $sims |") or die "Cannot open index_sims_file pipe: $!\n";

    my %index;
    my $tied = tie %index, 'DB_File', $index_file, O_RDWR | O_CREAT, 0666, $DB_BTREE;

    $tied or &fatal("Creation of hash $index_file failed: $!\n");

    while (<IDX>)
    {
	chomp;
	my($peg, undef, $seek, $len) = split(/\t/);
	
	$index{$peg} = "$seek,$len";
    }
    close(IDX);
    
    $tied->sync();
    untie %index;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3