[Bio] / FortyEightMeta / ExpandSims.pm Repository:
ViewVC logotype

View of /FortyEightMeta/ExpandSims.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri Mar 23 20:41:31 2007 UTC (12 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
initial checking of metagenome 48 stuff

use FIG;
use DB_File;
use FIG_Config;
use strict;

use vars '@EXPORT';

@EXPORT = qw(load_peg_syns expand_sims);

sub load_peg_syns
{
    my($fh) = @_;
    warn "Start processing syns\n";
    my($to, $to_len, $from);
    my $peg_mapping;
    while (defined($_ = <$fh>))
    {
	if (($to, $to_len, $from) =  /^([^,]+),(\d+)\t(\S+)/)
	{
	    my @from = map { [ split(/,/, $_) ] } split(/;/,$from);
	    if (@from > 0)
	    {
		$peg_mapping->{$to} = [$to_len, \@from];
	    }
	}
    }
    warn "finished with syns\n";
    close($fh);

    return $peg_mapping;
}

sub expand_sims
{
    my($fig, $sim_dest, $exp_sim_dest, $peg_mapping) = @_;

    open(SIMS, "<$sim_dest") or die("Cannot open similarities file $sim_dest: $!\n");
    open(EXP, ">$exp_sim_dest") or die("Cannot open expanded similarities file $exp_sim_dest for writing: $!\n");

    while (<SIMS>)
    {
	chomp;
	my @s = split(/\t/, $_);
	
	my $id2 = $s[1];
	my $id1 = $s[0];
	my @relevant = ();
	
	#
	# Find contig / location info
	#
	
	my $loc1 = $fig->feature_location($id1);
	my($contig1, $beg1, $end1) = $loc1 =~ /(.*)_(\d+)_(\d+)$/;
	($beg1, $end1) = ($end1, $beg1) if $end1 < $beg1;
	
	my ($genome1) = $id1 =~ /^fig\|(\d+\.\d+)/;
    
	#    my @maps_to = $fig->mapped_prot_ids( $id2 );
	my $mapping = $peg_mapping->{$id2};
	if ($mapping)
	{
	    my($ref_len, $pairs) = @$mapping;
	    
	    my @maps_to = grep { $_->[0] !~ /^xxx\d+/ } @$pairs;
	    
	    my $seen = {};
	    foreach my $x ( @maps_to )
	    {
		next if @$x == 0;
		my ( $x_id, $x_ln ) = @$x;
		
		#
		# Find contig / location info
		#
		
		my @loc;
		
		if  ($x_id =~ /^fig\|(\d+\.\d+)/)
		{
		    my $genome2 = $1;
		    my $loc2 = $fig->feature_location($x_id);
		    my($contig2, $beg2, $end2) = $loc2 =~ /(.*)_(\d+)_(\d+)$/;
		    ($beg2, $end2) = ($end2, $beg2) if $end2 < $beg2;
		    @loc = ($genome1, $contig1, $beg1, $end1, $genome2, $contig2, $beg2, $end2);
		}
		
		next if $seen->{$x_id};
		$seen->{$x_id} = 1;
		
		my $delta2  = $ref_len - $x_ln; # Coordinate shift
		my $sim1    = [ @s ]; # Make a copy
		$sim1->[1]  = $x_id;
		$sim1->[8] -= $delta2;
		$sim1->[9] -= $delta2;
		
		print EXP join("\t", @$sim1, @loc), "\n";
	    }
	}
	else
	{
	    my @loc;
	    if  ($id2 =~ /^fig\|(\d+\.\d+)/)
	    {
		my $genome2 = $1;
		my $loc2 = $fig->feature_location($id2);
		my($contig2, $beg2, $end2) = $loc2 =~ /(.*)_(\d+)_(\d+)$/;
		($beg2, $end2) = ($end2, $beg2) if $end2 < $beg2;
		@loc = ($genome1, $contig1, $beg1, $end1, $genome2, $contig2, $beg2, $end2);
	    }
	    print EXP join("\t", @s, @loc), "\n";
	}
    }
    close(SIMS);
    close(EXP);

    index_sims($exp_sim_dest);
}

#
# And index.
#
# Use the C index_sims_file app to create a berkeley db index
# of the sims file.
#

sub index_sims
{
    my($exp_sim_dest) = @_;
    
    -f $exp_sim_dest or die "expanded sims file $exp_sim_dest does not exist\n";
    open(IDX, "$FIG_Config::bin/index_sims_file 0 < $exp_sim_dest |") or die "Cannot open index_sims_file pipe: $!\n";
    
    my $index_file = "$exp_sim_dest.index";
    my %index;
    my $tied = tie %index, 'DB_File', $index_file, O_RDWR | O_CREAT, 0666, $DB_BTREE;
    
    $tied or &fatal("Creation of hash $index_file failed: $!\n");
    
    while (<IDX>)
    {
	chomp;
	my($peg, undef, $seek, $len) = split(/\t/);
	
	$index{$peg} = "$seek,$len";
    }
    close(IDX);
    
    $tied->sync();
    untie %index;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3