[Bio] / FigKernelScripts / pg_form_mobile_element_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_form_mobile_element_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Wed Apr 10 19:15:29 2013 UTC (6 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.2: +8 -2 lines
Modifications to pangenome code to move common code to PG.pm.

use strict;
use Data::Dumper;
use Getopt::Long;
use BlastInterface;
use gjoseqlib;
use PG;

my $usage = "usage: pg_form_mobile_element_families -d Data\n";
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD,);

if ((! $rc) || (! -d $dataD)) { print STDERR $usage; exit }

my $pg = new PG($dataD);

my($peg_to_seq,$seq_to_pegs)  = $pg->load_seqs();

my %me_pegs = map { (($_ =~ /^(\S+)\t(\S.*\S)?$/) ) ? ($1 => ($2 ? $2 : '')) : () } 
              `cat $dataD/possibly.alien.pegs`;
#my %me_pegs = map { (($_ =~ /^(\S+)\t(\d+)\t(\S.*\S)?$/) && ($2 >= 60)) ? ($1 => ($3 ? $3 : '')) : () } 
#              `cat $dataD/possibly.alien.pegs`;
my @seqs = grep { length($_->[2]) > 30 } map { [$_,$me_pegs{$_},$peg_to_seq->{$_}] } keys(%me_pegs);
my @sims = &BlastInterface::blast(\@seqs,\@seqs,'blastp',{excludeSelf => 1,minIden => 0.5 });
open(MEFAMS,"| cluster_objects > $dataD/mobile.element.families") || die "could not open $dataD/mobile.element.families";
foreach my $sim (@sims)
{
    print MEFAMS "$sim->[0]\t$sim->[1]\n";
}
close(MEFAMS);

sub load_seqs {
    my($dataD) = @_;

    my $peg_to_seq = {};
    my $seq_to_pegs = {};

    foreach my $job (`cut -f 3 $dataD/genomes.with.job`)
    {
	chop $job;
	$/ = "\n>";
	foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/Features/peg/fasta`)
	{
	    chomp;
	    if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
	    {
		my $peg = $1;
		my $seq = $2;
		$seq =~ s/\s//gs;
		$peg_to_seq->{$peg} = $seq;
		push(@{$seq_to_pegs->{$seq}},$peg);
	    }
	}
	$/ = "\n";
    }

    if (-s "$dataD/anno.seed")
    {
	foreach my $g (`cat $dataD/anno.seed`)
	{
	    chop $g;
	    $/ = "\n>";
	    foreach $_ (`cat /vol/mirror-seed/Data.mirror/Organisms/$g/Features/peg/fasta`)
	    {
		chomp;
		if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
		{
		    my $peg = $1;
		    my $seq = $2;
		    $seq =~ s/\s//gs;
		    $peg_to_seq->{$peg} = $seq;
		    push(@{$seq_to_pegs->{$seq}},$peg);
		}
	    }
	    $/ = "\n";
	}
    }
    return ($peg_to_seq,$seq_to_pegs);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3