[Bio] / FigKernelScripts / pg_make_md5_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_make_md5_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Wed Apr 10 19:15:29 2013 UTC (6 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.4: +4 -1 lines
Modifications to pangenome code to move common code to PG.pm.

use strict;
use Data::Dumper;
use Digest::MD5;
use Getopt::Long;
use PG;

# pg_make_md5_families -d Data
#
# Reads Data/protein.families, where each line is one family given as a
# tab-separated list of peg ids, and writes Data/md5.fams containing one
# line per peg:
#
#     <family number> TAB <md5 hex digest of the upper-cased sequence>
#
# Family numbers start at 1 and follow the line order of protein.families
# (a blank line still consumes a number).  Sequences come from the first
# value returned by $pg->load_seqs(), which is used here as a hash
# reference keyed by peg id.

my $usage = "usage: pg_make_md5_families -d Data";

my $dataD;
my $rc  = GetOptions('d=s' => \$dataD);

# Test definedness before -d so a missing -d option is reported as a usage
# error; exit non-zero so calling scripts can detect the failure.
if ((! $rc) || (! defined($dataD)) || (! -d $dataD))
{
    print STDERR "$usage\n";
    exit(1);
}

my $pg = PG->new($dataD);

# load_seqs() returns (peg_to_seq, seq_to_pegs); only the first is needed.
my($peg_to_seq) = $pg->load_seqs();

my $fams_file = "$dataD/protein.families";
my $md5_file  = "$dataD/md5.fams";

# Open the input first: if it is missing we must not truncate an existing
# md5.fams.
open(my $fams_fh, '<', $fams_file) or die "could not open $fams_file: $!";
open(my $out_fh,  '>', $md5_file)  or die "could not open $md5_file: $!";

my $n = 1;
while (defined(my $line = <$fams_fh>))
{
    # chomp, not chop: a final line without a newline must keep its last
    # character.
    chomp $line;

    foreach my $peg (split(/\t/, $line))
    {
        my $seq = $peg_to_seq->{$peg};
        if (! defined($seq))
        {
            # The output line is still written (digest of the empty string),
            # as before, but the problem is no longer silent.
            warn "no sequence found for peg '$peg' (family $n)\n";
            $seq = '';
        }
        my $md5 = Digest::MD5::md5_hex( uc $seq );
        print $out_fh "$n\t$md5\n";
    }
    $n++;
}

close($fams_fh);
# Buffered write errors only surface at close time.
close($out_fh) or die "could not write $md5_file: $!";

# Read one FASTA file of peg sequences into the two lookup tables.
#
#   $path        - FASTA file to read
#   $peg_to_seq  - hash ref, filled with  peg id => sequence
#   $seq_to_pegs - hash ref, filled with  sequence => [peg ids]
#
# A file that cannot be opened produces a warning and is skipped.
sub _load_peg_fasta
{
    my($path, $peg_to_seq, $seq_to_pegs) = @_;

    my $fh;
    if (! open($fh, '<', $path))
    {
        warn "could not open $path: $!\n";
        return;
    }

    # Read one FASTA record at a time.  local restores the caller's record
    # separator even if something below dies.
    local $/ = "\n>";
    while (defined(my $rec = <$fh>))
    {
        chomp $rec;    # strips the trailing "\n>" of every record but the last

        # The id is the first word of the header line (the very first record
        # still carries its leading '>'); everything after the header line
        # is sequence.
        if ($rec =~ /^>?(\S+)[^\n]*\n(.*)/s)
        {
            my $peg = $1;
            my $seq = $2;
            $seq =~ s/\s//gs;
            $peg_to_seq->{$peg} = $seq;
            push(@{$seq_to_pegs->{$seq}}, $peg);
        }
    }
    close($fh);
    return;
}

# Return the chomped lines of $file, or an empty list (with a warning) if
# the file cannot be opened.
sub _read_lines
{
    my($file) = @_;

    my $fh;
    if (! open($fh, '<', $file))
    {
        warn "could not open $file: $!\n";
        return ();
    }

    local $/ = "\n";
    my @lines;
    while (defined(my $line = <$fh>))
    {
        chomp $line;
        push(@lines, $line);
    }
    close($fh);
    return @lines;
}

# load_seqs($dataD)
#
# Collects protein sequences for every genome listed in the data directory:
#
#   * $dataD/genomes.with.job - the third tab-separated column is used as a
#     job id; all files matching
#     /vol/rast-prod/jobs/<job>/rp/*/Features/peg/fasta are read.
#   * $dataD/anno.seed (only if present and non-empty) - each line is used
#     as a genome id; /vol/mirror-seed/Data.mirror/Organisms/<genome>/
#     Features/peg/fasta is read.
#
# Returns ($peg_to_seq, $seq_to_pegs): a hash ref mapping peg id to its
# sequence with all whitespace removed, and a hash ref mapping each sequence
# to an array ref of the peg ids that have it.
#
# NOTE(review): the script body above calls $pg->load_seqs() rather than
# this sub, so this may be a leftover local copy - confirm before relying
# on it.
sub load_seqs {
    my($dataD) = @_;

    my $peg_to_seq = {};
    my $seq_to_pegs = {};

    foreach my $line (_read_lines("$dataD/genomes.with.job"))
    {
        # Same column selection as `cut -f 3`: a line with no tab at all is
        # taken whole, otherwise the third field (possibly empty).
        my @fields = split(/\t/, $line, -1);
        my $job = (@fields > 1) ? $fields[2] : $line;
        next if (! defined($job)) || ($job eq '');

        foreach my $fasta (glob("/vol/rast-prod/jobs/$job/rp/*/Features/peg/fasta"))
        {
            _load_peg_fasta($fasta, $peg_to_seq, $seq_to_pegs);
        }
    }

    if (-s "$dataD/anno.seed")
    {
        foreach my $g (_read_lines("$dataD/anno.seed"))
        {
            next if $g eq '';
            _load_peg_fasta("/vol/mirror-seed/Data.mirror/Organisms/$g/Features/peg/fasta",
                            $peg_to_seq, $seq_to_pegs);
        }
    }
    return ($peg_to_seq,$seq_to_pegs);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3