[Bio] / FigKernelScripts / pg_compute_md5s.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_compute_md5s.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Apr 10 19:15:29 2013 UTC (6 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Modifications to pangenome code to move common code to PG.pm.

use strict;

use Data::Dumper;
use Digest::MD5;
use Getopt::Long;
use JSON::XS;

use SeedUtils;
use FIGM;

# Command-line handling: -d names the data directory that holds the input
# lists read below and receives the md5sums output file.
my $usage = "usage: pg_compute_md5s -d Data\n";
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD);

# Bad options, a missing -d, or a -d argument that is not a directory are all
# usage errors. Exit nonzero so calling scripts can tell the run failed.
if ((! $rc) || (! defined $dataD) || (! -d $dataD))
{
    print STDERR $usage;
    exit 1;
}

#
# Compute a table of all the md5s of the pegs appearing in the genomes.
#

my $jobs = "/vol/rast-prod/jobs";

# The genome list is tab-separated. Only the third and fourth columns are
# used here: the third names a job directory under $jobs and the fourth is
# the genome ID, giving the organism directory $jobs/<job>/rp/<genome>.
my $genome_list = "$dataD/genomes.with.job.and.genomeID";
open(my $genome_fh, "<", $genome_list) or die "cannot open $genome_list: $!";
my(@genomes, @dirs);
while (my $line = <$genome_fh>)
{
    chomp $line;
    my(undef, undef, $rast, $genome) = split(/\t/, $line);

    # Blank or truncated lines would otherwise push an undefined genome and a
    # bogus "$jobs//rp/" directory into the lists handed to FIGM.
    next unless defined($rast) && length($rast);
    next unless defined($genome) && length($genome);

    push(@genomes, $genome);
    push(@dirs, "$jobs/$rast/rp/$genome");
}
close($genome_fh);

# FIGM is constructed with the collected organism directories; it is the
# object queried below for features and translations.
my $fig = FIGM->new(undef, @dirs);

#
# We need to handle the annotator seed genomes as well
#
# Each line of anno.seed contributes the first token that looks like a genome
# ID (digits.digits); lines without such a token are ignored.
#
my @anno_genomes;
my $anno_file = "$dataD/anno.seed";

# A missing anno.seed is tolerated (no annotator genomes are added), but a
# file that exists and cannot be opened is a real error and must not be
# mistaken for an empty list.
if (-e $anno_file)
{
    open(my $anno_fh, "<", $anno_file) or die "cannot open $anno_file: $!";
    while (my $line = <$anno_fh>)
    {
        if ($line =~ /(\d+\.\d+)/)
        {
            push(@anno_genomes, $1);
        }
    }
    close($anno_fh);
}

# Write one "peg<TAB>md5" line per peg for every genome, annotator seed
# genomes first. The md5 is taken over the upper-cased translation so that
# the checksum does not depend on the case of the stored sequence.
my $md5_file = "$dataD/md5sums";
open(my $out_fh, ">", $md5_file) or die "Cannot open $md5_file: $!";
for my $g (@anno_genomes, @genomes)
{
    # Progress indicator: one genome ID per line on STDERR.
    print STDERR "$g\n";

    for my $peg ($fig->all_features($g, 'peg'))
    {
        # NOTE(review): if get_translation returns undef for a peg, uc()
        # yields "" and the md5 of the empty string is written -- confirm
        # that this is the desired behavior.
        my $trans = $fig->get_translation($peg);
        my $md5 = Digest::MD5::md5_hex( uc $trans );
        print $out_fh "$peg\t$md5\n";
    }
}

# Buffered write errors (e.g. a full disk) only surface at close time, so the
# close on the output handle must be checked.
close($out_fh) or die "Cannot close $md5_file: $!";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3