[Bio] / FortyEight / rp_compute_peer_sims.pl Repository:
ViewVC logotype

View of /FortyEight/rp_compute_peer_sims.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Fri Nov 7 22:42:29 2008 UTC (11 years ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, HEAD
Changes since 1.1: +1 -2 lines
First working versoin of the peer sims code.

#
# Compute the all-to-all sims between a set of organism directories.
#
# Sims are stored in the RAST organism directory, in a directory named
# by the target genome ID:
#	 jobdir/rp/XXXXXX.YY/sims/QQQQQ.RR
#
# Within that directory the forward sims (from XXXXXX.YY => QQQQQ.RR) are in a file sims,
# with a btree index sims.index.
# Expanded sims are in sims_exp with an index in sims_exp.indx.
#
# Flipped sims not stored; they will be found in the other organism's job dir.
#

use strict;
use Data::Dumper;
use Carp;
use DB_File;
use strict;
use FIG;
use FIG_Config;
use File::Basename;
use GenomeMeta;
use FileHandle;
use Sim;

my $meta;

@ARGV > 1 or die "Usage: $0 work-dir job-id job-id ...\n";

my $workdir = shift;
my @job_ids = @ARGV;

if (! -d $workdir)
{
    mkdir($workdir) or &fatal("mkdir $workdir failed: $!\n");
}

my @blast_args = qw(-p blastp -m 8 -e 1.0e-5);

#
# Map the job ids to org dirs, and collect a fasta file.
#

my $info = new FileHandle "$workdir/INFO", "w";

my @org_dirs;
my @fasta_files;
my %genome_to_dir;

for my $job_id (@job_ids)
{
    my $org_dir;
    my $genome;
    if ($job_id =~ /^\d+$/)
    {
	my $job_dir = "$FIG_Config::rast_jobs/$job_id";
	$genome = &FIG::file_head("$job_dir/GENOME_ID", 1);
	chomp $genome;
	$genome or &fatal("cannot find genome id for job $job_dir\n");

	$org_dir = "$job_dir/rp/$genome";
    }
    elsif ($job_id =~ m,^/,)
    {
	# Handle the case where we pass job directories or org dirs.

	if (-f "$job_id/GENOME_ID" and -d "$job_id/rp")
	{
	    my $job_dir = $job_id;
	    $genome = &FIG::file_head("$job_dir/GENOME_ID", 1);
	    chomp $genome;
	    $genome or &fatal("cannot find genome id for job $job_dir\n");
	    
	    $org_dir = "$job_dir/rp/$genome";
	}
	elsif (-f "$job_id/GENOME" and -d "$job_id/Features" and $job_id =~ m,/^(\d+\.\d+)$,)
	{
	    $org_dir = $job_id;
	    $genome = $1;
	}
	else
	{
	    &fatal("Unknown jobid $job_id\n");
	}
    }
    else
    {
	&fatal("Unknown jobid $job_id\n");
    }
    -d $org_dir or &fatal("org_dir $org_dir does not exist\n");

    my $fasta_file = "$org_dir/Features/peg/fasta";
    if (! -f $fasta_file)
    {
	warn "No fasta found for $org_dir\n";
	next;
    }

    push(@fasta_files, $fasta_file);
    
    print $info join("\t", $genome, $org_dir, $fasta_file), "\n";
    push(@org_dirs, $org_dir);
    $genome_to_dir{$genome} = $org_dir;
}

#
# Now write the combined fasta.
#

my $all_fasta = "$workdir/fasta";
my $fasta_fh = new FileHandle $all_fasta, "w";

#goto xx;
for my $file (@fasta_files)
{
    open(my $fh, "<", $file) or &fatal("cannot open $file: $!");
    my $buf;
    while (read($fh, $buf, 4096))
    {
	print $fasta_fh $buf;
    }
    close($fh);
}
close($fasta_fh);

&run("$FIG_Config::ext_bin/formatdb", "-p", "t", "-i", $all_fasta);

&run("$FIG_Config::ext_bin/blastall", @blast_args, "-i", $all_fasta, "-d", $all_fasta,
     "-o", "$workdir/sims.raw");

&run("$FIG_Config::bin/reformat_sims $all_fasta < $workdir/sims.raw > $workdir/sims.final");

#
# Read the generated sims (id1, id2, vals).
# A sim for id1, id2 is written to the file
# orgdir(genome-of(id1))/sims/genome-of(id2)
#
xx:
my $cur_genome;
open(S, "<", "$workdir/sims.final") or &fatal("Cannot open $workdir/sims.final: $!");
my($id1, $id2, $g1, $g2);
my($last_g1, $last_g2);
my %fhh;
my @new_files;
while (<S>)
{
    if (($id1, $g1, $id2, $g2) = /^(fig\|(\d+\.\d+)\S+)\t(fig\|(\d+\.\d+)\S+)/)
    {
	my $fh = $fhh{$g1, $g2};

	if (!$fh)
	{
	    my $sdir = "$genome_to_dir{$g1}/sims";
	    -d $sdir or mkdir $sdir or &fatal("cannot mkdir $sdir: $!");
	    
	    open($fh, ">", "$sdir/$g2") or &fatal("cannot write $sdir/$g2: $!");
	    push(@new_files, "$sdir/$g2");
	    $fhh{$g1, $g2} = $fh;
	}
	print $fh $_;
    }
}
close(S);
map { close($_) } values(%fhh);

for my $nf (@new_files)
{
    print "Index $nf\n";
    &index_sims($nf, "$nf.index");
}

exit(0);


#
# Use the C index_sims_file app to create a berkeley db index
# of the sims file.
#

sub index_sims
{
    my($sims, $index_file) = @_;

    my $path = &FIG::find_fig_executable("index_sims_file");

    open(IDX, "$path 0 < $sims |") or &fatal("Cannot open index_sims_file pipe: $!\n");

    my %index;
    my $tied = tie %index, 'DB_File', $index_file, O_RDWR | O_CREAT, 0666, $DB_BTREE;

    $tied or &fatal("Creation of hash $index_file failed: $!\n");

    while (<IDX>)
    {
	chomp;
	my($peg, undef, $seek, $len) = split(/\t/);
	
	$index{$peg} = "$seek,$len";
    }
    close(IDX);
    
    $tied->sync();
    untie %index;
}

sub fatal
{
    my($msg) = @_;

    if ($meta)
    {
	$meta->add_log_entry($0, ['fatal error', $msg]);
	$meta->set_metadata("scenario.running", "no");
	$meta->set_metadata("status.scenario", "error");
    }

    croak "$0: $msg";
}
    
sub run
{
    my(@cmd) = @_;
    print "Run @cmd\n";
    my $rc = system(@cmd);
    if ($rc != 0)
    {
	&fatal("error $rc running @cmd\n");
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3