[Bio] / FortyEight / rp_compute_pchs.pl Repository:
ViewVC logotype

View of /FortyEight/rp_compute_pchs.pl

Parent Directory | Revision Log


Revision 1.5 - (download) (as text) (annotate)
Tue Jan 19 00:09:03 2010 UTC (9 years, 10 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.4: +2 -0 lines
set metadata in_progress

#
# Compute Pchs from expanded sims.
#

use DB_File;

use Data::Dumper;
use Carp;
use strict;
use FIG;
use FIG_Config;
use File::Basename;
use GenomeMeta;
use Sim;

#
# Setup: validate the command line, identify the genome for this job,
# open the job metadata and derive every file name used further down.
#

@ARGV == 1 or die "Usage: $0 job-dir\n";

my $jobdir = shift;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

# The host name is only used in the log entry written when processing starts.
my $hostname = `hostname`;
chomp $hostname;

# Guard against an undefined value so that a missing or unreadable GENOME_ID
# file ends in the die below rather than in operations on undef.
my $genome = &FIG::file_head("$jobdir/GENOME_ID");
chomp $genome if defined $genome;
defined($genome) && $genome =~ /^\d+\.\d+/
    or die "$0: Cannnot find genome ID for jobdir $jobdir\n";

my $meta = GenomeMeta->new($genome, "$jobdir/meta.xml");

$meta->set_metadata("status.pchs", "in_progress");

# Inputs and outputs all live in the per-genome directory under rp/;
# stderr captures from the external tools go to rp.errors/.
my $genome_dir = "$jobdir/rp/$genome";
my $error_dir  = "$jobdir/rp.errors";

my $sims_file         = "$genome_dir/expanded_similarities";
my $raw_pch_file      = "$genome_dir/pchs.raw";
my $proc_pch_file     = "$genome_dir/pchs";
my $pch_btree_file    = "$genome_dir/pchs.btree";
my $pch_ev_btree_file = "$genome_dir/pchs.evidence.btree";
my $scored_pch_file   = "$genome_dir/pchs.scored";

my $compute_pch_err_file = "$error_dir/compute_pchs.stderr";
my $filter_pch_err_file  = "$error_dir/remove_clustered_pchs.stderr";
my $score_pch_err_file   = "$error_dir/score_pchs.stderr";

# Passed as a positional argument to remove_clustered_pchs3.
my $cluster_cutoff = 70;

# Optional "-cache FILE" argument for remove_clustered_pchs3. It starts out
# as an empty string so that it can be interpolated into the command line
# even when the cache file is absent.
my $genome_sim_cache_file = "$FIG_Config::fortyeight_data/genome_similarity.cache";
my $genome_sim_cache = '';
if (-f $genome_sim_cache_file)
{
    $genome_sim_cache = "-cache $genome_sim_cache_file";
}
else
{
    $meta->add_log_entry($0, "warning: missing genome sim cache $genome_sim_cache_file");
}

#
# Compute PCHs
#
# Each stage below runs an external program through the shell. system()
# returns the raw wait status; any non-zero value aborts the job via fatal().
#

$meta->add_log_entry($0, "start PCH processing on $hostname in $jobdir");

# The tool writes its result to $raw_pch_file itself, so both stdout and
# stderr are captured in the error file. The order matters: stdout must be
# redirected to the file first and stderr duplicated onto it afterwards.
# Written as "2>&1 >file", stderr would be attached to the original stdout
# and never reach the file.
my $cmd = "$FIG_Config::bin/compute_pchs_from_sims $sims_file $raw_pch_file >$compute_pch_err_file 2>&1";
warn "Compute: $cmd\n";
my $rc = system($cmd);
if ($rc != 0)
{
    fatal("pchs computation failed with rc=$rc");
}

#
# Remove clustered PCHs.
#
# Reads the raw PCHs on stdin and writes the filtered set to $proc_pch_file.
#

$cmd = "$FIG_Config::bin/remove_clustered_pchs3 -orgdir $genome_dir $genome_sim_cache $cluster_cutoff ";
$cmd .= " < $raw_pch_file > $proc_pch_file 2>$filter_pch_err_file";

$meta->add_log_entry($0, "remove PCH clusters: $cmd");

$rc = system($cmd);
if ($rc != 0)
{
    fatal("remove_clustered_pchs3 computation failed with rc=$rc");
}

#
# compute simple scores
#
# Reads the filtered PCHs on stdin and writes scores to $scored_pch_file.
#

$cmd = "$FIG_Config::bin/compute_simple_scores 4  < $proc_pch_file > $scored_pch_file 2>$score_pch_err_file";

$meta->add_log_entry($0, "score PCHs: $cmd");

$rc = system($cmd);
if ($rc != 0)
{
    fatal("compute_simple_scores computation failed with rc=$rc");
}

#
# And create btree database file.
#
# Both databases are opened with R_DUP, so storing to a key that already
# exists adds a duplicate entry instead of replacing the previous value.
# Each database is built inside its own block so that its tied hash is a
# fresh lexical and is untied (flushing the file) before the next one starts.
#

$DB_BTREE->{flags} = R_DUP;

{
    my %index;
    unlink($pch_btree_file);

    # The object returned by tie is deliberately not kept: an outstanding
    # reference to it would stop untie from closing the database.
    tie(%index, 'DB_File', $pch_btree_file, O_RDWR | O_CREAT, 0666, $DB_BTREE)
        or fatal("cannot create $pch_btree_file: $!");

    # Each line is tab-separated; the first three columns are stored both
    # under the ($p1, $p2) pair and under $p1 alone.
    if (open(my $scored_fh, "<", $scored_pch_file))
    {
        while (my $line = <$scored_fh>)
        {
            chomp $line;
            my($p1, $p2, $sc) = split(/\t/, $line);
            $index{$p1, $p2} = $sc;
            $index{$p1} = join($;, $p2, $sc);
        }
        close($scored_fh);
    }
    else
    {
        # Keep going as before, but leave a trace of the problem.
        $meta->add_log_entry($0, "warning: cannot open $scored_pch_file: $!");
    }

    untie %index;
}

#
# Coupling evidence. This one requires duplicate keys.
#

$DB_BTREE->{flags} = R_DUP;

{
    my %index;
    unlink($pch_ev_btree_file);

    tie(%index, 'DB_File', $pch_ev_btree_file, O_RDWR | O_CREAT, 0666, $DB_BTREE)
        or fatal("cannot create $pch_ev_btree_file: $!");

    # Columns 7 and 8 of each tab-separated line are skipped; the rest is
    # stored under the ($p1, $p2) pair.
    if (open(my $pch_fh, "<", $proc_pch_file))
    {
        while (my $line = <$pch_fh>)
        {
            chomp $line;
            my($p1, $p2, $p3, $p4, $iden3, $iden4, undef, undef, $rep) = split(/\t/, $line);
            $index{$p1, $p2} = join($;, $p3, $p4, $iden3, $iden4, $rep);
        }
        close($pch_fh);
    }
    else
    {
        # Keep going as before, but leave a trace of the problem.
        $meta->add_log_entry($0, "warning: cannot open $proc_pch_file: $!");
    }

    untie %index;
}

# Record successful completion in the job metadata.
$meta->add_log_entry($0, "finish PCH computation on $jobdir");
$meta->set_metadata("status.pchs", "complete");
$meta->set_metadata("pchs.running", "no");
exit(0);

#
# Abort the script with an error message.
#
# If the job metadata object is available, the error is first written to
# its log and the pchs status fields are updated; the script then dies
# through croak with the message prefixed by the program name.
#
sub fatal
{
    my $msg = shift;

    if ($meta)
    {
        # Metadata fields to update once the failure has been logged.
        my @updates = (
            ["status.pchs",  "error"],
            ["pchs.running", "no"],
        );

        $meta->add_log_entry($0, ['fatal error', $msg]);
        $meta->set_metadata(@$_) for @updates;
    }

    croak "$0: $msg";
}
    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3