[Bio] / FortyEight / rp_compute_pchs.pl Repository:
ViewVC logotype

Annotation of /FortyEight/rp_compute_pchs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : olson 1.1
2 :     #
3 :     # Compute Pchs from expanded sims.
4 :     #
5 :    
6 :     use DB_File;
7 :    
8 :     use Data::Dumper;
9 :     use Carp;
10 :     use strict;
11 :     use FIG;
12 :     use FIG_Config;
13 :     use File::Basename;
14 :     use GenomeMeta;
15 :     use Sim;
16 :    
17 :     @ARGV == 1 or die "Usage: $0 job-dir\n";
18 :    
19 :     my $jobdir = shift;
20 :    
21 :     -d $jobdir or die "$0: job dir $jobdir does not exist\n";
22 :    
23 :     my $hostname = `hostname`;
24 :     chomp $hostname;
25 :    
26 :     my $genome = &FIG::file_head("$jobdir/GENOME_ID");
27 :     chomp $genome;
28 :     $genome =~ /^\d+\.\d+/ or die "$0: Cannnot find genome ID for jobdir $jobdir\n";
29 :    
30 :     my $meta = new GenomeMeta($genome, "$jobdir/meta.xml");
31 :    
32 : olson 1.5 $meta->set_metadata("status.pchs", "in_progress");
33 :    
34 : olson 1.2 my $genome_dir = "$jobdir/rp/$genome";
35 :    
36 : olson 1.1 my $sims_file = "$jobdir/rp/$genome/expanded_similarities";
37 : olson 1.2 my $raw_pch_file = "$jobdir/rp/$genome/pchs.raw";
38 :     my $proc_pch_file = "$jobdir/rp/$genome/pchs";
39 :     my $pch_btree_file = "$jobdir/rp/$genome/pchs.btree";
40 :     my $pch_ev_btree_file = "$jobdir/rp/$genome/pchs.evidence.btree";
41 :     my $scored_pch_file = "$jobdir/rp/$genome/pchs.scored";
42 :     my $compute_pch_err_file = "$jobdir/rp.errors/compute_pchs.stderr";
43 :     my $filter_pch_err_file = "$jobdir/rp.errors/remove_clustered_pchs.stderr";
44 :     my $score_pch_err_file = "$jobdir/rp.errors/score_pchs.stderr";
45 :    
46 :     my $cluster_cutoff = 70;
47 :    
48 :     my $genome_sim_cache_file = "$FIG_Config::fortyeight_data/genome_similarity.cache";
49 :     my $genome_sim_cache;
50 :     if (-f $genome_sim_cache_file)
51 :     {
52 :     $genome_sim_cache = "-cache $genome_sim_cache_file";
53 :     }
54 :     else
55 :     {
56 :     $meta->add_log_entry($0, "warning: missing genome sim cache $genome_sim_cache_file");
57 :     }
58 : olson 1.1
59 : olson 1.2 #
60 :     # Compute PCHs
61 :     #
62 : olson 1.3
63 : olson 1.1 $meta->add_log_entry($0, "start PCH processing on $hostname in $jobdir");
64 :    
65 : olson 1.3 my $cmd = "$FIG_Config::bin/compute_pchs_from_sims $sims_file $raw_pch_file 2>&1 >$compute_pch_err_file";
66 :     warn "Compute: $cmd\n";
67 :     my $rc = system($cmd);
68 : olson 1.1 if ($rc != 0)
69 :     {
70 :     &fatal("pchs computation failed with rc=$rc");
71 :     }
72 :    
73 : olson 1.2 #
74 :     # Remove clustered PCHs.
75 :     #
76 :    
77 :     my $cmd = "$FIG_Config::bin/remove_clustered_pchs3 -orgdir $genome_dir $genome_sim_cache $cluster_cutoff ";
78 :     $cmd .= " < $raw_pch_file > $proc_pch_file 2>$filter_pch_err_file";
79 :    
80 :     $meta->add_log_entry($0, "remove PCH clusters: $cmd");
81 :    
82 :     my $rc = system($cmd);
83 :     if ($rc != 0)
84 :     {
85 :     &fatal("remove_clustered_pchs3 computation failed with rc=$rc");
86 :     }
87 :     #
88 :     # compute simple scores
89 :     #
90 :    
91 :     my $cmd = "$FIG_Config::bin/compute_simple_scores 4 < $proc_pch_file > $scored_pch_file 2>$score_pch_err_file";
92 :    
93 :     $meta->add_log_entry($0, "score PCHs: $cmd");
94 :    
95 :     my $rc = system($cmd);
96 :     if ($rc != 0)
97 :     {
98 :     &fatal("compute_simple_scores computation failed with rc=$rc");
99 :     }
100 :    
101 :     #
102 :     # And create btree database file.
103 :     #
104 : olson 1.3
105 : olson 1.2 $DB_BTREE->{flags} = R_DUP;
106 :     my %index;
107 :     unlink($pch_btree_file);
108 :     my $tied = tie %index, 'DB_File', $pch_btree_file, O_RDWR | O_CREAT, 0666, $DB_BTREE;
109 :    
110 :     if (!$tied)
111 :     {
112 :     &fatal("cannot create $pch_btree_file: $!");
113 :     }
114 :    
115 :     if (open(SC, "<$scored_pch_file"))
116 :     {
117 :     while (<SC>)
118 :     {
119 :     chomp;
120 :     my($p1, $p2, $sc) = split(/\t/);
121 :     $index{$p1, $p2} = $sc;
122 :     $index{$p1} = join($;, $p2, $sc);
123 :     }
124 :     }
125 :     untie $tied;
126 :     #
127 :     # Coupling evidence. This one requires duplicate keys.
128 :     #
129 :    
130 :     $DB_BTREE->{flags} = R_DUP;
131 :     my %index;
132 :     unlink($pch_ev_btree_file);
133 :     my $tied = tie %index, 'DB_File', $pch_ev_btree_file, O_RDWR | O_CREAT, 0666, $DB_BTREE;
134 :    
135 :     if (!$tied)
136 :     {
137 :     &fatal("cannot create $pch_ev_btree_file: $!");
138 :     }
139 :    
140 :     if (open(PCH, "<$proc_pch_file"))
141 :     {
142 :     while (<PCH>)
143 :     {
144 :     chomp;
145 :     my($p1, $p2, $p3, $p4, $iden3, $iden4, undef, undef, $rep) = split(/\t/);
146 :     $index{$p1, $p2} = join($;, $p3, $p4, $iden3, $iden4, $rep);
147 :     }
148 :     }
149 :     untie $tied;
150 :    
151 : olson 1.1 $meta->add_log_entry($0, "finish PCH computation on $jobdir");
152 :     $meta->set_metadata("status.pchs", "complete");
153 : olson 1.4 $meta->set_metadata("pchs.running", "no");
154 : olson 1.1 exit(0);
155 :    
156 :     sub fatal
157 :     {
158 :     my($msg) = @_;
159 :    
160 :     if ($meta)
161 :     {
162 :     $meta->add_log_entry($0, ['fatal error', $msg]);
163 :     $meta->set_metadata("status.pchs", "error");
164 : olson 1.4 $meta->set_metadata("pchs.running", "no");
165 : olson 1.1 }
166 :    
167 :     croak "$0: $msg";
168 :     }
169 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3