[Bio] / FigKernelScripts / promote_orfs_to_pegs.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/promote_orfs_to_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (view) (download) (as text)

1 : overbeek 1.1 # -*- perl -*-
2 : gdpusch 1.11 ########################################################################
3 : overbeek 1.1 # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 : gdpusch 1.11 ########################################################################
18 : overbeek 1.1
19 : olson 1.9 use strict;
20 : overbeek 1.1 use FIG;
21 :     my $fig = new FIG;
22 :    
23 :     use NewGenome;
24 :     use ToCall;
25 :    
26 : overbeek 1.5 my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";
27 : overbeek 1.1
28 :     if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
29 :     print STDERR "\n $usage\n\n";
30 :     exit (0);
31 :     }
32 :    
33 : overbeek 1.5 my ($to_call_dir, $found_file);
34 : overbeek 1.1
35 :     (
36 :     ($to_call_dir = shift @ARGV)
37 : overbeek 1.5 &&
38 :     ($found_file = shift @ARGV)
39 : overbeek 1.1 )
40 : overbeek 1.2 || die "\n $usage\n\n";
41 : overbeek 1.1
42 : overbeek 1.3 $to_call_dir =~ s/\/$//o;
43 :    
44 : overbeek 1.1 my $to_call;
45 :     $to_call = new NewGenome($to_call_dir,"all");
46 : overbeek 1.5
47 :     my @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } `cat $to_call_dir/neighbors`;
48 :    
49 :     #...Build file of "nearby" PEGs
50 : gdpusch 1.12 if (@neighbors) {
51 :     my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";
52 :     system("/bin/rm -f $tmp_close*");
53 : overbeek 1.5
54 : gdpusch 1.12 foreach my $org (@neighbors) {
55 :     print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};
56 :     system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");
57 : overbeek 1.5 }
58 : gdpusch 1.12 &FIG::run("formatdb -i$tmp_close -pT");
59 :    
60 :     my @promoted_fids;
61 :     my $tmp_sims = "$FIG_Config::temp/tmp_sims.$$";
62 :     open(TMP_SIMS, ">$tmp_sims")
63 :     || die "Could not write-open $tmp_sims";
64 :     foreach my $orf_id ($to_call->get_fids_for_type('orf')) {
65 :     if (@neighbors) {
66 :     my $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);
67 :    
68 :     if (@$sims) {
69 :     print STDERR "Promoting $orf_id based on ", (scalar @$sims), " sims\n" if $ENV{VERBOSE};
70 :    
71 :     my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
72 :     my $fid = $orf->promote_to_peg($sims)
73 :     || die "Could not promote ORF $orf_id to a PEG";
74 :     push @promoted_fids, $fid;
75 :     @$sims = map { $_->[0] = $fid; $_ } @$sims;
76 :     print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;
77 :     }
78 :     else {
79 :     print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};
80 :     }
81 :     }
82 : overbeek 1.5 }
83 : gdpusch 1.12 close(TMP_SIMS) || die "Could not close $tmp_sims";
84 : overbeek 1.5
85 : gdpusch 1.12 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
86 : olson 1.8 # Disable the auto assignment of the fids we just called.
87 : gdpusch 1.12 #=======================================================================
88 :     if (0)
89 :     {
90 :     my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";
91 :     open(TMP_SEQS, ">$tmp_seqs")
92 :     || die "Could not write-open $tmp_seqs";
93 :     print TMP_SEQS map { join("\t", ($_, $to_call->get_feature_sequence($_))) . qq(\n) } @promoted_fids;
94 :     close(TMP_SEQS) || die "Could not close $tmp_seqs";
95 :     my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;
96 :    
97 :     open(FOUND, ">>$found_file")
98 :     || die "Could not write-open FoundFamiliesFile $found_file";
99 :     foreach my $entry (@auto_assigns) {
100 :     if ($entry =~ m/^(\S+)\t(.*)$/o) {
101 :     if ($to_call->set_function($1, $2)) {
102 :     print FOUND "$1\tCLOSE_SIMS\t$2\n";
103 :     }
104 :     else {
105 :     die "Could not set function of $1 to $2";
106 :     }
107 : olson 1.8 }
108 :     else {
109 : gdpusch 1.12 die "Could not parse auto-assignment: $entry";
110 : olson 1.8 }
111 : overbeek 1.5 }
112 : gdpusch 1.12 close(FOUND) || die "Could not close $found_file";
113 :     unlink($tmp_seqs) || die "Could not remove $tmp_seqs";
114 : overbeek 1.5 }
115 : gdpusch 1.12
116 :     unlink($tmp_sims) || die "Could not remove $tmp_sims";
117 :     unlink($tmp_close) || die "Could not remove $tmp_close";
118 : overbeek 1.5 }
119 : gdpusch 1.12 #-----------------------------------------------------------------------
120 :    
121 : overbeek 1.5
122 : gdpusch 1.12 #...Promote any remaining unpromoted ORFs...
123 : overbeek 1.1 $to_call->promote_remaining_orfs;
124 : gdpusch 1.11
125 : gdpusch 1.12
126 :     #...NOTE: export_features also writes assigned_functions
127 : overbeek 1.1 $to_call->export_features;
128 : overbeek 1.2
129 : olson 1.10
130 : overbeek 1.4 if (! -s "$to_call_dir/Features/orf/tbl") {
131 : olson 1.6 # Hrm, weird NFS effects here. There was a failure of this even though the directory
132 :     # was indeed empty.
133 : gdpusch 1.7 if (system("/bin/rm -fRv $to_call_dir/Features/orf")) {
134 : olson 1.6 warn "First remove did not work; sleeping and trying again";
135 : gdpusch 1.7 sleep(30);
136 :     system("/bin/rm -fRv $to_call_dir/Features/orf");
137 : olson 1.6 }
138 : overbeek 1.2 }
139 :     else {
140 :     die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";
141 :     }
142 : gdpusch 1.12 exit(0);
143 : overbeek 1.5
144 :    
145 :    
146 :     sub blast_against {
147 :     my ($seq, $db) = @_;
148 :     my $seq_len = length($seq);
149 :    
150 :     my $tmp_seq = "$FIG_Config::temp/tmp_seq.$$";
151 :     open( TMP, ">$tmp_seq" ) || die "Could not write-open $tmp_seq";
152 :     print TMP ">query\n$seq\n";
153 :     close(TMP) || die "Could not close $tmp_seq";
154 :    
155 :     my $sims = [];
156 :     @$sims = map { chomp $_;
157 : olson 1.9 my @x = split(/\t/, $_);
158 : overbeek 1.5 push @x, ($seq_len, $fig->translation_length($x[1]));
159 :     bless( [@x], "Sim" )
160 :     } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;
161 :    
162 :     unlink($tmp_seq) || die "Could not unlink $tmp_seq";
163 :     return $sims;
164 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3