[Bio] / FigKernelScripts / promote_orfs_to_pegs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/promote_orfs_to_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Thu Oct 5 00:28:58 2006 UTC revision 1.13, Wed Feb 13 14:54:27 2008 UTC
# Line 1  Line 1 
1  # -*- perl -*-  # -*- perl -*-
2  #  ########################################################################
3  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
4  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
5  #  #
# Line 14  Line 14 
14  # at info@ci.uchicago.edu or the Fellowship for Interpretation of  # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  ########################################################################
18    
19    use strict;
20    use warnings;
21    
22  use FIG;  use FIG;
23  my $fig = new FIG;  my $fig = new FIG;
# Line 22  Line 25 
25  use NewGenome;  use NewGenome;
26  use ToCall;  use ToCall;
27    
28  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir";  $0 =~ m/([^\/]+)$/o;
29    my $self  = $1;
30    my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";
31    
32  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
33      print STDERR "\n   $usage\n\n";      print STDERR "\n   $usage\n\n";
34      exit (0);      exit (0);
35  }  }
36    
37  my ($to_call_dir);  my ($to_call_dir, $found_file);
38    
39  (  (
40   ($to_call_dir = shift @ARGV)   ($to_call_dir = shift @ARGV)
41    &&
42     ($found_file  = shift @ARGV)
43  )  )
44      || die "\n   $usage\n\n";      || die "\n   $usage\n\n";
45    
46  $to_call_dir =~ s/\/$//o;  $to_call_dir =~ s/\/$//o;
47    
48  my $to_call;  my $to_call;
49  $to_call = new NewGenome($to_call_dir,"all");  $to_call = NewGenome->new($to_call_dir, "all");
50  $to_call->promote_remaining_orfs;  
51    my @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } `cat $to_call_dir/neighbors`;
52    
53    open(CALLED_BY, ">>$to_call_dir/called_by")
54        || die "could not open $to_call_dir/called_by";
55    
56    #...Build file of "nearby" PEGs
57    if (@neighbors) {
58        my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";
59        system("/bin/rm -f $tmp_close*");
60    
61        foreach my $org (@neighbors) {
62            print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};
63            system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");
64        }
65        &FIG::run("formatdb -i$tmp_close -pT");
66    
67        my @promoted_fids;
68        my $tmp_sims = "$FIG_Config::temp/tmp_sims.$$";
69        open(TMP_SIMS, ">$tmp_sims")
70            || die "Could not write-open $tmp_sims";
71        foreach my $orf_id ($to_call->get_fids_for_type('orf')) {
72            if (@neighbors) {
73                my $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);
74    
75                if (@$sims) {
76                    my $num_sims = (scalar @$sims);
77                    print STDERR "Promoting $orf_id based on $num_sims sims\n" if $ENV{VERBOSE};
78    
79                    my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
80                    my $annot  = [ qq(RAST),
81                                   qq(Called by "$self" based on $num_sims sims.)
82                                   ];
83                    my $fid = $orf->promote_to_peg($sims, undef, $annot);
84                    if ($fid) {
85                        push @promoted_fids, $fid;
86                        @$sims = map { $_->[0] = $fid; $_ } @$sims;
87                        print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;
88    
89                        print CALLED_BY "$fid\tpromote_remaining_orfs (with sims)\n";
90    
91                        print STDERR "Succeeded:\t$orf_id --> $fid\n";
92                    }
93                    else {
94                        print STDERR "Could not promote ORF $orf_id to a PEG\n";
95                    }
96                }
97                else {
98                    print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};
99                }
100            }
101        }
102        close(TMP_SIMS) || die "Could not close $tmp_sims";
103    
104    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
105    # Disable the auto assignment of the fids we just called.
106    #=======================================================================
107        if (0)
108        {
109            my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";
110            open(TMP_SEQS, ">$tmp_seqs")
111                || die "Could not write-open $tmp_seqs";
112            print TMP_SEQS map { join("\t", ($_, $to_call->get_feature_sequence($_))) . qq(\n) } @promoted_fids;
113            close(TMP_SEQS) || die "Could not close $tmp_seqs";
114            my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;
115    
116            open(FOUND, ">>$found_file")
117                || die "Could not append-open FoundFamiliesFile $found_file";
118            foreach my $entry (@auto_assigns) {
119                if ($entry =~ m/^(\S+)\t(.*)$/o) {
120                    if ($to_call->set_function($1, $2)) {
121                        print FOUND "$1\tCLOSE_SIMS\t$2\n";
122                    }
123                    else {
124                        die "Could not set function of $1 to $2";
125                    }
126                }
127                else {
128                    die "Could not parse auto-assignment: $entry";
129                }
130            }
131            close(FOUND) || die "Could not close $found_file";
132            unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";
133        }
134    
135        unlink($tmp_sims)  || die "Could not remove $tmp_sims";
136        unlink($tmp_close) || die "Could not remove $tmp_close";
137    }
138    #-----------------------------------------------------------------------
139    
140    
141    #...Promote any remaining unpromoted ORFs...
142    $to_call->promote_remaining_orfs(\*CALLED_BY);
143    
144    close(CALLED_BY);
145    
146    #...NOTE: export_features also writes assigned_functions
147  $to_call->export_features;  $to_call->export_features;
148    
149    
150  if (! -s "$to_call_dir/Features/orf/tbl") {  if (! -s "$to_call_dir/Features/orf/tbl") {
151      &FIG::run("rm -fR $to_call_dir/Features/orf");      # Hrm, weird NFS effects here. There was a failure of this even though the directory
152        # was indeed empty.
153        if (system("/bin/rm -fRv $to_call_dir/Features/orf")) {
154            warn "First remove did not work; sleeping and trying again";
155            sleep(30);
156            system("/bin/rm -fRv $to_call_dir/Features/orf");
157        }
158  }  }
159  else {  else {
160      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";
161  }  }
162    exit(0);
163    
164    
165    
166    sub blast_against {
167        my ($seq, $db) = @_;
168        my $seq_len = length($seq);
169    
170        my $tmp_seq = "$FIG_Config::temp/tmp_seq.$$";
171        open( TMP, ">$tmp_seq" ) || die "Could not write-open $tmp_seq";
172        print TMP  ">query\n$seq\n";
173        close(TMP) || die "Could not close $tmp_seq";
174    
175        my $sims = [];
176        @$sims = map { chomp $_;
177                       my @x = split(/\t/, $_);
178                       push @x, ($seq_len, $fig->translation_length($x[1]));
179                       bless( [@x], "Sim" )
180                       } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;
181    
182        unlink($tmp_seq) || die "Could not unlink $tmp_seq";
183        return $sims;
184    }

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.13

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3