[Bio] / FigKernelScripts / promote_orfs_to_pegs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/promote_orfs_to_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Wed Sep 13 03:27:45 2006 UTC revision 1.14, Fri Jul 16 15:09:17 2010 UTC
# Line 1  Line 1 
1  # -*- perl -*-  # -*- perl -*-
2  #  ########################################################################
3  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
4  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
5  #  #
# Line 14  Line 14 
14  # at info@ci.uchicago.edu or the Fellowship for Interpretation of  # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  ########################################################################
18    
19    use strict;
20    use warnings;
21    
22  use FIG;  use FIG;
23  my $fig = new FIG;  my $fig = FIG->new();
24    
25  use NewGenome;  use NewGenome;
26  use ToCall;  use ToCall;
27    
28  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir";  $0 =~ m/([^\/]+)$/o;
29    my $self  = $1;
30    my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";
31    
32  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
33      print STDERR "\n   $usage\n\n";      print STDERR "\n   $usage\n\n";
34      exit (0);      exit (0);
35  }  }
36    
37  my ($to_call_dir);  my ($to_call_dir, $found_file);
38    
39  (  (
40   ($to_call_dir = shift @ARGV)   ($to_call_dir = shift @ARGV)
41    &&
42     ($found_file  = shift @ARGV)
43  )  )
44      || die "\n   $usage\n\n";      || die "\n   $usage\n\n";
45    
46    $to_call_dir =~ s/\/$//o;
47    
48  my $to_call;  my $to_call;
49  $to_call = new NewGenome($to_call_dir,"all");  $to_call = NewGenome->new($to_call_dir, "all");
50  $to_call->promote_remaining_orfs;  
51    
52    my @neighbors = ();
53    my $neighbors_file;
54    if ((-s ($neighbors_file = "$to_call_dir/closest.genomes")) ||
55        (-s ($neighbors_file = "$to_call_dir/neighbors"))
56        ) {
57        @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } &FIG::file_read($neighbors_file);
58    }
59    
60    
61    open(CALLED_BY, ">>$to_call_dir/called_by")
62        || die "could not open $to_call_dir/called_by";
63    
64    #...Build file of "nearby" PEGs
65    if (@neighbors) {
66        my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";
67        system("/bin/rm -f $tmp_close*");
68    
69        foreach my $org (@neighbors) {
70            print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};
71            system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");
72        }
73        &FIG::run("formatdb -i$tmp_close -pT");
74    
75        my @promoted_fids;
76        my $tmp_sims = "$FIG_Config::temp/tmp_sims.$$";
77        open(TMP_SIMS, ">$tmp_sims")
78            || die "Could not write-open $tmp_sims";
79        foreach my $orf_id ($to_call->get_fids_for_type('orf')) {
80            if (@neighbors) {
81                my $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);
82    
83                if (@$sims) {
84                    my $num_sims = (scalar @$sims);
85                    print STDERR "Promoting $orf_id based on $num_sims sims\n" if $ENV{VERBOSE};
86    
87                    my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
88                    my $annot  = [ qq(RAST),
89                                   qq(Called by "$self" based on $num_sims sims.)
90                                   ];
91                    my $fid = $orf->promote_to_peg($sims, undef, $annot);
92                    if ($fid) {
93                        push @promoted_fids, $fid;
94                        @$sims = map { $_->[0] = $fid; $_ } @$sims;
95                        print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;
96    
97                        print CALLED_BY "$fid\tpromote_remaining_orfs (with sims)\n";
98    
99                        print STDERR "Succeeded:\t$orf_id --> $fid\n";
100                    }
101                    else {
102                        print STDERR "Could not promote ORF $orf_id to a PEG\n";
103                    }
104                }
105                else {
106                    print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};
107                }
108            }
109        }
110        close(TMP_SIMS) || die "Could not close $tmp_sims";
111    
112    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
113    # Disable the auto assignment of the fids we just called.
114    #=======================================================================
115        if (0)
116        {
117            my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";
118            open(TMP_SEQS, ">$tmp_seqs")
119                || die "Could not write-open $tmp_seqs";
120            print TMP_SEQS map { join("\t", ($_, $to_call->get_feature_sequence($_))) . qq(\n) } @promoted_fids;
121            close(TMP_SEQS) || die "Could not close $tmp_seqs";
122            my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;
123    
124            open(FOUND, ">>$found_file")
125                || die "Could not append-open FoundFamiliesFile $found_file";
126            foreach my $entry (@auto_assigns) {
127                if ($entry =~ m/^(\S+)\t(.*)$/o) {
128                    if ($to_call->set_function($1, $2)) {
129                        print FOUND "$1\tCLOSE_SIMS\t$2\n";
130                    }
131                    else {
132                        die "Could not set function of $1 to $2";
133                    }
134                }
135                else {
136                    die "Could not parse auto-assignment: $entry";
137                }
138            }
139            close(FOUND) || die "Could not close $found_file";
140            unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";
141        }
142    
143        unlink($tmp_sims)  || die "Could not remove $tmp_sims";
144        unlink($tmp_close) || die "Could not remove $tmp_close";
145    }
146    #-----------------------------------------------------------------------
147    
148    
149    #...Promote any remaining unpromoted ORFs...
150    $to_call->promote_remaining_orfs(\*CALLED_BY);
151    
152    close(CALLED_BY);
153    
154    #...NOTE: export_features also writes assigned_functions
155  $to_call->export_features;  $to_call->export_features;
156    
157    
158  if (!-s "$to_call_dir/Features/orf/tbl") {  if (!-s "$to_call_dir/Features/orf/tbl") {
159      &FIG::run("rm -fR $to_call_dir/Features/orf");      # Hrm, weird NFS effects here. There was a failure of this even though the directory
160        # was indeed empty.
161        if (system("/bin/rm -fRv $to_call_dir/Features/orf")) {
162            warn "First remove did not work; sleeping and trying again";
163            sleep(30);
164            system("/bin/rm -fRv $to_call_dir/Features/orf");
165        }
166  }  }
167  else {  else {
168      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";
169  }  }
170    exit(0);
171    
172    
173    
174    sub blast_against {
175        my ($seq, $db) = @_;
176        my $seq_len = length($seq);
177    
178        my $tmp_seq = "$FIG_Config::temp/tmp_seq.$$";
179        open( TMP, ">$tmp_seq" ) || die "Could not write-open $tmp_seq";
180        print TMP  ">query\n$seq\n";
181        close(TMP) || die "Could not close $tmp_seq";
182    
183        my $sims = [];
184        @$sims = map { chomp $_;
185                       my @x = split(/\t/, $_);
186                       push @x, ($seq_len, $fig->translation_length($x[1]));
187                       bless( [@x], "Sim" )
188                       } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;
189    
190        unlink($tmp_seq) || die "Could not unlink $tmp_seq";
191        return $sims;
192    }

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.14

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3