[Bio] / FigKernelScripts / promote_orfs_to_pegs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/promote_orfs_to_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Thu Jun 7 19:01:48 2007 UTC revision 1.15, Tue Nov 23 18:22:21 2010 UTC
# Line 1  Line 1 
1  # -*- perl -*-  # -*- perl -*-
2  #  ########################################################################
3  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
4  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
5  #  #
# Line 14  Line 14 
14  # at info@ci.uchicago.edu or the Fellowship for Interpretation of  # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  ########################################################################
18    
19    use strict;
20    use warnings;
21    
22  use FIG;  use FIG;
23  my $fig = new FIG;  my $fig = FIG->new();
24    
25  use NewGenome;  use NewGenome;
26  use ToCall;  use ToCall;
27    
28    $0 =~ m/([^\/]+)$/o;
29    my $self  = $1;
30  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";
31    
32  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
# Line 41  Line 46 
46  $to_call_dir =~ s/\/$//o;  $to_call_dir =~ s/\/$//o;
47    
48  my $to_call;  my $to_call;
49  $to_call = new NewGenome($to_call_dir,"all");  $to_call = NewGenome->new($to_call_dir, "all");
50    
51  my @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } `cat $to_call_dir/neighbors`;  
52    my @neighbors = ();
53    my $neighbors_file;
54    if ((-s ($neighbors_file = "$to_call_dir/closest.genomes")) ||
55        (-s ($neighbors_file = "$to_call_dir/neighbors"))
56        ) {
57        @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } &FIG::file_read($neighbors_file);
58    }
59    else {
60        warn qq(WARNING: $self could not read either nearest neighbors file\n);
61    }
62    
63    open(CALLED_BY, ">>$to_call_dir/called_by")
64        || die "could not open $to_call_dir/called_by";
65    
66  #...Build file of "nearby" PEGs  #...Build file of "nearby" PEGs
67    if (@neighbors == 0) {
68        warn qq($self: No nearby genomes found --- Skipping promotion based on sims\n);
69    }
70    else {
71  my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";  my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";
72  system("/bin/rm -f $tmp_close*");  system("/bin/rm -f $tmp_close*");
73  foreach $org (@neighbors) {  
74        foreach my $org (@neighbors) {
75      print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};      print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};
76      system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");      system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");
77  }  }
78    
79        if (!-s $tmp_close) {
80            warn qq($self: Something is wrong --- Could not find any PEG sequences for nearby genomes);
81        }
82        else {
83  &FIG::run("formatdb -i$tmp_close -pT");  &FIG::run("formatdb -i$tmp_close -pT");
84    
85  my @promoted_fids;  my @promoted_fids;
# Line 59  Line 87 
87  open(TMP_SIMS, ">$tmp_sims")  open(TMP_SIMS, ">$tmp_sims")
88      || die "Could not write-open $tmp_sims";      || die "Could not write-open $tmp_sims";
89  foreach my $orf_id ($to_call->get_fids_for_type('orf')) {  foreach my $orf_id ($to_call->get_fids_for_type('orf')) {
90      $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);              my $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);
91    
92      if (@$sims) {      if (@$sims) {
93          print STDERR "Promoting $orf_id based on ", (scalar @$sims), " sims\n" if $ENV{VERBOSE};                  my $num_sims = (scalar @$sims);
94                    print STDERR "Promoting $orf_id based on $num_sims sims\n" if $ENV{VERBOSE};
95    
96          my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);          my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
97          my $fid = $orf->promote_to_peg($sims)                  my $annot  = [ qq(RAST),
98              || die "Could not promote ORF $orf_id to a PEG";                                 qq(Called by "$self" based on $num_sims sims.)
99                                   ];
100                    my $fid = $orf->promote_to_peg($sims, undef, $annot);
101                    if ($fid) {
102          push @promoted_fids, $fid;          push @promoted_fids, $fid;
103          @$sims = map { $_->[0] = $fid; $_ } @$sims;          @$sims = map { $_->[0] = $fid; $_ } @$sims;
104          print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;          print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;
     }  
     else {  
         print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};  
     }  
 }  
 close(TMP_SIMS) || die "Could not close $tmp_sims";  
105    
106  #                      print CALLED_BY "$fid\tpromote_remaining_orfs (with sims)\n";
 # Disable the auto assignment of the fids we just called.  
 #  
 if (0)  
 {  
     my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";  
     open(TMP_SEQS, ">$tmp_seqs")  
         || die "Could not write-open $tmp_seqs";  
     print TMP_SEQS map { join("\t", ($_, $to_call->get_feature_sequence($_))) . qq(\n) } @promoted_fids;  
     close(TMP_SEQS) || die "Could not close $tmp_seqs";  
     my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;  
107    
108      open(FOUND, ">>$found_file")                      print STDERR "Succeeded:\t$orf_id --> $fid\n";
         || die "Could not write-open FoundFamiliesFile $found_file";  
     foreach my $entry (@auto_assigns) {  
         if ($entry =~ m/^(\S+)\t(.*)$/o) {  
             if ($to_call->set_function($1, $2)) {  
                 print FOUND "$1\tCLOSE_SIMS\t$2\n";  
109              }              }
110              else {              else {
111                  die "Could not set function of $1 to $2";                      print STDERR "Could not promote ORF $orf_id to a PEG\n";
112              }              }
113          }          }
114          else {          else {
115              die "Could not parse auto-assignment: $entry";                  print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};
116                }
117          }          }
118    
119            if (not $ENV{DEBUG}) {
120                #...Clean up...
121                close(TMP_SIMS)    || die "Could not close $tmp_sims";
122                unlink($tmp_sims)  || die "Could not remove $tmp_sims";
123                unlink($tmp_close) || die "Could not remove $tmp_close";
124      }      }
     close(FOUND) || die "Could not close $found_file";  
125  }  }
126    
127  $to_call->promote_remaining_orfs;  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
128    # Disable the old auto assignment of the fids we just called.
129    #=======================================================================
130    #     if (0)
131    #     {
132    #       my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";
133    #       open(TMP_SEQS, ">$tmp_seqs")
134    #           || die "Could not write-open $tmp_seqs";
135    #       print TMP_SEQS map { join("\t", ($_, $to_call->get_feature_sequence($_))) . qq(\n) } @promoted_fids;
136    #       close(TMP_SEQS) || die "Could not close $tmp_seqs";
137    #       my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;
138    #
139    #       open(FOUND, ">>$found_file")
140    #           || die "Could not append-open FoundFamiliesFile $found_file";
141    #       foreach my $entry (@auto_assigns) {
142    #           if ($entry =~ m/^(\S+)\t(.*)$/o) {
143    #               if ($to_call->set_function($1, $2)) {
144    #                   print FOUND "$1\tCLOSE_SIMS\t$2\n";
145    #               }
146    #               else {
147    #                   die "Could not set function of $1 to $2";
148    #               }
149    #           }
150    #           else {
151    #               die "Could not parse auto-assignment: $entry";
152    #           }
153    #       }
154    #       close(FOUND) || die "Could not close $found_file";
155    #       unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";
156    #     }
157    #-----------------------------------------------------------------------
158    
159    }
160    
161    
162    
163    #...Promote any remaining unpromoted ORFs...
164    $to_call->promote_remaining_orfs(\*CALLED_BY);
165    
166    close(CALLED_BY);
167    
168    #...NOTE: export_features also writes assigned_functions
169  $to_call->export_features;  $to_call->export_features;
170    
171    
172  if (! -s "$to_call_dir/Features/orf/tbl") {  if (! -s "$to_call_dir/Features/orf/tbl") {
     #  
173      # Hrm, weird NFS effects here. There was a failure of this even though the directory      # Hrm, weird NFS effects here. There was a failure of this even though the directory
174      # was indeed empty.      # was indeed empty.
175      if (system("/bin/rm -fRv $to_call_dir/Features/orf")) {      if (system("/bin/rm -fRv $to_call_dir/Features/orf")) {
# Line 123  Line 181 
181  else {  else {
182      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";
183  }  }
184    exit(0);
 unlink($tmp_close) || die "Could not remove $tmp_close";  
 unlink($tmp_sims)  || die "Could not remove $tmp_sims";  
 unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";  
   
   
185    
186    
187    
# Line 143  Line 196 
196    
197      my $sims = [];      my $sims = [];
198      @$sims = map { chomp $_;      @$sims = map { chomp $_;
199                     @x = split(/\t/, $_);                     my @x = split(/\t/, $_);
200                     push @x, ($seq_len, $fig->translation_length($x[1]));                     push @x, ($seq_len, $fig->translation_length($x[1]));
201                     bless( [@x], "Sim" )                     bless( [@x], "Sim" )
202                     } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;                     } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.15

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3