[Bio] / FigKernelScripts / promote_orfs_to_pegs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/promote_orfs_to_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6, Tue Nov 14 17:35:44 2006 UTC revision 1.14, Fri Jul 16 15:09:17 2010 UTC
# Line 1  Line 1 
1  # -*- perl -*-  # -*- perl -*-
2  #  ########################################################################
3  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
4  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
5  #  #
# Line 14  Line 14 
14  # at info@ci.uchicago.edu or the Fellowship for Interpretation of  # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  ########################################################################
18    
19    use strict;
20    use warnings;
21    
22  use FIG;  use FIG;
23  my $fig = new FIG;  my $fig = FIG->new();
24    
25  use NewGenome;  use NewGenome;
26  use ToCall;  use ToCall;
27    
28    $0 =~ m/([^\/]+)$/o;
29    my $self  = $1;
30  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";
31    
32  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
# Line 41  Line 46 
46  $to_call_dir =~ s/\/$//o;  $to_call_dir =~ s/\/$//o;
47    
48  my $to_call;  my $to_call;
49  $to_call = new NewGenome($to_call_dir,"all");  $to_call = NewGenome->new($to_call_dir, "all");
50    
51  my @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } `cat $to_call_dir/neighbors`;  
52    my @neighbors = ();
53    my $neighbors_file;
54    if ((-s ($neighbors_file = "$to_call_dir/closest.genomes")) ||
55        (-s ($neighbors_file = "$to_call_dir/neighbors"))
56        ) {
57        @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } &FIG::file_read($neighbors_file);
58    }
59    
60    
61    open(CALLED_BY, ">>$to_call_dir/called_by")
62        || die "could not open $to_call_dir/called_by";
63    
64  #...Build file of "nearby" PEGs  #...Build file of "nearby" PEGs
65    if (@neighbors) {
66  my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";  my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";
67  system("rm -f $tmp_close*");      system("/bin/rm -f $tmp_close*");
68  foreach $org (@neighbors) {  
69        foreach my $org (@neighbors) {
70      print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};      print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};
71      system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");      system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");
72  }  }
# Line 59  Line 77 
77  open(TMP_SIMS, ">$tmp_sims")  open(TMP_SIMS, ">$tmp_sims")
78      || die "Could not write-open $tmp_sims";      || die "Could not write-open $tmp_sims";
79  foreach my $orf_id ($to_call->get_fids_for_type('orf')) {  foreach my $orf_id ($to_call->get_fids_for_type('orf')) {
80      $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);          if (@neighbors) {
81                my $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);
82    
83      if (@$sims) {      if (@$sims) {
84          print STDERR "Promoting $orf_id based on ", (scalar @$sims), " sims\n" if $ENV{VERBOSE};                  my $num_sims = (scalar @$sims);
85                    print STDERR "Promoting $orf_id based on $num_sims sims\n" if $ENV{VERBOSE};
86    
87          my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);          my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
88          my $fid = $orf->promote_to_peg($sims)                  my $annot  = [ qq(RAST),
89              || die "Could not promote ORF $orf_id to a PEG";                                 qq(Called by "$self" based on $num_sims sims.)
90                                   ];
91                    my $fid = $orf->promote_to_peg($sims, undef, $annot);
92                    if ($fid) {
93          push @promoted_fids, $fid;          push @promoted_fids, $fid;
94          @$sims = map { $_->[0] = $fid; $_ } @$sims;          @$sims = map { $_->[0] = $fid; $_ } @$sims;
95          print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;          print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;
96    
97                        print CALLED_BY "$fid\tpromote_remaining_orfs (with sims)\n";
98    
99                        print STDERR "Succeeded:\t$orf_id --> $fid\n";
100                    }
101                    else {
102                        print STDERR "Could not promote ORF $orf_id to a PEG\n";
103                    }
104      }      }
105      else {      else {
106          print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};          print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};
107      }      }
108  }  }
109        }
110  close(TMP_SIMS) || die "Could not close $tmp_sims";  close(TMP_SIMS) || die "Could not close $tmp_sims";
111    
112    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
113    # Disable the auto assignment of the fids we just called.
114    #=======================================================================
115        if (0)
116        {
117  my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";  my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";
118  open(TMP_SEQS, ">$tmp_seqs")  open(TMP_SEQS, ">$tmp_seqs")
119      || die "Could not write-open $tmp_seqs";      || die "Could not write-open $tmp_seqs";
# Line 85  Line 122 
122  my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;  my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;
123    
124  open(FOUND, ">>$found_file")  open(FOUND, ">>$found_file")
125      || die "Could not write-open FoundFamiliesFile $found_file";              || die "Could not append-open FoundFamiliesFile $found_file";
126  foreach my $entry (@auto_assigns) {  foreach my $entry (@auto_assigns) {
127      if ($entry =~ m/^(\S+)\t(.*)$/o) {      if ($entry =~ m/^(\S+)\t(.*)$/o) {
128          if ($to_call->set_function($1, $2)) {          if ($to_call->set_function($1, $2)) {
# Line 100  Line 137 
137      }      }
138  }  }
139  close(FOUND) || die "Could not close $found_file";  close(FOUND) || die "Could not close $found_file";
140            unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";
141        }
142    
143  $to_call->promote_remaining_orfs;      unlink($tmp_sims)  || die "Could not remove $tmp_sims";
144        unlink($tmp_close) || die "Could not remove $tmp_close";
145    }
146    #-----------------------------------------------------------------------
147    
148    
149    #...Promote any remaining unpromoted ORFs...
150    $to_call->promote_remaining_orfs(\*CALLED_BY);
151    
152    close(CALLED_BY);
153    
154    #...NOTE: export_features also writes assigned_functions
155  $to_call->export_features;  $to_call->export_features;
156    
157    
158  if (! -s "$to_call_dir/Features/orf/tbl") {  if (! -s "$to_call_dir/Features/orf/tbl") {
     #  
159      # Hrm, weird NFS effects here. There was a failure of this even though the directory      # Hrm, weird NFS effects here. There was a failure of this even though the directory
160      # was indeed empty.      # was indeed empty.
161      system("rm -fR $to_call_dir/Features/orf");      if (system("/bin/rm -fRv $to_call_dir/Features/orf")) {
     if (-d "$to_call_dir/Features/orf")  
     {  
162          warn "First remove did not work; sleeping and trying again";          warn "First remove did not work; sleeping and trying again";
163          sleep(2);          sleep(30);
164          system("rm -fR $to_call_dir/Features/orf");          system("/bin/rm -fRv $to_call_dir/Features/orf");
165      }      }
166  }  }
167  else {  else {
168      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";
169  }  }
170    exit(0);
 unlink($tmp_close) || die "Could not remove $tmp_close";  
 unlink($tmp_sims)  || die "Could not remove $tmp_sims";  
 unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";  
   
   
171    
172    
173    
# Line 139  Line 182 
182    
183      my $sims = [];      my $sims = [];
184      @$sims = map { chomp $_;      @$sims = map { chomp $_;
185                     @x = split(/\t/, $_);                     my @x = split(/\t/, $_);
186                     push @x, ($seq_len, $fig->translation_length($x[1]));                     push @x, ($seq_len, $fig->translation_length($x[1]));
187                     bless( [@x], "Sim" )                     bless( [@x], "Sim" )
188                     } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;                     } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.14

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3