[Bio] / FigKernelScripts / promote_orfs_to_pegs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/promote_orfs_to_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Mon Sep 4 22:24:32 2006 UTC revision 1.9, Fri Jun 8 18:11:21 2007 UTC
# Line 16  Line 16 
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  #
18    
19    use strict;
20  use FIG;  use FIG;
21  my $fig = new FIG;  my $fig = new FIG;
22    
23  use NewGenome;  use NewGenome;
24  use ToCall;  use ToCall;
25    
26  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir";  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";
27    
28  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
29      print STDERR "\n   $usage\n\n";      print STDERR "\n   $usage\n\n";
30      exit (0);      exit (0);
31  }  }
32    
33  my ($to_call_dir);  my ($to_call_dir, $found_file);
34    
35  (  (
36   ($to_call_dir = shift @ARGV)   ($to_call_dir = shift @ARGV)
37    &&
38     ($found_file  = shift @ARGV)
39  )  )
40      || die $usage;      || die "\n   $usage\n\n";
41    
42    $to_call_dir =~ s/\/$//o;
43    
44  my $to_call;  my $to_call;
45  $to_call = new NewGenome($to_call_dir,"all");  $to_call = new NewGenome($to_call_dir,"all");
46    
47    my @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } `cat $to_call_dir/neighbors`;
48    
49    #...Build file of "nearby" PEGs
50    my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";
51    system("/bin/rm -f $tmp_close*");
52    foreach my $org (@neighbors) {
53        print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};
54        system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");
55    }
56    &FIG::run("formatdb -i$tmp_close -pT");
57    
58    my @promoted_fids;
59    my $tmp_sims = "$FIG_Config::temp/tmp_sims.$$";
60    open(TMP_SIMS, ">$tmp_sims")
61        || die "Could not write-open $tmp_sims";
62    foreach my $orf_id ($to_call->get_fids_for_type('orf')) {
63        my $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);
64    
65        if (@$sims) {
66            print STDERR "Promoting $orf_id based on ", (scalar @$sims), " sims\n" if $ENV{VERBOSE};
67    
68            my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
69            my $fid = $orf->promote_to_peg($sims)
70                || die "Could not promote ORF $orf_id to a PEG";
71            push @promoted_fids, $fid;
72            @$sims = map { $_->[0] = $fid; $_ } @$sims;
73            print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;
74        }
75        else {
76            print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};
77        }
78    }
79    close(TMP_SIMS) || die "Could not close $tmp_sims";
80    
81    #
82    # Disable the auto assignment of the fids we just called.
83    #
84    if (0)
85    {
86        my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";
87        open(TMP_SEQS, ">$tmp_seqs")
88            || die "Could not write-open $tmp_seqs";
89        print TMP_SEQS map { join("\t", ($_, $to_call->get_feature_sequence($_))) . qq(\n) } @promoted_fids;
90        close(TMP_SEQS) || die "Could not close $tmp_seqs";
91        my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;
92    
93        open(FOUND, ">>$found_file")
94            || die "Could not write-open FoundFamiliesFile $found_file";
95        foreach my $entry (@auto_assigns) {
96            if ($entry =~ m/^(\S+)\t(.*)$/o) {
97                if ($to_call->set_function($1, $2)) {
98                    print FOUND "$1\tCLOSE_SIMS\t$2\n";
99                }
100                else {
101                    die "Could not set function of $1 to $2";
102                }
103            }
104            else {
105                die "Could not parse auto-assignment: $entry";
106            }
107        }
108        close(FOUND) || die "Could not close $found_file";
109        unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";
110    }
111    
112  $to_call->promote_remaining_orfs;  $to_call->promote_remaining_orfs;
113  $to_call->export_features;  $to_call->export_features;
114    
115    if (! -s "$to_call_dir/Features/orf/tbl") {
116        #
117        # Hrm, weird NFS effects here. There was a failure of this even though the directory
118        # was indeed empty.
119        if (system("/bin/rm -fRv $to_call_dir/Features/orf")) {
120            warn "First remove did not work; sleeping and trying again";
121            sleep(30);
122            system("/bin/rm -fRv $to_call_dir/Features/orf");
123        }
124    }
125    else {
126        die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";
127    }
128    
129    unlink($tmp_close) || die "Could not remove $tmp_close";
130    unlink($tmp_sims)  || die "Could not remove $tmp_sims";
131    
132    
133    
134    sub blast_against {
135        my ($seq, $db) = @_;
136        my $seq_len = length($seq);
137    
138        my $tmp_seq = "$FIG_Config::temp/tmp_seq.$$";
139        open( TMP, ">$tmp_seq" ) || die "Could not write-open $tmp_seq";
140        print TMP  ">query\n$seq\n";
141        close(TMP) || die "Could not close $tmp_seq";
142    
143        my $sims = [];
144        @$sims = map { chomp $_;
145                       my @x = split(/\t/, $_);
146                       push @x, ($seq_len, $fig->translation_length($x[1]));
147                       bless( [@x], "Sim" )
148                       } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;
149    
150        unlink($tmp_seq) || die "Could not unlink $tmp_seq";
151        return $sims;
152    }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3