[Bio] / FigKernelScripts / promote_orfs_to_pegs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/promote_orfs_to_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Thu Oct 5 00:28:58 2006 UTC revision 1.8, Thu Jun 7 19:01:48 2007 UTC
# Line 22  Line 22 
22  use NewGenome;  use NewGenome;
23  use ToCall;  use ToCall;
24    
25  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir";  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";
26    
27  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
28      print STDERR "\n   $usage\n\n";      print STDERR "\n   $usage\n\n";
29      exit (0);      exit (0);
30  }  }
31    
32  my ($to_call_dir);  my ($to_call_dir, $found_file);
33    
34  (  (
35   ($to_call_dir = shift @ARGV)   ($to_call_dir = shift @ARGV)
36    &&
37     ($found_file  = shift @ARGV)
38  )  )
39      || die "\n   $usage\n\n";      || die "\n   $usage\n\n";
40    
# Line 40  Line 42 
42    
43  my $to_call;  my $to_call;
44  $to_call = new NewGenome($to_call_dir,"all");  $to_call = new NewGenome($to_call_dir,"all");
45    
46    my @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } `cat $to_call_dir/neighbors`;
47    
48    #...Build file of "nearby" PEGs
49    my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";
50    system("/bin/rm -f $tmp_close*");
51    foreach $org (@neighbors) {
52        print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};
53        system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");
54    }
55    &FIG::run("formatdb -i$tmp_close -pT");
56    
57    my @promoted_fids;
58    my $tmp_sims = "$FIG_Config::temp/tmp_sims.$$";
59    open(TMP_SIMS, ">$tmp_sims")
60        || die "Could not write-open $tmp_sims";
61    foreach my $orf_id ($to_call->get_fids_for_type('orf')) {
62        $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);
63    
64        if (@$sims) {
65            print STDERR "Promoting $orf_id based on ", (scalar @$sims), " sims\n" if $ENV{VERBOSE};
66    
67            my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
68            my $fid = $orf->promote_to_peg($sims)
69                || die "Could not promote ORF $orf_id to a PEG";
70            push @promoted_fids, $fid;
71            @$sims = map { $_->[0] = $fid; $_ } @$sims;
72            print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;
73        }
74        else {
75            print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};
76        }
77    }
78    close(TMP_SIMS) || die "Could not close $tmp_sims";
79    
80    #
81    # Disable the auto assignment of the fids we just called.
82    #
83    if (0)
84    {
85        my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";
86        open(TMP_SEQS, ">$tmp_seqs")
87            || die "Could not write-open $tmp_seqs";
88        print TMP_SEQS map { join("\t", ($_, $to_call->get_feature_sequence($_))) . qq(\n) } @promoted_fids;
89        close(TMP_SEQS) || die "Could not close $tmp_seqs";
90        my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;
91    
92        open(FOUND, ">>$found_file")
93            || die "Could not write-open FoundFamiliesFile $found_file";
94        foreach my $entry (@auto_assigns) {
95            if ($entry =~ m/^(\S+)\t(.*)$/o) {
96                if ($to_call->set_function($1, $2)) {
97                    print FOUND "$1\tCLOSE_SIMS\t$2\n";
98                }
99                else {
100                    die "Could not set function of $1 to $2";
101                }
102            }
103            else {
104                die "Could not parse auto-assignment: $entry";
105            }
106        }
107        close(FOUND) || die "Could not close $found_file";
108    }
109    
110  $to_call->promote_remaining_orfs;  $to_call->promote_remaining_orfs;
111  $to_call->export_features;  $to_call->export_features;
112    
113  if (! -s "$to_call_dir/Features/orf/tbl") {  if (! -s "$to_call_dir/Features/orf/tbl") {
114      &FIG::run("rm -fR $to_call_dir/Features/orf");      #
115        # Hrm, weird NFS effects here. There was a failure of this even though the directory
116        # was indeed empty.
117        if (system("/bin/rm -fRv $to_call_dir/Features/orf")) {
118            warn "First remove did not work; sleeping and trying again";
119            sleep(30);
120            system("/bin/rm -fRv $to_call_dir/Features/orf");
121        }
122  }  }
123  else {  else {
124      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";
125  }  }
126    
127    unlink($tmp_close) || die "Could not remove $tmp_close";
128    unlink($tmp_sims)  || die "Could not remove $tmp_sims";
129    unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";
130    
131    
132    
133    
134    
135    sub blast_against {
136        my ($seq, $db) = @_;
137        my $seq_len = length($seq);
138    
139        my $tmp_seq = "$FIG_Config::temp/tmp_seq.$$";
140        open( TMP, ">$tmp_seq" ) || die "Could not write-open $tmp_seq";
141        print TMP  ">query\n$seq\n";
142        close(TMP) || die "Could not close $tmp_seq";
143    
144        my $sims = [];
145        @$sims = map { chomp $_;
146                       @x = split(/\t/, $_);
147                       push @x, ($seq_len, $fig->translation_length($x[1]));
148                       bless( [@x], "Sim" )
149                       } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;
150    
151        unlink($tmp_seq) || die "Could not unlink $tmp_seq";
152        return $sims;
153    }

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.8

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3