[Bio] / FigKernelScripts / promote_orfs_to_pegs.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/promote_orfs_to_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Thu Oct 5 00:28:58 2006 UTC revision 1.5, Sat Oct 28 14:27:05 2006 UTC
# Line 22  Line 22 
22  use NewGenome;  use NewGenome;
23  use ToCall;  use ToCall;
24    
25  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir";  my $usage = "usage: promote_orfs_to_pegs To_Call_Dir FoundFamiliesFile";
26    
27  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {  if (@ARGV && ($ARGV[0] =~ m/-h(elp)?/)) {
28      print STDERR "\n   $usage\n\n";      print STDERR "\n   $usage\n\n";
29      exit (0);      exit (0);
30  }  }
31    
32  my ($to_call_dir);  my ($to_call_dir, $found_file);
33    
34  (  (
35   ($to_call_dir = shift @ARGV)   ($to_call_dir = shift @ARGV)
36    &&
37     ($found_file  = shift @ARGV)
38  )  )
39      || die "\n   $usage\n\n";      || die "\n   $usage\n\n";
40    
# Line 40  Line 42 
42    
43  my $to_call;  my $to_call;
44  $to_call = new NewGenome($to_call_dir,"all");  $to_call = new NewGenome($to_call_dir,"all");
45    
46    my @neighbors = map { m/^(\d+\.\d+)/o ? ($1) : () } `cat $to_call_dir/neighbors`;
47    
48    #...Build file of "nearby" PEGs
49    my $tmp_close = "$FIG_Config::temp/tmp_close_$$.fasta";
50    system("rm -f $tmp_close*");
51    foreach $org (@neighbors) {
52        print STDERR "Appending $org to $tmp_close\n" if $ENV{VERBOSE};
53        system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close");
54    }
55    &FIG::run("formatdb -i$tmp_close -pT");
56    
57    my @promoted_fids;
58    my $tmp_sims = "$FIG_Config::temp/tmp_sims.$$";
59    open(TMP_SIMS, ">$tmp_sims")
60        || die "Could not write-open $tmp_sims";
61    foreach my $orf_id ($to_call->get_fids_for_type('orf')) {
62        $sims = &blast_against($to_call->get_feature_sequence($orf_id), $tmp_close);
63    
64        if (@$sims) {
65            print STDERR "Promoting $orf_id based on ", (scalar @$sims), " sims\n" if $ENV{VERBOSE};
66    
67            my $orf = &NewGenome::ORF::new('NewGenome::ORF', $to_call, $orf_id);
68            my $fid = $orf->promote_to_peg($sims)
69                || die "Could not promote ORF $orf_id to a PEG";
70            push @promoted_fids, $fid;
71            @$sims = map { $_->[0] = $fid; $_ } @$sims;
72            print TMP_SIMS map { join("\t", @$_) . qq(\n) } @$sims;
73        }
74        else {
75            print STDERR "No sims for $orf_id\n" if $ENV{VERBOSE};
76        }
77    }
78    close(TMP_SIMS) || die "Could not close $tmp_sims";
79    
80    my $tmp_seqs = "$FIG_Config::temp/tmp_seqs.$$";
81    open(TMP_SEQS, ">$tmp_seqs")
82        || die "Could not write-open $tmp_seqs";
83    print TMP_SEQS map { join("\t", ($_, $to_call->get_feature_sequence($_))) . qq(\n) } @promoted_fids;
84    close(TMP_SEQS) || die "Could not close $tmp_seqs";
85    my @auto_assigns = `cat $tmp_seqs | auto_assign sims=$tmp_sims`;
86    
87    open(FOUND, ">>$found_file")
88        || die "Could not write-open FoundFamiliesFile $found_file";
89    foreach my $entry (@auto_assigns) {
90        if ($entry =~ m/^(\S+)\t(.*)$/o) {
91            if ($to_call->set_function($1, $2)) {
92                print FOUND "$1\tCLOSE_SIMS\t$2\n";
93            }
94            else {
95                die "Could not set function of $1 to $2";
96            }
97        }
98        else {
99            die "Could not parse auto-assignment: $entry";
100        }
101    }
102    close(FOUND) || die "Could not close $found_file";
103    
104  $to_call->promote_remaining_orfs;  $to_call->promote_remaining_orfs;
105  $to_call->export_features;  $to_call->export_features;
106    
# Line 49  Line 110 
110  else {  else {
111      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";      die "Something is wrong --- $to_call_dir/Features/orf/tbl is non-empty";
112  }  }
113    
114    unlink($tmp_close) || die "Could not remove $tmp_close";
115    unlink($tmp_sims)  || die "Could not remove $tmp_sims";
116    unlink($tmp_seqs)  || die "Could not remove $tmp_seqs";
117    
118    
119    
120    
121    
122    sub blast_against {
123        my ($seq, $db) = @_;
124        my $seq_len = length($seq);
125    
126        my $tmp_seq = "$FIG_Config::temp/tmp_seq.$$";
127        open( TMP, ">$tmp_seq" ) || die "Could not write-open $tmp_seq";
128        print TMP  ">query\n$seq\n";
129        close(TMP) || die "Could not close $tmp_seq";
130    
131        my $sims = [];
132        @$sims = map { chomp $_;
133                       @x = split(/\t/, $_);
134                       push @x, ($seq_len, $fig->translation_length($x[1]));
135                       bless( [@x], "Sim" )
136                       } `blastall -i $tmp_seq -d $db -p blastp -m8 -e1.0e-10`;
137    
138        unlink($tmp_seq) || die "Could not unlink $tmp_seq";
139        return $sims;
140    }

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.5

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3