[Bio] / FigKernelScripts / get_sims_for_start_predictions.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/get_sims_for_start_predictions.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : overbeek 1.3 # -*- perl -*-
2 : gdpusch 1.5 ########################################################################
3 : olson 1.4 # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 : gdpusch 1.5 ########################################################################
18 : overbeek 1.3
use strict;
use warnings;

use FIG_Config;

use FIG;
use FIGV;

#
# get_sims_for_start_predictions
#
# Reads PEG ids from STDIN (one per line; anything after the first tab is
# ignored), looks each id up in the FASTA protein file given on the command
# line, and runs a blastp search of that sequence against a BLAST database
# built from the same FASTA file.
#
# Options:
#   -help            print the usage message and exit
#   -orgdir=OrgDir   use a FIGV object built on OrgDir instead of plain FIG
#   AA_for_ORFs      non-empty FASTA file of protein sequences (required)
#
# The external programs are launched through $fig->run; whatever they print
# is not captured here.
# NOTE(review): $fig->run is assumed to execute the command via the shell and
# to abort on failure -- confirm against FIG.pm.
#

my $usage = "usage: get_sims_for_start_predictions [-help] [-orgdir=OrgDir] AA_for_ORFs < PEGs > similarities";

my $fig = FIG->new();

my $trouble = 0;    # set when the command line cannot be honoured
my $aa_for_orfs;    # path of the FASTA file with the ORF protein sequences
my $orf_fh;         # read handle on $aa_for_orfs

while (@ARGV) {
    my $arg = shift @ARGV;

    # Anchored so that file names or paths that merely contain "-h" are not
    # mistaken for a help request.
    if ($arg =~ /^--?h(?:elp)?$/) {
        die "\n\t$usage\n\n";
    }
    elsif ($arg =~ /^--?orgdir=(\S+)/) {
        $fig = FIGV->new($1);
    }
    elsif (-s $arg) {
        $aa_for_orfs = $arg;
        open($orf_fh, '<', $aa_for_orfs)
            or die "Could not read-open $aa_for_orfs: $!";
    }
    else {
        warn "Invalid argument $arg\n";
        $trouble = 1;
    }
}

# Without the sequence file there is nothing to build a database from.
if (!$orf_fh) {
    warn "No AA_for_ORFs file given\n";
    $trouble = 1;
}
die "\n$usage\n\n" if $trouble;

# PEG ids arrive on STDIN; keep only the id at the front of each line.
my @pegs = <STDIN>;
chomp @pegs;
for my $peg (@pegs) {
    $peg = $1 if $peg =~ /^\s*(\S+)\t/;
}

my $pegprots   = "$FIG_Config::temp/pegprots$$";    # normalised FASTA / BLAST db
my $query_file = "$FIG_Config::temp/tmporf$$";      # single-sequence query file
my %seqs;                                           # id => protein sequence

open(my $db_fh, '>', $pegprots)
    or die "Could not write-open $pegprots: $!";
{
    # Read one FASTA record at a time: records are separated by "\n>", so
    # only the very first record still carries its leading ">".
    local $/ = "\n>";
    while (defined(my $record = <$orf_fh>)) {
        chomp $record;
        if ($record =~ /^>?(\S+)[^\n]*\n(.*)/s) {
            my ($id, $seq) = ($1, $2);
            $seq =~ s/\s//g;    # join the sequence lines
            $seqs{$id} = $seq;
            print $db_fh ">$id\n$seq\n";
        }
    }
}
close($orf_fh);
close($db_fh) or die "Could not close $pegprots: $!";

$fig->run("$FIG_Config::ext_bin/formatdb -i $pegprots");

# Blast every requested PEG that has a sequence against the database.
for my $id (@pegs) {
    my $seq = $seqs{$id} or next;

    open(my $query_fh, '>', $query_file)
        or die "Could not write-open $query_file: $!";
    print $query_fh ">$id\n$seq\n";
    close($query_fh) or die "Could not close $query_file: $!";

    $fig->run("$FIG_Config::ext_bin/blastall -m 8 -e 1.0e-20 -i $query_file -FF -d $pegprots -p blastp");
}

# Remove the FASTA copy together with the index files formatdb put next to it.
unlink glob("$pegprots*");
unlink($query_file);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3