[Bio] / FigKernelScripts / tmpred_predictions.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/tmpred_predictions.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : overbeek 1.1 ########################################################################
2 :     use strict;
3 :     use Data::Dumper;
4 :     use Carp;
5 :    
6 :     #
7 :     # This is a SAS Component
8 :     #
9 :    
10 :    
11 :     =head1 tmpred_predictions
12 :    
13 :     Estimate transmembrane domains
14 :    
15 :     ------
16 :    
17 :     Example:
18 :    
19 :     tmpred_predictions < fasta-file
20 :    
21 :     would produce a 5-column table. The first column would contain
22 :     the IDs from the fasta input file, and the remainder are
23 :     use to capture the core domains and scores produced by TMpred. There
24 :     would be a separate line for each predicted TM domain.
25 :     ------
26 :    
27 :     The standard input should be a FASTA formatted file of protein sequences.
28 :    
29 :     =head2 Command-Line Options
30 :    
31 :     =over 4
32 :    
33 :     =item -min MinSc [defaults to 0]
34 :    
35 :     =head2 Output Format
36 :    
37 :     The standard output is a tab-delimited file. It consists of the input
38 :     TD with five extra columns added:
39 :    
40 :     the beginning of the core domain
41 :     the end of the core domain
42 :     the score
43 :     the predicted center
44 :     the direction (outside-to-inside or inside-to-outside)
45 :    
46 :     =cut
47 :    
48 :     use SeedEnv;
49 :     use Getopt::Long;
50 :     use ScriptThing;
51 :     use LWP;
52 :     use URI;
53 :    
54 :     my $usage = "usage: svr_tmpred_predictions [-min = MinSc] ";
55 :    
56 :     my $cutoff = 0;
57 :     my $rc = GetOptions('min=i' => \$cutoff);
58 :     if (! $rc) { print STDERR $usage; exit }
59 :    
60 :     $/ = "\n>";
61 :     while (defined($_ = <STDIN>))
62 :     {
63 :     chomp;
64 : overbeek 1.2 $/ = "\n";
65 : overbeek 1.1 if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
66 :     {
67 :     my $id = $1;
68 :     my $seq = $2;
69 :     $seq =~ s/\s//gs;
70 :     my $browser = LWP::UserAgent->new;
71 :     my $url = "http://www.ch.embnet.org/cgi-bin/TMPRED_form_parser";
72 :     my $response = $browser->post($url,
73 :     [
74 :     'outmode' => "html",
75 :     'min' => "17",
76 :     'max' => "33",
77 :     'comm' => "FIGSEQ",
78 :     'format' => "plain_text",
79 :     'seq' => $seq
80 :     ]
81 :     );
82 :     my @good_hits;
83 :     my $x = $response->content;
84 :     if ($x =~ /Inside to outside[^\n]+\n[ \t]+from[ \t]+to[ \t]+score[ \t]+center\s+(.*?)\n\n/sg)
85 :     {
86 :     push(@good_hits,map { (($_ =~ /^\s*\d+\s*\(\s*(\d+)\)\s+\d+\s+\(\s*(\d+)\)\s+(\d+)\s+(\d+)/) && ($3 >= $cutoff)) ? [$1,$2,$3,$4,'inside-to-outside'] : () }
87 :     split(/\n/,$1));
88 :     }
89 :    
90 :     if ($x =~ /Outside to inside[^\n]+\n[ \t]+from[ \t]+to[ \t]+score[ \t]+center\s+(.*?)\n\n/sg)
91 :    
92 :     {
93 :     push(@good_hits, map { (($_ =~ /^\s*\d+\s*\(\s*(\d+)\)\s+\d+\s+\(\s*(\d+)\)\s+(\d+)\s+(\d+)/) && ($3 >= 400)) ? [$1,$2,$3,$4,'outside-to-inside'] : () }
94 :     split(/\n/,$1));
95 :     }
96 :     @good_hits = sort { ($a->[0] <=> $b->[0]) } @good_hits;
97 :     foreach my $tm (@good_hits)
98 :     {
99 :     print join("\t",($id,@$tm)),"\n";
100 :     }
101 :     }
102 : overbeek 1.2 $/ = "\n>";
103 : overbeek 1.1 }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3