Parent Directory
|
Revision Log
Revision 1.5 - (view) (download) (as text)
1 : | hwang | 1.1 | #!/usr/bin/perl -w |
2 : | hwang | 1.2 | |
3 : | hwang | 1.1 | use strict; |
4 : | hwang | 1.2 | #use CGI qw(:standard); |
5 : | #use CGI::Carp qw(warningsToBrowser fatalsToBrowser); | ||
6 : | hwang | 1.1 | use Data::Dumper; |
7 : | use LWP; | ||
8 : | use HTTP::Request::Common; | ||
9 : | use FIG; | ||
10 : | use HTML; | ||
11 : | |||
12 : | my $fig = new FIG; | ||
13 : | my $user_agent = LWP::UserAgent->new; | ||
14 : | hwang | 1.5 | my $usage = "usage: to_prodom [peg\tpegID]"; |
15 : | hwang | 1.2 | my @arguments_in; |
16 : | |||
17 : | if (@ARGV > 0) | ||
18 : | { | ||
19 : | @arguments_in = @ARGV; | ||
20 : | } | ||
21 : | else | ||
22 : | { | ||
23 : | die $usage; | ||
24 : | } | ||
25 : | |||
26 : | my %arg_pairs = (); | ||
27 : | hwang | 1.1 | |
28 : | hwang | 1.2 | foreach (@arguments_in) |
29 : | { | ||
30 : | my ($name,$val) = split(/\t/,$_); | ||
31 : | $arg_pairs{$name} = $val; | ||
32 : | } | ||
33 : | |||
34 : | my @keys = keys %arg_pairs; | ||
35 : | hwang | 1.1 | |
36 : | hwang | 1.2 | my $peg = $arg_pairs{'peg'}; |
37 : | hwang | 1.1 | |
38 : | $peg =~ s/\%7C/\|/g; | ||
39 : | hwang | 1.2 | |
40 : | hwang | 1.1 | my $seq = $fig->get_translation($peg); |
41 : | my @aliases=$fig->feature_aliases($peg); | ||
42 : | |||
43 : | my @sp_ids = grep {/.*sp.*/} @aliases; | ||
44 : | my @tr_ids = grep {/.*tr.*/} @aliases; | ||
45 : | my @uni_ids = grep {/.*uni.*/} @aliases; | ||
46 : | |||
47 : | # Putting the ids in an array. | ||
48 : | # Allow the program to process each one till there is a ProDom webpage | ||
49 : | # Order of importance is sp, tr, uni | ||
50 : | |||
51 : | my @all_ids; | ||
52 : | &add_to_array (\@uni_ids); | ||
53 : | &add_to_array (\@tr_ids); | ||
54 : | &add_to_array (\@sp_ids); | ||
55 : | |||
56 : | hwang | 1.2 | |
57 : | hwang | 1.1 | foreach (@all_ids) { |
58 : | hwang | 1.2 | |
59 : | hwang | 1.1 | &to_prodom_by_id($_); |
60 : | } | ||
61 : | |||
62 : | |||
63 : | # If made it this far, use sequence to get the Prodom website | ||
64 : | hwang | 1.3 | &to_prodom_by_seq; |
65 : | hwang | 1.1 | |
66 : | ############### | ||
67 : | # Subroutines | ||
68 : | ############### | ||
69 : | |||
70 : | |||
71 : | sub add_to_array { | ||
72 : | my ($x) = @_; | ||
73 : | for (my $i=0; $i <@$x; $i++){ | ||
74 : | push @all_ids, $x->[$i]; | ||
75 : | } | ||
76 : | } | ||
77 : | |||
78 : | |||
79 : | sub to_prodom_by_id { | ||
80 : | |||
81 : | my $id = $_; | ||
82 : | |||
83 : | if ( $id ne "") { | ||
84 : | # Remove all the identifier before | | ||
85 : | |||
86 : | my $url_id = $id; | ||
87 : | $url_id =~ s/sp\|//g; | ||
88 : | $url_id =~ s/uni\|//g; | ||
89 : | $url_id =~ s/tr\|//g; | ||
90 : | |||
91 : | my $url = "http://protein.toulouse.inra.fr/prodom/current/cgi-bin/request.pl?question=SPTR&query=$url_id&bool_operator=OR"; | ||
92 : | |||
93 : | |||
94 : | # Pre-Check to see if there's no entry in ProDom. If there are no entry for the specified id, | ||
95 : | # then it will go to the next id; | ||
96 : | |||
97 : | my $response = $user_agent->get( $url ); | ||
98 : | die "Can't get $url -- ", $response->status_line | ||
99 : | unless $response->is_success; | ||
100 : | |||
101 : | if(! ($response->content =~ m/Sorry/i) ) { | ||
102 : | my $result = $response->content; | ||
103 : | # Replace relative paths with absolute paths | ||
104 : | hwang | 1.4 | $result =~ s/\.\./http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current/g; |
105 : | hwang | 1.5 | $result =~ s/href\=\"\/prodom\.html\"/http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current\/html\/home\.php/g; |
106 : | hwang | 1.2 | |
107 : | hwang | 1.4 | $result =~ s/<\!DOCTYPE HTML PUBLIC \"-\/\/W3C\/\/DTD HTML 4\.0 Transitional\/\/EN\"//; |
108 : | $result =~ s/"http\:\/\/www\.w3\.org\/TR\/REC-html40\/loose\.dtd\">//; | ||
109 : | $result =~ s/<HTML>//; | ||
110 : | $result =~ s/<HEAD>//; | ||
111 : | $result =~ s/<\/HEAD>//; | ||
112 : | $result =~ s/<\/HTML>//; | ||
113 : | hwang | 1.2 | #Get rid of css reference because it breaks the FIG header |
114 : | $result =~ s/<link rel=.*?>//g; | ||
115 : | hwang | 1.1 | print $result; |
116 : | exit; | ||
117 : | } | ||
118 : | } | ||
119 : | } | ||
120 : | |||
121 : | sub to_prodom_by_seq { | ||
122 : | |||
123 : | my $url = "http://protein.toulouse.inra.fr/prodom/current/cgi-bin/ProDomBlast3.pl"; | ||
124 : | my $request = POST( $url, | ||
125 : | Content_Type => 'form-data', | ||
126 : | Content => [ 'matrice' => 'BLOSUM62', | ||
127 : | 'program' => 'ncbi-blastp', | ||
128 : | 'typebd' => 'multiple alignments', | ||
129 : | 'expect' => '.01', | ||
130 : | 'filtre' => 'seq', | ||
131 : | 'nom_seq' => '', | ||
132 : | 'sequence' => $seq, | ||
133 : | ] | ||
134 : | ); | ||
135 : | |||
136 : | |||
137 : | my $response = $user_agent->request($request); | ||
138 : | my $result = $response->content; | ||
139 : | hwang | 1.4 | |
140 : | $result =~ s/<\!DOCTYPE HTML PUBLIC \"-\/\/W3C\/\/DTD HTML 4\.0 Transitional\/\/EN\"//; | ||
141 : | $result =~ s/"http\:\/\/www\.w3\.org\/TR\/REC-html40\/loose\.dtd\">//; | ||
142 : | $result =~ s/<HTML>//; | ||
143 : | $result =~ s/<HEAD>//; | ||
144 : | $result =~ s/<\/HEAD>//; | ||
145 : | $result =~ s/<\/HTML>//; | ||
146 : | |||
147 : | hwang | 1.1 | # Replace relative paths with absolute paths |
148 : | hwang | 1.4 | $result =~ s/\.\./http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current/g; |
149 : | hwang | 1.2 | $result =~ s/<link rel=.*?>//g; |
150 : | hwang | 1.5 | $result =~ s/href\=\"\/prodom\.html\"/http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current\/html\/home\.php/g; |
151 : | hwang | 1.1 | print $result; |
152 : | hwang | 1.2 | |
153 : | hwang | 1.1 | } |
154 : |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |