[Bio] / FigWebServices / figfam_server_1.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/figfam_server_1.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download)

1 : disz 1.1 use strict;
2 :     use FIG;
3 :     use CGI;
4 :     use Data::Dumper;
5 :     use FFs;
6 :     use FF;
7 :     use Kmers;
8 :     use FIG_Config;
9 :    
10 :    
11 :     my $fig = new FIG;
12 :     my $ffs = new FFs("$FIG_Config::FigfamsData");
13 :    
14 :     my $kmerDB = "/home/overbeek/Proj/MotifFF/Rel9/10mers/kmer.db";
15 :     my $friDB = "/home/overbeek/Proj/MotifFF/Rel9/10mers/FRI.db";
16 :     my $binDB = "/scratch/olson/table.rel9+sims.Apr25";
17 :     my $kmers = Kmers->new_using_C($binDB, $friDB);
18 :     my $kmers_ross = new Kmers($kmerDB, $friDB);
19 :    
20 :     $| = 1;
21 :     my $use_tempfile = 0;
22 :     my $hook_called;
23 :     my $proc_buffer;
24 :     my $clean_up = 0;
25 :    
26 :     my $header = "Content-type: text/plain\n\n";
27 :    
28 :     my ($func, $function) = split(/=/, $ENV{QUERY_STRING});
29 :     if ($func ne "function") {
30 :     myerror(CGI->new(), "500 missing function", "Unknown argument $func\n");
31 :     }
32 :     $function or myerror(CGI->new(), "500 missing argument", "missing function argument");
33 :    
34 :    
35 :     #print STDERR "$function\n";
36 :     if ($function eq "members_of_families") {
37 :     my $cgi = new CGI;
38 :     my @id = $cgi->param('id');
39 :     print $cgi->header();
40 :     @id or myerror($cgi, "500 missing id", "figfam server $function missing id argument");
41 :     foreach my $famid (@id) {
42 :     my $fam = new FF($famid, $ffs->{dir});
43 :     if ($fam) {
44 :     print $famid, " ", $fam->family_function(), "\n";
45 :     print join("\n", $fam->list_members()), "\n";
46 :     } else {
47 :     print "$famid INVALID-FAMILY\n";
48 :     }
49 :     }
50 :     } elsif ($function eq "should_be_member") {
51 :     my $cgi = new CGI;
52 :     print $cgi->header();
53 :     my @id = $cgi->param('id_seq');
54 :     @id or myerror($cgi, "500 missing id_seq", "figfam server $function missing id_seq argument");
55 :     foreach my $parm (@id) {
56 :     my ($famid, $seq) = split /,/, $parm;
57 :     my $fam = new FF($famid, $ffs->{dir});
58 :     if ($fam) {
59 :     my ($bool, $sims) = $fam->should_be_member($seq), "\n";
60 :     print $bool?1:0, "\n";
61 :     } else {
62 :     print "INVALID-FAMILY\t$famid\n";
63 :     }
64 :     }
65 :     } elsif ($function eq "all_families") {
66 :     my $cgi = new CGI;
67 :     print $cgi->header();
68 :     print join("\n", $ffs->all_families(1)), "\n";
69 :     } elsif ($function eq "place_in_family" || $function eq "assign_functions_to_DNA" ||
70 :     $function eq "assign_function_to_prot") {
71 :     $clean_up = 1;
72 :     $proc_buffer = new ProcBufferFASTA();
73 :     #print STDERR "Setting hook\n";
74 :     CGI::upload_hook(\&hook, $proc_buffer, $use_tempfile);
75 :     } else {
76 :     myerror(CGI->new(), "500 invalid function", "invalid function $function\n");
77 :     }
78 :    
79 :     if ($clean_up) {
80 :     $proc_buffer->parse_fasta();
81 :     }
82 :    
83 :    
84 :     sub hook {
85 :     my ($filename, $buffer, $bytes_read, $data) = @_;
86 :    
87 :     if (not $hook_called) {
88 :     print $header;
89 :     # print "BYTES = $bytes_read\n$buffer<br>";
90 :     #print "ENV String ", $ENV{QUERY_STRING}, "\n";
91 :     #print "Filename $filename\n";
92 :     } else {
93 :     # print "Read $bytes_read of $filename\n$buffer<br>";
94 :     }
95 :     $data->process_block($buffer);
96 :     $hook_called += $bytes_read;
97 :    
98 :     }
99 :    
100 :     if ($hook_called) {
101 :     #print "All done $hook_called\n";
102 :     }
103 :    
104 :     exit;
105 :    
106 :     #
107 :     #The FIGfam server processes requests of the form:
108 :     #
109 :     # 1. PLACE-IN-FAMILY takes as input a list of protein sequences. It
110 :     # returns a list where each element describes the outcome of
111 :     # trying to place the corresponding input sequence into a
112 :     # FIGfam. Each output can be either
113 :     #
114 :     # COULD-NOT-PLACE-IN-FAMILY
115 :     # or
116 :     # ID FUNCTION
117 :     #
118 :     # where ID is of the form FIGxxxxxx and FUNCTION is the family
119 :     # function.
120 :     #
121 :     # 2. MEMBERS-OF-FAMILIES takes as input a list of FIGfam IDs. The
122 :     # output is a list of functions for those families
123 :     # (INVALID-FAMILY will be returned for IDs that do not correspond
124 :     # to an active family), as well as a list of the IDs in each family.
125 :     #
126 :     # 3. SHOULD-BE-MEMBER takes as input a list of 2-tuples
127 :     #
128 :     # [FIGfam-ID,protein sequence]
129 :     #
130 :     # It returns a list of boolean values indicating whether or not
131 :     # the indicated protein sequence can be placed in the designated
132 :     # family.
133 :     #
134 :     # 4. ALL-FAMILIES returns a list of [FIGfam-ID,function] tuples.
135 :     #
136 :     #
137 :     # 5. ASSIGN-FUNCTION-TO-PROT is similar to PLACE-IN-FAMILY, except
138 :     # that the returned list contains either
139 :     #
140 :     # COULD-NOT-PLACE-IN-FAMILY
141 :     # or
142 :     # ID FUNCTION
143 :     #
144 :     # That is, it does not indicate which FIGfam was used to
145 :     # determine the function. This allows higher-performance
146 :     # alternatives for cases in which multiple FIGfams implement the
147 :     # same function. The algorithm supported utilizes the underlying
148 :     # FIGfams, but characterizes sets that implement the same
149 :     # function and does not support distinguishing which FIGfam
150 :     # is actually the right subgrouping.
151 :     #
152 :     # 6. ASSIGN-FUNCTIONS-TO-DNA takes as input a list of DNA
153 :     # sequences. It returns a list where each element describes
154 :     # a region of DNA that is believed to be part of a gene encoding
155 :     # a protein sequence that would be placed into a FIGfam
156 :     # successfully, if the whole protein sequence could be
157 :     # determined. That is, the returned list will contain entrties
158 :     # of either the form
159 :     #
160 :     # COULD-NOT-PLACE-ANY-REGIONS-IN-FAMILIES
161 :     # or
162 :     # BEGIN1 END1 FUNCTION1 BEGIN2 END2 FUNCTION2 ...
163 :     #
164 :     # where BEGIN and END specify a region (if BEGIN is greater than
165 :     # END, the region described is on the reverse strand) and
166 :     # FUNCTION is the family function of the protein sequence that is
167 :     # believed to be encoded by DNA including the embedded region.
168 :     # Each input sequence can produce an arbitrary number of matched
169 :     # regions, there will be 3 fields for each matched region. Note
170 :     # that the described region may include frameshifts and embedded
171 :     # stop codons. The algorithm seeking meaningful sections of DNA
172 :     # assumes that it may have an incomplete, low-quality sequence
173 :     # (and uses an algorithm that attempts to locate meaningful
174 :     # matches even so).
175 :    
176 :     sub myerror
177 :     {
178 :     my($cgi, $stat, $msg) = @_;
179 :     print $cgi->header(-status => $stat);
180 :     print "$msg\n";
181 :     exit;
182 :     }
183 :    
184 :    
185 :     package ProcBufferFASTA;
186 :     use strict;
187 :     sub new {
188 :     my($class) = @_;
189 :     my $self = {
190 :     buf => '',
191 :     fasta=> ''
192 :     } ;
193 :     return bless $self, $class;
194 :     }
195 :    
196 :     sub process_block
197 :     {
198 :     my($self, $block) = @_;
199 :     $self->{buf} .= $block;
200 :     #print "$block, DNA Processing\n";
201 :    
202 :     while ($self->{buf} =~ s/^([^\n]*\n)//go) {
203 :     if (substr($1, 0, 1) eq ">") {
204 :     $self->parse_fasta();
205 :     $self->{fasta} = $1;
206 :     } else {
207 :     $self->{fasta} .= $1;
208 :     }
209 :     }
210 :    
211 :     }
212 :     sub parse_fasta {
213 :     my ($self) = @_;
214 :     my $state = 'expect_header';
215 :     my $cur_id;
216 :     my $seq = '';
217 :     #print STDERR "FASTA =$self->{fasta}\n";
218 :     while ($self->{fasta} =~ /([^\n]*)\n/go) {
219 :     $_ = $1;
220 :     if ($state eq 'expect_header')
221 :     {
222 :     if (/^>(\S+)/)
223 :     {
224 :     $cur_id = $1;
225 :     $state = 'expect_data';
226 :     #print $clean_fh ">$cur_id\n";
227 :     next;
228 :     }
229 :     else
230 :     {
231 :     die "Invalid fasta: Expected header at line $.\n";
232 :     }
233 :     }
234 :     elsif ($state eq 'expect_data')
235 :     {
236 :     if (/^>(\S+)/)
237 :     {
238 :     $cur_id = $1;
239 :     $state = 'expect_data';
240 :     #print $clean_fh ">$cur_id\n";
241 :     next;
242 :     }
243 :     elsif (/^([acgtumrwsykbdhvn]*)\s*$/i)
244 :     #elsif (/^([*abcdefghijklmnopqrstuvwxyz]*)\s*$/i)
245 :     {
246 :     $seq .= lc($1);
247 :     #print $clean_fh lc($1) . "\n";
248 :     next;
249 :     }
250 :     elsif (/^([*abcdefghijklmnopqrstuvwxyz]*)\s*$/i)
251 :     {
252 :     $seq .= $1;
253 :     ##print $clean_fh lc($1) . "\n";
254 :     #print "SEQ 2 = $seq\n";
255 :     next;
256 :     ##die "Invalid fasta: Bad data (appears to be protein translation data) at line $.\n";
257 :     }
258 :     else
259 :     {
260 :     my $str = $_;
261 :     if (length($_) > 100)
262 :     {
263 :     $str = substr($_, 0, 50) . " [...] " . substr($_, -50);
264 :     }
265 : disz 1.2 print "Invalid fasta: Bad data at line $.\n$str\n";
266 :     exit;
267 : disz 1.1 }
268 :     }
269 :     else
270 :     {
271 :     die "Internal error: invalid state $state\n";
272 :     }
273 :     }
274 :    
275 :    
276 :     if ($seq) {
277 :     #print "XXX$cur_id, $seq\n";
278 :     #print $function;
279 :     my $id = $cur_id;
280 :     if ($function eq "assign_functions_to_DNA") {
281 :     my @hits = $kmers->assign_functions_to_DNA($seq);
282 :     foreach my $hit (@hits) {
283 :     my ($b,$e,$func) = @$hit;
284 :     print join("\t",($id,join("_",($id,$b,$e)),$func)),"\n";
285 :     }
286 :     } elsif ($function eq "assign_function_to_prot") {
287 :     my $func = $kmers->assign_function_to_prot($seq);
288 :     if ($func) {
289 :     print "$id\t$func\n";
290 :     } else {
291 :     print "$id\tCOULD-NOT-PLACE\n";
292 :     }
293 :     } elsif ($function eq "place_in_family") {
294 :     my ($fam) = $ffs->place_in_family($seq);
295 :     if ($fam) {
296 :     print "$fam->{id}\t$fam->{function}\n";
297 :     } else {
298 :     print "COULD-NOT-PLACE-IN_FAMILY\n";
299 :     }
300 :     }
301 :     }
302 :     }
303 :    
304 :    
305 :     package ProcBuffer;
306 :     use strict;
307 :     sub new {
308 :     my($class) = @_;
309 :     my $self = {
310 :     buf => '',
311 :     } ;
312 :     return bless $self, $class;
313 :     }
314 :    
315 :     sub process_block
316 :     {
317 :     my($self, $block) = @_;
318 :     $self->{buf} .= $block;
319 :     print $block;
320 :     }
321 :    
322 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3