Parent Directory
|
Revision Log
Revision 1.23 - (view) (download) (as text)
1 : | mkubal | 1.1 | package Observation; |
2 : | |||
3 : | mkubal | 1.19 | use lib '/vol/ontologies'; |
4 : | use DBMaster; | ||
5 : | |||
6 : | mkubal | 1.1 | require Exporter; |
7 : | @EXPORT_OK = qw(get_objects); | ||
8 : | |||
9 : | arodri7 | 1.16 | use FIG_Config; |
10 : | mkubal | 1.1 | use strict; |
11 : | arodri7 | 1.16 | #use warnings; |
12 : | arodri7 | 1.9 | use HTML; |
13 : | mkubal | 1.1 | |
14 : | 1; | ||
15 : | |||
16 : | arodri7 | 1.23 | # $Id: Observation.pm,v 1.22 2007/06/29 16:22:37 arodri7 Exp $ |
17 : | mkubal | 1.1 | |
18 : | =head1 NAME | ||
19 : | |||
20 : | Observation -- A presentation layer for observations in SEED. | ||
21 : | |||
22 : | =head1 DESCRIPTION | ||
23 : | |||
24 : | The SEED environment contains various sources of information for sequence features. The purpose of this library is to provide a | ||
25 : | single interface to this data. | ||
26 : | |||
27 : | The data can be used to display information for a given sequence feature (protein or other, but primarily information is computed for proteins). | ||
28 : | |||
29 : | Example: | ||
30 : | |||
31 : | arodri7 | 1.9 | |
32 : | mkubal | 1.1 | use FIG; |
33 : | use Observation; | ||
34 : | |||
35 : | paczian | 1.2 | my $fig = new FIG; |
36 : | my $fid = "fig|83333.1.peg.3"; | ||
37 : | |||
38 : | my $observations = Observation::get_objects($fid); | ||
39 : | foreach my $observation (@$observations) { | ||
40 : | print "ID: " . $fid . "\n"; | ||
41 : | print "Start: " . $observation->start() . "\n"; | ||
42 : | ... | ||
43 : | } | ||
44 : | mkubal | 1.1 | |
45 : | B<return an array of objects> | ||
46 : | |||
47 : | |||
48 : | print "$Observation->acc\n" prints the Accession number if present for the Observation | ||
49 : | |||
50 : | =cut | ||
51 : | |||
52 : | =head1 BACKGROUND | ||
53 : | |||
54 : | =head2 Data incorporated in the Observations | ||
55 : | |||
56 : | As the goal of this library is to provide an integrated view, we combine diverse sources of evidence. | ||
57 : | |||
58 : | =head3 SEED core evidence | ||
59 : | |||
60 : | The core SEED data structures provided by FIG.pm. These are Similarities, BBHs and PCHs. | ||
61 : | |||
62 : | =head3 Attribute based Evidence | ||
63 : | |||
64 : | We use the SEED attribute infrastructure to store information computed by a variety of computational procedures. | ||
65 : | |||
66 : | These are e.g. InterPro hits via InterProScan (ipr), NCBI Conserved Domain Database Hits via PSSM(cdd), | ||
67 : | PFAM hits via HMM(pfam), SignalP results(signalp), and various others. | ||
68 : | |||
69 : | =head1 METHODS | ||
70 : | |||
71 : | The public methods this package provides are listed below: | ||
72 : | |||
73 : | =head3 acc() | ||
74 : | |||
75 : | A valid accession or remote ID (in the style of a db_xref) or a valid local ID (FID) in case this is supported. | ||
76 : | |||
77 : | =cut | ||
78 : | |||
79 : | sub acc { | ||
80 : | my ($self) = @_; | ||
81 : | |||
82 : | return $self->{acc}; | ||
83 : | } | ||
84 : | |||
85 : | =head3 description() | ||
86 : | |||
87 : | The description of the hit. Taken from the data or from the our Ontology database for some cases e.g. IPR or PFAM. | ||
88 : | |||
89 : | B<Please note:> | ||
90 : | Either remoteid or description is required. | ||
91 : | |||
92 : | =cut | ||
93 : | |||
94 : | sub description { | ||
95 : | my ($self) = @_; | ||
96 : | |||
97 : | arodri7 | 1.5 | return $self->{description}; |
98 : | mkubal | 1.1 | } |
99 : | |||
100 : | =head3 class() | ||
101 : | |||
102 : | The class of evidence (required). This is usually simply the name of the tool or the name of the SEED data structure. | ||
103 : | B<Please note> the connection of class and display_method and URL. | ||
104 : | mkubal | 1.7 | |
105 : | mkubal | 1.1 | Current valid classes are: |
106 : | |||
107 : | =over 9 | ||
108 : | |||
109 : | arodri7 | 1.9 | =item IDENTICAL (seq) |
110 : | |||
111 : | mkubal | 1.3 | =item SIM (seq) |
112 : | mkubal | 1.1 | |
113 : | mkubal | 1.3 | =item BBH (seq) |
114 : | mkubal | 1.1 | |
115 : | mkubal | 1.3 | =item PCH (fc) |
116 : | mkubal | 1.1 | |
117 : | mkubal | 1.3 | =item FIGFAM (seq) |
118 : | mkubal | 1.1 | |
119 : | mkubal | 1.3 | =item IPR (dom) |
120 : | mkubal | 1.1 | |
121 : | mkubal | 1.3 | =item CDD (dom) |
122 : | mkubal | 1.1 | |
123 : | mkubal | 1.3 | =item PFAM (dom) |
124 : | mkubal | 1.1 | |
125 : | mkubal | 1.12 | =item SIGNALP_CELLO_TMPRED (loc) |
126 : | mkubal | 1.1 | |
127 : | mkubal | 1.20 | =item PDB (seq) |
128 : | |||
129 : | mkubal | 1.3 | =item TMHMM (loc) |
130 : | mkubal | 1.1 | |
131 : | mkubal | 1.3 | =item HMMTOP (loc) |
132 : | mkubal | 1.1 | |
133 : | =back | ||
134 : | |||
135 : | =cut | ||
136 : | |||
137 : | sub class { | ||
138 : | my ($self) = @_; | ||
139 : | |||
140 : | return $self->{class}; | ||
141 : | } | ||
142 : | |||
143 : | =head3 type() | ||
144 : | |||
145 : | The type of evidence (required). | ||
146 : | |||
147 : | Where type is one of the following: | ||
148 : | |||
149 : | =over 8 | ||
150 : | |||
151 : | =item seq=Sequence similarity | ||
152 : | |||
153 : | =item dom=domain based match | ||
154 : | |||
155 : | =item loc=Localization of the feature | ||
156 : | |||
157 : | =item fc=Functional coupling. | ||
158 : | |||
159 : | =back | ||
160 : | |||
161 : | =cut | ||
162 : | |||
163 : | sub type { | ||
164 : | my ($self) = @_; | ||
165 : | |||
166 : | return $self->{acc}; | ||
167 : | } | ||
168 : | |||
169 : | =head3 start() | ||
170 : | |||
171 : | Start of hit in query sequence. | ||
172 : | |||
173 : | =cut | ||
174 : | |||
175 : | sub start { | ||
176 : | my ($self) = @_; | ||
177 : | |||
178 : | return $self->{start}; | ||
179 : | } | ||
180 : | |||
181 : | =head3 end() | ||
182 : | |||
183 : | End of the hit in query sequence. | ||
184 : | |||
185 : | =cut | ||
186 : | |||
187 : | sub stop { | ||
188 : | my ($self) = @_; | ||
189 : | |||
190 : | return $self->{stop}; | ||
191 : | } | ||
192 : | |||
193 : | arodri7 | 1.11 | =head3 start() |
194 : | |||
195 : | Start of hit in query sequence. | ||
196 : | |||
197 : | =cut | ||
198 : | |||
199 : | sub qstart { | ||
200 : | my ($self) = @_; | ||
201 : | |||
202 : | return $self->{qstart}; | ||
203 : | } | ||
204 : | |||
205 : | =head3 qstop() | ||
206 : | |||
207 : | End of the hit in query sequence. | ||
208 : | |||
209 : | =cut | ||
210 : | |||
211 : | sub qstop { | ||
212 : | my ($self) = @_; | ||
213 : | |||
214 : | return $self->{qstop}; | ||
215 : | } | ||
216 : | |||
217 : | =head3 hstart() | ||
218 : | |||
219 : | Start of hit in hit sequence. | ||
220 : | |||
221 : | =cut | ||
222 : | |||
223 : | sub hstart { | ||
224 : | my ($self) = @_; | ||
225 : | |||
226 : | return $self->{hstart}; | ||
227 : | } | ||
228 : | |||
229 : | =head3 end() | ||
230 : | |||
231 : | End of the hit in hit sequence. | ||
232 : | |||
233 : | =cut | ||
234 : | |||
235 : | sub hstop { | ||
236 : | my ($self) = @_; | ||
237 : | |||
238 : | return $self->{hstop}; | ||
239 : | } | ||
240 : | |||
241 : | =head3 qlength() | ||
242 : | |||
243 : | length of the query sequence in similarities | ||
244 : | |||
245 : | =cut | ||
246 : | |||
247 : | sub qlength { | ||
248 : | my ($self) = @_; | ||
249 : | |||
250 : | return $self->{qlength}; | ||
251 : | } | ||
252 : | |||
253 : | =head3 hlength() | ||
254 : | |||
255 : | length of the hit sequence in similarities | ||
256 : | |||
257 : | =cut | ||
258 : | |||
259 : | sub hlength { | ||
260 : | my ($self) = @_; | ||
261 : | |||
262 : | return $self->{hlength}; | ||
263 : | } | ||
264 : | |||
265 : | |||
266 : | |||
267 : | mkubal | 1.1 | =head3 evalue() |
268 : | |||
269 : | E-value or P-Value if present. | ||
270 : | |||
271 : | =cut | ||
272 : | |||
273 : | sub evalue { | ||
274 : | my ($self) = @_; | ||
275 : | |||
276 : | return $self->{evalue}; | ||
277 : | } | ||
278 : | |||
279 : | =head3 score() | ||
280 : | |||
281 : | Score if present. | ||
282 : | |||
283 : | B<Please note: > | ||
284 : | Either score or eval are required. | ||
285 : | |||
286 : | =cut | ||
287 : | |||
288 : | sub score { | ||
289 : | my ($self) = @_; | ||
290 : | return $self->{score}; | ||
291 : | } | ||
292 : | |||
293 : | |||
294 : | mkubal | 1.12 | =head3 display() |
295 : | mkubal | 1.1 | |
296 : | mkubal | 1.12 | will be different for each type |
297 : | mkubal | 1.1 | |
298 : | =cut | ||
299 : | |||
300 : | mkubal | 1.7 | sub display { |
301 : | mkubal | 1.1 | |
302 : | mkubal | 1.7 | die "Abstract Method Called\n"; |
303 : | mkubal | 1.1 | |
304 : | } | ||
305 : | |||
306 : | mkubal | 1.7 | |
307 : | mkubal | 1.1 | =head3 rank() |
308 : | |||
309 : | Returns an integer from 1 - 10 indicating the importance of this observations. | ||
310 : | |||
311 : | Currently always returns 1. | ||
312 : | |||
313 : | =cut | ||
314 : | |||
315 : | sub rank { | ||
316 : | my ($self) = @_; | ||
317 : | |||
318 : | # return $self->{rank}; | ||
319 : | |||
320 : | return 1; | ||
321 : | } | ||
322 : | |||
323 : | =head3 supports_annotation() | ||
324 : | |||
325 : | Does a this observation support the annotation of its feature? | ||
326 : | |||
327 : | Returns | ||
328 : | |||
329 : | =over 3 | ||
330 : | |||
331 : | =item 10, if feature annotation is identical to $self->description | ||
332 : | |||
333 : | =item 1, Feature annotation is similar to $self->annotation; this is computed using FIG::SameFunc() | ||
334 : | |||
335 : | =item undef | ||
336 : | |||
337 : | =back | ||
338 : | |||
339 : | =cut | ||
340 : | |||
341 : | sub supports_annotation { | ||
342 : | my ($self) = @_; | ||
343 : | |||
344 : | # no code here so far | ||
345 : | |||
346 : | return $self->{supports_annotation}; | ||
347 : | } | ||
348 : | |||
349 : | =head3 url() | ||
350 : | |||
351 : | URL describing the subject. In case of a BLAST hit against a sequence, this URL will lead to a page displaying the sequence record for the sequence. In case of an HMM hit, the URL will be to the URL description. | ||
352 : | |||
353 : | =cut | ||
354 : | |||
355 : | sub url { | ||
356 : | my ($self) = @_; | ||
357 : | |||
358 : | my $url = get_url($self->type, $self->acc); | ||
359 : | |||
360 : | return $url; | ||
361 : | } | ||
362 : | |||
363 : | =head3 get_objects() | ||
364 : | |||
365 : | This is the B<REAL WORKHORSE> method of this Package. | ||
366 : | |||
367 : | It will probably have to: | ||
368 : | |||
369 : | - get all sims for the feature | ||
370 : | - get all bbhs for the feature | ||
371 : | - copy information from sim to bbh (bbh have no match location etc) | ||
372 : | - get pchs (difficult) | ||
373 : | - get attributes (there is code for this that in get_attribute_based_observations | ||
374 : | - get_attributes_based_observations returns an array of arrays of hashes like this" | ||
375 : | |||
376 : | mkubal | 1.7 | my $dataset |
377 : | mkubal | 1.1 | [ |
378 : | [ { name => 'acc', value => '1234' }, | ||
379 : | { name => 'from', value => '4' }, | ||
380 : | { name => 'to', value => '400' }, | ||
381 : | .... | ||
382 : | ], | ||
383 : | [ { name => 'acc', value => '456' }, | ||
384 : | { name => 'from', value => '1' }, | ||
385 : | { name => 'to', value => '100' }, | ||
386 : | .... | ||
387 : | ], | ||
388 : | ... | ||
389 : | ]; | ||
390 : | return $datasets; | ||
391 : | } | ||
392 : | |||
393 : | It will invoke the required calls to the SEED API to retrieve the information required. | ||
394 : | |||
395 : | =cut | ||
396 : | |||
397 : | sub get_objects { | ||
398 : | mkubal | 1.7 | my ($self,$fid,$classes) = @_; |
399 : | |||
400 : | my $objects = []; | ||
401 : | my @matched_datasets=(); | ||
402 : | mkubal | 1.1 | |
403 : | mkubal | 1.7 | # call function that fetches attribute based observations |
404 : | # returns an array of arrays of hashes | ||
405 : | |||
406 : | if(scalar(@$classes) < 1){ | ||
407 : | get_attribute_based_observations($fid,\@matched_datasets); | ||
408 : | get_sims_observations($fid,\@matched_datasets); | ||
409 : | get_identical_proteins($fid,\@matched_datasets); | ||
410 : | get_functional_coupling($fid,\@matched_datasets); | ||
411 : | } | ||
412 : | else{ | ||
413 : | my %domain_classes; | ||
414 : | arodri7 | 1.9 | my $identical_flag=0; |
415 : | my $pch_flag=0; | ||
416 : | mkubal | 1.12 | my $location_flag = 0; |
417 : | arodri7 | 1.10 | my $sims_flag=0; |
418 : | arodri7 | 1.15 | my $cluster_flag = 0; |
419 : | mkubal | 1.20 | my $pdb_flag = 0; |
420 : | mkubal | 1.7 | foreach my $class (@$classes){ |
421 : | arodri7 | 1.9 | if($class =~ /(IPR|CDD|PFAM)/){ |
422 : | mkubal | 1.7 | $domain_classes{$class} = 1; |
423 : | arodri7 | 1.9 | } |
424 : | elsif ($class eq "IDENTICAL") | ||
425 : | { | ||
426 : | $identical_flag = 1; | ||
427 : | } | ||
428 : | elsif ($class eq "PCH") | ||
429 : | { | ||
430 : | $pch_flag = 1; | ||
431 : | mkubal | 1.7 | } |
432 : | mkubal | 1.12 | elsif ($class =~/(SIGNALP_CELLO_TMPRED)/) |
433 : | { | ||
434 : | $location_flag = 1; | ||
435 : | } | ||
436 : | arodri7 | 1.10 | elsif ($class eq "SIM") |
437 : | { | ||
438 : | $sims_flag = 1; | ||
439 : | } | ||
440 : | arodri7 | 1.15 | elsif ($class eq "CLUSTER") |
441 : | { | ||
442 : | $cluster_flag = 1; | ||
443 : | } | ||
444 : | mkubal | 1.20 | elsif ($class eq "PDB") |
445 : | { | ||
446 : | $pdb_flag = 1; | ||
447 : | } | ||
448 : | |||
449 : | mkubal | 1.7 | } |
450 : | arodri7 | 1.9 | |
451 : | if ($identical_flag ==1) | ||
452 : | { | ||
453 : | get_identical_proteins($fid,\@matched_datasets); | ||
454 : | } | ||
455 : | if ( (defined($domain_classes{IPR})) || (defined($domain_classes{CDD})) || (defined($domain_classes{PFAM})) ) { | ||
456 : | get_attribute_based_domain_observations($fid,\%domain_classes,\@matched_datasets); | ||
457 : | } | ||
458 : | if ($pch_flag == 1) | ||
459 : | { | ||
460 : | get_functional_coupling($fid,\@matched_datasets); | ||
461 : | } | ||
462 : | arodri7 | 1.10 | if ($sims_flag == 1) |
463 : | { | ||
464 : | get_sims_observations($fid,\@matched_datasets); | ||
465 : | } | ||
466 : | arodri7 | 1.5 | |
467 : | mkubal | 1.12 | if ($location_flag == 1) |
468 : | { | ||
469 : | get_attribute_based_location_observations($fid,\@matched_datasets); | ||
470 : | } | ||
471 : | arodri7 | 1.15 | if ($cluster_flag == 1) |
472 : | { | ||
473 : | get_cluster_observations($fid,\@matched_datasets); | ||
474 : | } | ||
475 : | mkubal | 1.20 | if ($pdb_flag == 1) |
476 : | { | ||
477 : | get_pdb_observations($fid,\@matched_datasets); | ||
478 : | } | ||
479 : | |||
480 : | mkubal | 1.12 | |
481 : | mkubal | 1.1 | } |
482 : | mkubal | 1.7 | |
483 : | foreach my $dataset (@matched_datasets) { | ||
484 : | my $object; | ||
485 : | if($dataset->{'type'} eq "dom"){ | ||
486 : | $object = Observation::Domain->new($dataset); | ||
487 : | } | ||
488 : | arodri7 | 1.9 | if($dataset->{'class'} eq "PCH"){ |
489 : | $object = Observation::FC->new($dataset); | ||
490 : | } | ||
491 : | if ($dataset->{'class'} eq "IDENTICAL"){ | ||
492 : | $object = Observation::Identical->new($dataset); | ||
493 : | } | ||
494 : | mkubal | 1.12 | if ($dataset->{'class'} eq "SIGNALP_CELLO_TMPRED"){ |
495 : | $object = Observation::Location->new($dataset); | ||
496 : | } | ||
497 : | arodri7 | 1.10 | if ($dataset->{'class'} eq "SIM"){ |
498 : | $object = Observation::Sims->new($dataset); | ||
499 : | } | ||
500 : | arodri7 | 1.15 | if ($dataset->{'class'} eq "CLUSTER"){ |
501 : | $object = Observation::Cluster->new($dataset); | ||
502 : | } | ||
503 : | mkubal | 1.20 | if ($dataset->{'class'} eq "PDB"){ |
504 : | $object = Observation::PDB->new($dataset); | ||
505 : | } | ||
506 : | |||
507 : | mkubal | 1.7 | push (@$objects, $object); |
508 : | mkubal | 1.1 | } |
509 : | mkubal | 1.7 | |
510 : | return $objects; | ||
511 : | mkubal | 1.1 | |
512 : | } | ||
513 : | |||
514 : | =head1 Internal Methods | ||
515 : | |||
516 : | These methods are not meant to be used outside of this package. | ||
517 : | |||
518 : | B<Please do not use them outside of this package!> | ||
519 : | |||
520 : | =cut | ||
521 : | |||
522 : | |||
523 : | =head3 get_url (internal) | ||
524 : | |||
525 : | get_url() return a valid URL or undef for any observation. | ||
526 : | |||
527 : | URLs are constructed by looking at the Accession acc() and name() | ||
528 : | |||
529 : | Info from both attributes is combined with a table of base URLs stored in this function. | ||
530 : | |||
531 : | =cut | ||
532 : | |||
533 : | sub get_url { | ||
534 : | |||
535 : | my ($self) = @_; | ||
536 : | my $url=''; | ||
537 : | |||
538 : | # a hash with a URL for each observation; identified by name() | ||
539 : | #my $URL => { 'PFAM' => "http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?" ,\ | ||
540 : | # 'IPR' => "http://www.ebi.ac.uk/interpro/DisplayIproEntry?ac=" ,\ | ||
541 : | # 'CDD' => "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid=",\ | ||
542 : | # 'PIR' => "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid=",\ | ||
543 : | # 'FIGFAM' => '',\ | ||
544 : | # 'sim'=> "http://www.theseed.org/linkin.cgi?id=",\ | ||
545 : | # 'bbh'=> "http://www.theseed.org/linkin.cgi?id=" | ||
546 : | #}; | ||
547 : | |||
548 : | # if (defined $URL{$self->name}) { | ||
549 : | # $url = $URL{$self->name}.$self->acc; | ||
550 : | # return $url; | ||
551 : | # } | ||
552 : | # else | ||
553 : | return undef; | ||
554 : | } | ||
555 : | |||
556 : | =head3 get_display_method (internal) | ||
557 : | |||
558 : | get_display_method() return a valid URL or undef for any observation. | ||
559 : | |||
560 : | URLs are constructed by looking at the Accession acc() and name() | ||
561 : | and Info from both attributes is combined with a table of base URLs stored in this function. | ||
562 : | |||
563 : | =cut | ||
564 : | |||
565 : | sub get_display_method { | ||
566 : | |||
567 : | my ($self) = @_; | ||
568 : | |||
569 : | # a hash with a URL for each observation; identified by name() | ||
570 : | #my $URL => { 'sim'=> "http://www.theseed.org/featalign.cgi?id1=",\ | ||
571 : | # 'bbh'=> "http://www.theseed.org/featalign.cgi?id1=" | ||
572 : | # }; | ||
573 : | |||
574 : | #if (defined $URL{$self->name}) { | ||
575 : | # $url = $URL{$self->name}.$self->feature_id."&id2=".$self->acc; | ||
576 : | # return $url; | ||
577 : | # } | ||
578 : | # else | ||
579 : | return undef; | ||
580 : | } | ||
581 : | |||
582 : | mkubal | 1.7 | |
583 : | sub get_attribute_based_domain_observations{ | ||
584 : | |||
585 : | # we read a FIG ID and a reference to an array (of arrays of hashes, see above) | ||
586 : | my ($fid,$domain_classes,$datasets_ref) = (@_); | ||
587 : | |||
588 : | my $fig = new FIG; | ||
589 : | |||
590 : | foreach my $attr_ref ($fig->get_attributes($fid)) { | ||
591 : | my $key = @$attr_ref[1]; | ||
592 : | my @parts = split("::",$key); | ||
593 : | my $class = $parts[0]; | ||
594 : | |||
595 : | if($domain_classes->{$parts[0]}){ | ||
596 : | my $val = @$attr_ref[2]; | ||
597 : | mkubal | 1.8 | if($val =~/^(\d+\.\d+|0\.0);(\d+)-(\d+)/){ |
598 : | mkubal | 1.7 | my $raw_evalue = $1; |
599 : | mkubal | 1.8 | my $from = $2; |
600 : | my $to = $3; | ||
601 : | mkubal | 1.7 | my $evalue; |
602 : | if($raw_evalue =~/(\d+)\.(\d+)/){ | ||
603 : | my $part2 = 1000 - $1; | ||
604 : | my $part1 = $2/100; | ||
605 : | $evalue = $part1."e-".$part2; | ||
606 : | } | ||
607 : | else{ | ||
608 : | mkubal | 1.8 | $evalue = "0.0"; |
609 : | mkubal | 1.7 | } |
610 : | |||
611 : | my $dataset = {'class' => $class, | ||
612 : | 'acc' => $key, | ||
613 : | 'type' => "dom" , | ||
614 : | 'evalue' => $evalue, | ||
615 : | 'start' => $from, | ||
616 : | 'stop' => $to | ||
617 : | }; | ||
618 : | |||
619 : | push (@{$datasets_ref} ,$dataset); | ||
620 : | } | ||
621 : | } | ||
622 : | } | ||
623 : | } | ||
624 : | mkubal | 1.12 | |
625 : | sub get_attribute_based_location_observations{ | ||
626 : | |||
627 : | my ($fid,$datasets_ref) = (@_); | ||
628 : | my $fig = new FIG; | ||
629 : | |||
630 : | my $location_attributes = ['SignalP','CELLO','TMPRED']; | ||
631 : | |||
632 : | my $dataset = {'type' => "loc", 'class' => 'SIGNALP_CELLO_TMPRED'}; | ||
633 : | foreach my $attr_ref ($fig->get_attributes($fid,$location_attributes)) { | ||
634 : | my $key = @$attr_ref[1]; | ||
635 : | my @parts = split("::",$key); | ||
636 : | my $sub_class = $parts[0]; | ||
637 : | my $sub_key = $parts[1]; | ||
638 : | my $value = @$attr_ref[2]; | ||
639 : | if($sub_class eq "SignalP"){ | ||
640 : | if($sub_key eq "cleavage_site"){ | ||
641 : | my @value_parts = split(";",$value); | ||
642 : | $dataset->{'cleavage_prob'} = $value_parts[0]; | ||
643 : | $dataset->{'cleavage_loc'} = $value_parts[1]; | ||
644 : | } | ||
645 : | elsif($sub_key eq "signal_peptide"){ | ||
646 : | $dataset->{'signal_peptide_score'} = $value; | ||
647 : | } | ||
648 : | } | ||
649 : | elsif($sub_class eq "CELLO"){ | ||
650 : | $dataset->{'cello_location'} = $sub_key; | ||
651 : | $dataset->{'cello_score'} = $value; | ||
652 : | } | ||
653 : | elsif($sub_class eq "TMPRED"){ | ||
654 : | my @value_parts = split(";",$value); | ||
655 : | $dataset->{'tmpred_score'} = $value_parts[0]; | ||
656 : | $dataset->{'tmpred_locations'} = $value_parts[1]; | ||
657 : | } | ||
658 : | } | ||
659 : | |||
660 : | push (@{$datasets_ref} ,$dataset); | ||
661 : | |||
662 : | } | ||
663 : | |||
664 : | mkubal | 1.7 | |
665 : | mkubal | 1.1 | =head3 get_attribute_based_evidence (internal) |
666 : | |||
667 : | This method retrieves evidence from the attribute server | ||
668 : | |||
669 : | =cut | ||
670 : | |||
671 : | sub get_attribute_based_observations{ | ||
672 : | |||
673 : | # we read a FIG ID and a reference to an array (of arrays of hashes, see above) | ||
674 : | my ($fid,$datasets_ref) = (@_); | ||
675 : | |||
676 : | my $_myfig = new FIG; | ||
677 : | |||
678 : | foreach my $attr_ref ($_myfig->get_attributes($fid)) { | ||
679 : | |||
680 : | # convert the ref into a string for easier handling | ||
681 : | my ($string) = "@$attr_ref"; | ||
682 : | |||
683 : | # print "S:$string\n"; | ||
684 : | my ($key,$val) = ( $string =~ /\S+\s(\S+)\s(\S+)/); | ||
685 : | |||
686 : | # THIS SHOULD BE DONE ANOTHER WAY FM->TD | ||
687 : | # we need to do the right thing for each type, ie no evalue for CELLO and no coordinates, but a score, etc | ||
688 : | # as fas as possible this should be configured so that the type of observation and the regexp are | ||
689 : | # stored somewhere for easy expansion | ||
690 : | # | ||
691 : | |||
692 : | if (($key =~ /PFAM::/) || ( $key =~ /IPR::/) || ( $key =~ /CDD::/) ) { | ||
693 : | |||
694 : | # some keys are composite CDD::1233244 or PFAM:PF1233 | ||
695 : | |||
696 : | if ( $key =~ /::/ ) { | ||
697 : | my ($firstkey,$restkey) = ( $key =~ /([a-zA-Z0-9]+)::(.*)/); | ||
698 : | $val=$restkey.";".$val; | ||
699 : | $key=$firstkey; | ||
700 : | } | ||
701 : | |||
702 : | my ($acc,$raw_evalue, $from,$to) = ($val =~ /(\S+);(\S+);(\d+)-(\d+)/ ); | ||
703 : | |||
704 : | my $evalue= 255; | ||
705 : | if (defined $raw_evalue) { # some of the tool do not give us an evalue | ||
706 : | |||
707 : | my ($k,$expo) = ( $raw_evalue =~ /(\d+).(\d+)/); | ||
708 : | my ($new_k, $new_exp); | ||
709 : | |||
710 : | # | ||
711 : | # THIS DOES NOT WORK PROPERLY | ||
712 : | # | ||
713 : | if($raw_evalue =~/(\d+).(\d+)/){ | ||
714 : | |||
715 : | # $new_exp = (1000+$expo); | ||
716 : | # $new_k = $k / 100; | ||
717 : | |||
718 : | } | ||
719 : | $evalue = "0.01"#new_k."e-".$new_exp; | ||
720 : | } | ||
721 : | |||
722 : | # unroll it all into an array of hashes | ||
723 : | # this needs to be done differently for different types of observations | ||
724 : | my $dataset = [ { name => 'class', value => $key }, | ||
725 : | { name => 'acc' , value => $acc}, | ||
726 : | { name => 'type', value => "dom"} , # this clearly needs to be done properly FM->TD | ||
727 : | { name => 'evalue', value => $evalue }, | ||
728 : | { name => 'start', value => $from}, | ||
729 : | { name => 'stop' , value => $to} | ||
730 : | ]; | ||
731 : | |||
732 : | push (@{$datasets_ref} ,$dataset); | ||
733 : | } | ||
734 : | } | ||
735 : | } | ||
736 : | |||
737 : | mkubal | 1.20 | =head3 get_pdb_observations() (internal) |
738 : | |||
739 : | This methods sets the type and class for pdb observations | ||
740 : | |||
741 : | =cut | ||
742 : | |||
743 : | sub get_pdb_observations{ | ||
744 : | my ($fid,$datasets_ref) = (@_); | ||
745 : | |||
746 : | my $fig = new FIG; | ||
747 : | |||
748 : | print STDERR "get pdb obs called\n"; | ||
749 : | foreach my $attr_ref ($fig->get_attributes($fid,'PDB')) { | ||
750 : | |||
751 : | my $key = @$attr_ref[1]; | ||
752 : | my($key1,$key2) =split("::",$key); | ||
753 : | my $value = @$attr_ref[2]; | ||
754 : | my ($evalue,$location) = split(";",$value); | ||
755 : | |||
756 : | if($evalue =~/(\d+)\.(\d+)/){ | ||
757 : | my $part2 = 1000 - $1; | ||
758 : | my $part1 = $2/100; | ||
759 : | $evalue = $part1."e-".$part2; | ||
760 : | } | ||
761 : | |||
762 : | my($start,$stop) =split("-",$location); | ||
763 : | |||
764 : | my $url = @$attr_ref[3]; | ||
765 : | my $dataset = {'class' => 'PDB', | ||
766 : | 'type' => 'seq' , | ||
767 : | 'acc' => $key2, | ||
768 : | 'evalue' => $evalue, | ||
769 : | 'start' => $start, | ||
770 : | 'stop' => $stop | ||
771 : | }; | ||
772 : | |||
773 : | push (@{$datasets_ref} ,$dataset); | ||
774 : | } | ||
775 : | |||
776 : | } | ||
777 : | |||
778 : | |||
779 : | |||
780 : | |||
781 : | arodri7 | 1.15 | =head3 get_cluster_observations() (internal) |
782 : | |||
783 : | This methods sets the type and class for cluster observations | ||
784 : | |||
785 : | =cut | ||
786 : | |||
787 : | sub get_cluster_observations{ | ||
788 : | my ($fid,$datasets_ref) = (@_); | ||
789 : | |||
790 : | arodri7 | 1.16 | my $dataset = {'class' => 'CLUSTER', |
791 : | 'type' => 'fc' | ||
792 : | }; | ||
793 : | arodri7 | 1.15 | push (@{$datasets_ref} ,$dataset); |
794 : | } | ||
795 : | |||
796 : | |||
797 : | mkubal | 1.3 | =head3 get_sims_observations() (internal) |
798 : | |||
799 : | This methods retrieves sims fills the internal data structures. | ||
800 : | |||
801 : | =cut | ||
802 : | |||
803 : | sub get_sims_observations{ | ||
804 : | |||
805 : | my ($fid,$datasets_ref) = (@_); | ||
806 : | mkubal | 1.4 | my $fig = new FIG; |
807 : | arodri7 | 1.11 | # my @sims= $fig->nsims($fid,100,1e-20,"fig"); |
808 : | my @sims= $fig->nsims($fid,100,1e-20,"all"); | ||
809 : | mkubal | 1.4 | my ($dataset); |
810 : | mkubal | 1.3 | foreach my $sim (@sims){ |
811 : | mkubal | 1.4 | my $hit = $sim->[1]; |
812 : | arodri7 | 1.11 | my $percent = $sim->[2]; |
813 : | mkubal | 1.4 | my $evalue = $sim->[10]; |
814 : | arodri7 | 1.11 | my $qfrom = $sim->[6]; |
815 : | my $qto = $sim->[7]; | ||
816 : | my $hfrom = $sim->[8]; | ||
817 : | my $hto = $sim->[9]; | ||
818 : | my $qlength = $sim->[12]; | ||
819 : | my $hlength = $sim->[13]; | ||
820 : | my $db = get_database($hit); | ||
821 : | my $func = $fig->function_of($hit); | ||
822 : | my $organism = $fig->org_of($hit); | ||
823 : | |||
824 : | arodri7 | 1.10 | $dataset = {'class' => 'SIM', |
825 : | 'acc' => $hit, | ||
826 : | arodri7 | 1.11 | 'identity' => $percent, |
827 : | arodri7 | 1.10 | 'type' => 'seq', |
828 : | 'evalue' => $evalue, | ||
829 : | arodri7 | 1.11 | 'qstart' => $qfrom, |
830 : | 'qstop' => $qto, | ||
831 : | 'hstart' => $hfrom, | ||
832 : | 'hstop' => $hto, | ||
833 : | 'database' => $db, | ||
834 : | 'organism' => $organism, | ||
835 : | 'function' => $func, | ||
836 : | 'qlength' => $qlength, | ||
837 : | 'hlength' => $hlength | ||
838 : | arodri7 | 1.10 | }; |
839 : | |||
840 : | push (@{$datasets_ref} ,$dataset); | ||
841 : | mkubal | 1.3 | } |
842 : | } | ||
843 : | |||
844 : | arodri7 | 1.11 | =head3 get_database (internal) |
845 : | This method gets the database association from the sequence id | ||
846 : | |||
847 : | =cut | ||
848 : | |||
849 : | sub get_database{ | ||
850 : | my ($id) = (@_); | ||
851 : | |||
852 : | my ($db); | ||
853 : | if ($id =~ /^fig\|/) { $db = "FIG" } | ||
854 : | elsif ($id =~ /^gi\|/) { $db = "NCBI" } | ||
855 : | elsif ($id =~ /^^[NXYZA]P_/) { $db = "RefSeq" } | ||
856 : | elsif ($id =~ /^sp\|/) { $db = "SwissProt" } | ||
857 : | elsif ($id =~ /^uni\|/) { $db = "UniProt" } | ||
858 : | elsif ($id =~ /^tigr\|/) { $db = "TIGR" } | ||
859 : | elsif ($id =~ /^pir\|/) { $db = "PIR" } | ||
860 : | elsif ($id =~ /^kegg\|/) { $db = "KEGG" } | ||
861 : | elsif ($id =~ /^tr\|/) { $db = "TrEMBL" } | ||
862 : | elsif ($id =~ /^eric\|/) { $db = "ASAP" } | ||
863 : | elsif ($id =~ /^img\|/) { $db = "JGI" } | ||
864 : | |||
865 : | return ($db); | ||
866 : | |||
867 : | } | ||
868 : | |||
869 : | arodri7 | 1.5 | =head3 get_identical_proteins() (internal) |
870 : | |||
871 : | This methods retrieves sims fills the internal data structures. | ||
872 : | |||
873 : | =cut | ||
874 : | |||
875 : | sub get_identical_proteins{ | ||
876 : | |||
877 : | my ($fid,$datasets_ref) = (@_); | ||
878 : | my $fig = new FIG; | ||
879 : | my @funcs = (); | ||
880 : | |||
881 : | my @maps_to = grep { $_ ne $fid and $_ !~ /^xxx/ } map { $_->[0] } $fig->mapped_prot_ids($fid); | ||
882 : | |||
883 : | foreach my $id (@maps_to) { | ||
884 : | my ($tmp, $who); | ||
885 : | arodri7 | 1.6 | if (($id ne $fid) && ($tmp = $fig->function_of($id))) { |
886 : | arodri7 | 1.11 | $who = &get_database($id); |
887 : | arodri7 | 1.5 | push(@funcs, [$id,$who,$tmp]); |
888 : | } | ||
889 : | } | ||
890 : | |||
891 : | my ($dataset); | ||
892 : | foreach my $row (@funcs){ | ||
893 : | my $id = $row->[0]; | ||
894 : | my $organism = $fig->org_of($fid); | ||
895 : | my $who = $row->[1]; | ||
896 : | my $assignment = $row->[2]; | ||
897 : | arodri7 | 1.9 | |
898 : | my $dataset = {'class' => 'IDENTICAL', | ||
899 : | 'id' => $id, | ||
900 : | 'organism' => $organism, | ||
901 : | 'type' => 'seq', | ||
902 : | 'database' => $who, | ||
903 : | 'function' => $assignment | ||
904 : | }; | ||
905 : | |||
906 : | arodri7 | 1.5 | push (@{$datasets_ref} ,$dataset); |
907 : | } | ||
908 : | |||
909 : | } | ||
910 : | |||
911 : | arodri7 | 1.6 | =head3 get_functional_coupling() (internal) |
912 : | |||
913 : | This methods retrieves the functional coupling of a protein given a peg ID | ||
914 : | |||
915 : | =cut | ||
916 : | |||
917 : | sub get_functional_coupling{ | ||
918 : | |||
919 : | my ($fid,$datasets_ref) = (@_); | ||
920 : | my $fig = new FIG; | ||
921 : | my @funcs = (); | ||
922 : | |||
923 : | # initialize some variables | ||
924 : | my($sc,$neigh); | ||
925 : | |||
926 : | # set default parameters for coupling and evidence | ||
927 : | my ($bound,$sim_cutoff,$coupling_cutoff) = (5000, 1.0e-10, 4); | ||
928 : | |||
929 : | # get the fc data | ||
930 : | my @fc_data = $fig->coupling_and_evidence($fid,$bound,$sim_cutoff,$coupling_cutoff,1); | ||
931 : | |||
932 : | # retrieve data | ||
933 : | my @rows = map { ($sc,$neigh) = @$_; | ||
934 : | [$sc,$neigh,scalar $fig->function_of($neigh)] | ||
935 : | } @fc_data; | ||
936 : | |||
937 : | my ($dataset); | ||
938 : | foreach my $row (@rows){ | ||
939 : | my $id = $row->[1]; | ||
940 : | my $score = $row->[0]; | ||
941 : | my $description = $row->[2]; | ||
942 : | arodri7 | 1.9 | my $dataset = {'class' => 'PCH', |
943 : | 'score' => $score, | ||
944 : | 'id' => $id, | ||
945 : | 'type' => 'fc', | ||
946 : | 'function' => $description | ||
947 : | }; | ||
948 : | |||
949 : | arodri7 | 1.6 | push (@{$datasets_ref} ,$dataset); |
950 : | } | ||
951 : | } | ||
952 : | arodri7 | 1.5 | |
953 : | mkubal | 1.1 | =head3 get_sims_and_bbhs() (internal) |
954 : | |||
955 : | This methods retrieves sims and also BBHs and fills the internal data structures. | ||
956 : | |||
957 : | =cut | ||
958 : | |||
959 : | # sub get_sims_and_bbhs{ | ||
960 : | |||
961 : | # # blast m8 output format | ||
962 : | # # id1, id2, %ident, align len, mismatches, gaps, q.start, q.stop, s. start, s.stop, eval, bit | ||
963 : | |||
964 : | # my $Sims=(); | ||
965 : | # @sims_src = $fig->sims($fid,80,500,"fig",0); | ||
966 : | # print "found $#sims_src SIMs\n"; | ||
967 : | # foreach $sims (@sims_src) { | ||
968 : | # my ($sims_string) = "@$sims"; | ||
969 : | # # print "$sims_string\n"; | ||
970 : | # my ($rfid,$start,$stop,$eval) = ( $sims_string =~ /\S+\s+(\S+)\s+\S+\s\S+\s+(\S+)\s+(\S+)\s+ | ||
971 : | # \S+\s+\S+\s+\S+\s+\S+\s+(\S+)+.*/); | ||
972 : | # # print "ID: $rfid, E:$eval, Start:$start stop:$stop\n"; | ||
973 : | # $Sims{$rfid}{'eval'}=$eval; | ||
974 : | # $Sims{$rfid}{'start'}=$start; | ||
975 : | # $Sims{$rfid}{'stop'}=$stop; | ||
976 : | # print "$rfid $Sims{$rfid}{'eval'}\n"; | ||
977 : | # } | ||
978 : | |||
979 : | # # BBHs | ||
980 : | # my $BBHs=(); | ||
981 : | |||
982 : | # @bbhs_src = $fig->bbhs($fid,1.0e-10); | ||
983 : | # print "found $#bbhs_src BBHs\n"; | ||
984 : | # foreach $bbh (@bbhs_src) { | ||
985 : | # #print "@$bbh\n"; | ||
986 : | # my ($bbh_string) = "@$bbh"; | ||
987 : | # my ($rfid,$eval,$score) = ( $bbh_string =~ /(\S+)\s(\S+)\s(\S+)/); | ||
988 : | # #print "ID: $rfid, E:$eval, S:$score\n"; | ||
989 : | # $BBHs{$rfid}{'eval'}=$eval; | ||
990 : | # $BBHs{$rfid}{'score'}=$score; | ||
991 : | # #print "$rfid $BBHs{$rfid}{'eval'}\n"; | ||
992 : | # } | ||
993 : | |||
994 : | # } | ||
995 : | |||
996 : | |||
997 : | |||
998 : | =head3 new (internal) | ||
999 : | |||
1000 : | Instantiate a new object. | ||
1001 : | |||
1002 : | =cut | ||
1003 : | |||
1004 : | sub new { | ||
1005 : | mkubal | 1.7 | my ($class,$dataset) = @_; |
1006 : | |||
1007 : | mkubal | 1.1 | |
1008 : | mkubal | 1.7 | #$self = { acc => '', |
1009 : | # description => '', | ||
1010 : | # class => '', | ||
1011 : | # type => '', | ||
1012 : | # start => '', | ||
1013 : | # stop => '', | ||
1014 : | # evalue => '', | ||
1015 : | # score => '', | ||
1016 : | # display_method => '', | ||
1017 : | # feature_id => '', | ||
1018 : | # rank => '', | ||
1019 : | # supports_annotation => '', | ||
1020 : | # id => '', | ||
1021 : | # organism => '', | ||
1022 : | # who => '' | ||
1023 : | # }; | ||
1024 : | mkubal | 1.1 | |
1025 : | mkubal | 1.7 | my $self = { class => $dataset->{'class'}, |
1026 : | type => $dataset->{'type'} | ||
1027 : | arodri7 | 1.10 | }; |
1028 : | mkubal | 1.7 | |
1029 : | bless($self,$class); | ||
1030 : | mkubal | 1.1 | |
1031 : | return $self; | ||
1032 : | } | ||
1033 : | |||
1034 : | arodri7 | 1.11 | =head3 identity (internal) |
1035 : | |||
1036 : | Returns the % identity of the similar sequence | ||
1037 : | |||
1038 : | =cut | ||
1039 : | |||
1040 : | sub identity { | ||
1041 : | my ($self) = @_; | ||
1042 : | |||
1043 : | return $self->{identity}; | ||
1044 : | } | ||
1045 : | |||
1046 : | mkubal | 1.1 | =head3 feature_id (internal) |
1047 : | |||
1048 : | |||
1049 : | =cut | ||
1050 : | |||
1051 : | sub feature_id { | ||
1052 : | my ($self) = @_; | ||
1053 : | |||
1054 : | return $self->{feature_id}; | ||
1055 : | } | ||
1056 : | arodri7 | 1.5 | |
1057 : | =head3 id (internal) | ||
1058 : | |||
1059 : | Returns the ID of the identical sequence | ||
1060 : | |||
1061 : | =cut | ||
1062 : | |||
1063 : | sub id { | ||
1064 : | my ($self) = @_; | ||
1065 : | |||
1066 : | return $self->{id}; | ||
1067 : | } | ||
1068 : | |||
1069 : | =head3 organism (internal) | ||
1070 : | |||
1071 : | Returns the organism of the identical sequence | ||
1072 : | |||
1073 : | =cut | ||
1074 : | |||
1075 : | sub organism { | ||
1076 : | my ($self) = @_; | ||
1077 : | |||
1078 : | return $self->{organism}; | ||
1079 : | } | ||
1080 : | |||
1081 : | arodri7 | 1.9 | =head3 function (internal) |
1082 : | |||
1083 : | Returns the function of the identical sequence | ||
1084 : | |||
1085 : | =cut | ||
1086 : | |||
1087 : | sub function { | ||
1088 : | my ($self) = @_; | ||
1089 : | |||
1090 : | return $self->{function}; | ||
1091 : | } | ||
1092 : | |||
1093 : | arodri7 | 1.5 | =head3 database (internal) |
1094 : | |||
1095 : | Returns the database of the identical sequence | ||
1096 : | |||
1097 : | =cut | ||
1098 : | |||
1099 : | sub database { | ||
1100 : | my ($self) = @_; | ||
1101 : | |||
1102 : | return $self->{database}; | ||
1103 : | } | ||
1104 : | |||
1105 : | mkubal | 1.20 | ############################################################ |
1106 : | ############################################################ | ||
1107 : | package Observation::PDB; | ||
1108 : | |||
1109 : | use base qw(Observation); | ||
1110 : | |||
1111 : | sub new { | ||
1112 : | |||
1113 : | my ($class,$dataset) = @_; | ||
1114 : | my $self = $class->SUPER::new($dataset); | ||
1115 : | $self->{acc} = $dataset->{'acc'}; | ||
1116 : | $self->{evalue} = $dataset->{'evalue'}; | ||
1117 : | $self->{start} = $dataset->{'start'}; | ||
1118 : | $self->{stop} = $dataset->{'stop'}; | ||
1119 : | bless($self,$class); | ||
1120 : | return $self; | ||
1121 : | } | ||
1122 : | |||
1123 : | =head3 display() | ||
1124 : | |||
1125 : | displays data stored in best_PDB attribute and in Ontology server for given PDB id | ||
1126 : | |||
1127 : | =cut | ||
1128 : | |||
1129 : | sub display{ | ||
1130 : | my ($self,$gd,$fid) = @_; | ||
1131 : | |||
1132 : | my $dbmaster = DBMaster->new(-database =>'Ontology'); | ||
1133 : | |||
1134 : | print STDERR "PDB::display called\n"; | ||
1135 : | |||
1136 : | my $acc = $self->acc; | ||
1137 : | |||
1138 : | print STDERR "acc:$acc\n"; | ||
1139 : | my ($pdb_description,$pdb_source,$pdb_ligand); | ||
1140 : | my $pdb_objs = $dbmaster->pdb->get_objects( { 'id' => $acc } ); | ||
1141 : | if(!scalar(@$pdb_objs)){ | ||
1142 : | $pdb_description = "not available"; | ||
1143 : | $pdb_source = "not available"; | ||
1144 : | $pdb_ligand = "not available"; | ||
1145 : | } | ||
1146 : | else{ | ||
1147 : | my $pdb_obj = $pdb_objs->[0]; | ||
1148 : | $pdb_description = $pdb_obj->description; | ||
1149 : | $pdb_source = $pdb_obj->source; | ||
1150 : | $pdb_ligand = $pdb_obj->ligand; | ||
1151 : | } | ||
1152 : | arodri7 | 1.6 | |
1153 : | mkubal | 1.20 | my $lines = []; |
1154 : | my $line_data = []; | ||
1155 : | my $line_config = { 'title' => "PDB hit for $fid", | ||
1156 : | 'short_title' => "best PDB", | ||
1157 : | 'basepair_offset' => '1' }; | ||
1158 : | |||
1159 : | my $fig = new FIG; | ||
1160 : | my $seq = $fig->get_translation($fid); | ||
1161 : | my $fid_stop = length($seq); | ||
1162 : | |||
1163 : | my $fid_element_hash = { | ||
1164 : | "title" => $fid, | ||
1165 : | "start" => '1', | ||
1166 : | "end" => $fid_stop, | ||
1167 : | "color"=> '1', | ||
1168 : | "zlayer" => '1' | ||
1169 : | }; | ||
1170 : | |||
1171 : | push(@$line_data,$fid_element_hash); | ||
1172 : | |||
1173 : | my $links_list = []; | ||
1174 : | my $descriptions = []; | ||
1175 : | |||
1176 : | my $name; | ||
1177 : | $name = {"title" => 'id', | ||
1178 : | "value" => $acc}; | ||
1179 : | push(@$descriptions,$name); | ||
1180 : | |||
1181 : | my $description; | ||
1182 : | $description = {"title" => 'pdb description', | ||
1183 : | "value" => $pdb_description}; | ||
1184 : | push(@$descriptions,$description); | ||
1185 : | |||
1186 : | my $score; | ||
1187 : | $score = {"title" => "score", | ||
1188 : | "value" => $self->evalue}; | ||
1189 : | push(@$descriptions,$score); | ||
1190 : | |||
1191 : | my $start_stop; | ||
1192 : | my $start_stop_value = $self->start."_".$self->stop; | ||
1193 : | $start_stop = {"title" => "start-stop", | ||
1194 : | "value" => $start_stop_value}; | ||
1195 : | push(@$descriptions,$start_stop); | ||
1196 : | |||
1197 : | my $source; | ||
1198 : | $source = {"title" => "source", | ||
1199 : | "value" => $pdb_source}; | ||
1200 : | push(@$descriptions,$source); | ||
1201 : | |||
1202 : | my $ligand; | ||
1203 : | $ligand = {"title" => "pdb ligand", | ||
1204 : | "value" => $pdb_ligand}; | ||
1205 : | push(@$descriptions,$ligand); | ||
1206 : | |||
1207 : | my $link; | ||
1208 : | my $link_url ="http://www.rcsb.org/pdb/explore/explore.do?structureId=".$acc; | ||
1209 : | |||
1210 : | $link = {"link_title" => $acc, | ||
1211 : | "link" => $link_url}; | ||
1212 : | push(@$links_list,$link); | ||
1213 : | |||
1214 : | my $pdb_element_hash = { | ||
1215 : | "title" => "PDB homology", | ||
1216 : | "start" => $self->start, | ||
1217 : | "end" => $self->stop, | ||
1218 : | "color"=> '6', | ||
1219 : | "zlayer" => '3', | ||
1220 : | "links_list" => $links_list, | ||
1221 : | "description" => $descriptions}; | ||
1222 : | |||
1223 : | push(@$line_data,$pdb_element_hash); | ||
1224 : | $gd->add_line($line_data, $line_config); | ||
1225 : | |||
1226 : | return $gd; | ||
1227 : | } | ||
1228 : | |||
1229 : | 1; | ||
1230 : | arodri7 | 1.11 | |
1231 : | arodri7 | 1.9 | ############################################################ |
1232 : | ############################################################ | ||
1233 : | package Observation::Identical; | ||
1234 : | |||
1235 : | use base qw(Observation); | ||
1236 : | |||
1237 : | sub new { | ||
1238 : | |||
1239 : | my ($class,$dataset) = @_; | ||
1240 : | my $self = $class->SUPER::new($dataset); | ||
1241 : | $self->{id} = $dataset->{'id'}; | ||
1242 : | $self->{organism} = $dataset->{'organism'}; | ||
1243 : | $self->{function} = $dataset->{'function'}; | ||
1244 : | $self->{database} = $dataset->{'database'}; | ||
1245 : | |||
1246 : | bless($self,$class); | ||
1247 : | return $self; | ||
1248 : | } | ||
1249 : | |||
1250 : | =head3 display() | ||
1251 : | arodri7 | 1.6 | |
1252 : | If available use the function specified here to display the "raw" observation. | ||
1253 : | This code will display a table for the identical protein | ||
1254 : | |||
1255 : | |||
1256 : | arodri7 | 1.9 | B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evi |
1257 : | dence. | ||
1258 : | arodri7 | 1.6 | |
1259 : | =cut | ||
1260 : | |||
1261 : | arodri7 | 1.9 | sub display{ |
1262 : | my ($self, $cgi, $dataset) = @_; | ||
1263 : | arodri7 | 1.6 | |
1264 : | my $all_domains = []; | ||
1265 : | my $count_identical = 0; | ||
1266 : | arodri7 | 1.9 | my $content; |
1267 : | foreach my $thing (@$dataset) { | ||
1268 : | arodri7 | 1.6 | next if ($thing->class ne "IDENTICAL"); |
1269 : | arodri7 | 1.9 | my $single_domain = []; |
1270 : | push(@$single_domain,$thing->database); | ||
1271 : | my $id = $thing->id; | ||
1272 : | $count_identical++; | ||
1273 : | push(@$single_domain,&HTML::set_prot_links($cgi,$id)); | ||
1274 : | push(@$single_domain,$thing->organism); | ||
1275 : | #push(@$single_domain,$thing->type); | ||
1276 : | push(@$single_domain,$thing->function); | ||
1277 : | push(@$all_domains,$single_domain); | ||
1278 : | arodri7 | 1.6 | } |
1279 : | |||
1280 : | if ($count_identical >0){ | ||
1281 : | arodri7 | 1.9 | $content = $all_domains; |
1282 : | arodri7 | 1.6 | } |
1283 : | else{ | ||
1284 : | arodri7 | 1.9 | $content = "<p>This PEG does not have any essentially identical proteins</p>"; |
1285 : | arodri7 | 1.6 | } |
1286 : | return ($content); | ||
1287 : | } | ||
1288 : | mkubal | 1.7 | |
1289 : | arodri7 | 1.9 | 1; |
1290 : | |||
1291 : | |||
1292 : | ######################################### | ||
1293 : | ######################################### | ||
1294 : | package Observation::FC; | ||
1295 : | 1; | ||
1296 : | |||
1297 : | use base qw(Observation); | ||
1298 : | |||
1299 : | sub new { | ||
1300 : | |||
1301 : | my ($class,$dataset) = @_; | ||
1302 : | my $self = $class->SUPER::new($dataset); | ||
1303 : | $self->{score} = $dataset->{'score'}; | ||
1304 : | $self->{id} = $dataset->{'id'}; | ||
1305 : | $self->{function} = $dataset->{'function'}; | ||
1306 : | |||
1307 : | bless($self,$class); | ||
1308 : | return $self; | ||
1309 : | } | ||
1310 : | |||
1311 : | =head3 display() | ||
1312 : | |||
1313 : | If available use the function specified here to display the "raw" observation. | ||
1314 : | This code will display a table for the identical protein | ||
1315 : | |||
1316 : | |||
1317 : | B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evi | ||
1318 : | dence. | ||
1319 : | |||
1320 : | =cut | ||
1321 : | |||
1322 : | sub display { | ||
1323 : | my ($self,$cgi,$dataset, $fid) = @_; | ||
1324 : | |||
1325 : | my $functional_data = []; | ||
1326 : | my $count = 0; | ||
1327 : | my $content; | ||
1328 : | |||
1329 : | foreach my $thing (@$dataset) { | ||
1330 : | my $single_domain = []; | ||
1331 : | next if ($thing->class ne "PCH"); | ||
1332 : | $count++; | ||
1333 : | |||
1334 : | # construct the score link | ||
1335 : | my $score = $thing->score; | ||
1336 : | my $toid = $thing->id; | ||
1337 : | my $link = $cgi->url(-relative => 1) . "?user=master&request=show_coupling_evidence&prot=$fid&to=$toid&SPROUT="; | ||
1338 : | my $sc_link = "<a href=$link>$score</a>"; | ||
1339 : | |||
1340 : | push(@$single_domain,$sc_link); | ||
1341 : | push(@$single_domain,$thing->id); | ||
1342 : | push(@$single_domain,$thing->function); | ||
1343 : | push(@$functional_data,$single_domain); | ||
1344 : | } | ||
1345 : | |||
1346 : | if ($count >0){ | ||
1347 : | $content = $functional_data; | ||
1348 : | } | ||
1349 : | else | ||
1350 : | { | ||
1351 : | $content = "<p>This PEG does not have any functional coupling</p>"; | ||
1352 : | } | ||
1353 : | return ($content); | ||
1354 : | } | ||
1355 : | |||
1356 : | |||
1357 : | ######################################### | ||
1358 : | ######################################### | ||
1359 : | mkubal | 1.7 | package Observation::Domain; |
1360 : | |||
1361 : | use base qw(Observation); | ||
1362 : | |||
1363 : | sub new { | ||
1364 : | |||
1365 : | my ($class,$dataset) = @_; | ||
1366 : | my $self = $class->SUPER::new($dataset); | ||
1367 : | $self->{evalue} = $dataset->{'evalue'}; | ||
1368 : | $self->{acc} = $dataset->{'acc'}; | ||
1369 : | $self->{start} = $dataset->{'start'}; | ||
1370 : | $self->{stop} = $dataset->{'stop'}; | ||
1371 : | |||
1372 : | bless($self,$class); | ||
1373 : | return $self; | ||
1374 : | } | ||
1375 : | |||
1376 : | sub display { | ||
1377 : | my ($thing,$gd) = @_; | ||
1378 : | my $lines = []; | ||
1379 : | my $line_config = { 'title' => $thing->acc, | ||
1380 : | 'short_title' => $thing->type, | ||
1381 : | 'basepair_offset' => '1' }; | ||
1382 : | my $color = "4"; | ||
1383 : | |||
1384 : | my $line_data = []; | ||
1385 : | my $links_list = []; | ||
1386 : | my $descriptions = []; | ||
1387 : | mkubal | 1.19 | |
1388 : | my $db_and_id = $thing->acc; | ||
1389 : | my ($db,$id) = split("::",$db_and_id); | ||
1390 : | |||
1391 : | my $dbmaster = DBMaster->new(-database =>'Ontology'); | ||
1392 : | mkubal | 1.7 | |
1393 : | mkubal | 1.19 | my ($name_title,$name_value,$description_title,$description_value); |
1394 : | if($db eq "CDD"){ | ||
1395 : | my $cdd_objs = $dbmaster->cdd->get_objects( { 'id' => $id } ); | ||
1396 : | if(!scalar(@$cdd_objs)){ | ||
1397 : | $name_title = "name"; | ||
1398 : | $name_value = "not available"; | ||
1399 : | $description_title = "description"; | ||
1400 : | $description_value = "not available"; | ||
1401 : | } | ||
1402 : | else{ | ||
1403 : | my $cdd_obj = $cdd_objs->[0]; | ||
1404 : | $name_title = "name"; | ||
1405 : | $name_value = $cdd_obj->term; | ||
1406 : | $description_title = "description"; | ||
1407 : | $description_value = $cdd_obj->description; | ||
1408 : | } | ||
1409 : | } | ||
1410 : | mkubal | 1.7 | |
1411 : | mkubal | 1.19 | my $name; |
1412 : | $name = {"title" => $name_title, | ||
1413 : | "value" => $name_value}; | ||
1414 : | push(@$descriptions,$name); | ||
1415 : | |||
1416 : | my $description; | ||
1417 : | $description = {"title" => $description_title, | ||
1418 : | "value" => $description_value}; | ||
1419 : | push(@$descriptions,$description); | ||
1420 : | mkubal | 1.7 | |
1421 : | my $score; | ||
1422 : | $score = {"title" => "score", | ||
1423 : | "value" => $thing->evalue}; | ||
1424 : | push(@$descriptions,$score); | ||
1425 : | |||
1426 : | my $link_id; | ||
1427 : | mkubal | 1.12 | if ($thing->acc =~/\w+::(\d+)/){ |
1428 : | mkubal | 1.7 | $link_id = $1; |
1429 : | } | ||
1430 : | |||
1431 : | my $link; | ||
1432 : | mkubal | 1.12 | my $link_url; |
1433 : | if ($thing->class eq "CDD"){$link_url = "http://0-www.ncbi.nlm.nih.gov.library.vu.edu.au:80/Structure/cdd/cddsrv.cgi?uid=$link_id"} | ||
1434 : | elsif($thing->class eq "PFAM"){$link_url = "http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?$link_id"} | ||
1435 : | else{$link_url = "NO_URL"} | ||
1436 : | |||
1437 : | mkubal | 1.7 | $link = {"link_title" => $thing->acc, |
1438 : | mkubal | 1.12 | "link" => $link_url}; |
1439 : | mkubal | 1.7 | push(@$links_list,$link); |
1440 : | |||
1441 : | my $element_hash = { | ||
1442 : | "title" => $thing->type, | ||
1443 : | "start" => $thing->start, | ||
1444 : | "end" => $thing->stop, | ||
1445 : | "color"=> $color, | ||
1446 : | "zlayer" => '2', | ||
1447 : | "links_list" => $links_list, | ||
1448 : | "description" => $descriptions}; | ||
1449 : | |||
1450 : | push(@$line_data,$element_hash); | ||
1451 : | $gd->add_line($line_data, $line_config); | ||
1452 : | |||
1453 : | return $gd; | ||
1454 : | |||
1455 : | } | ||
1456 : | |||
1457 : | arodri7 | 1.10 | ######################################### |
1458 : | ######################################### | ||
1459 : | mkubal | 1.12 | package Observation::Location; |
1460 : | |||
1461 : | use base qw(Observation); | ||
1462 : | |||
1463 : | sub new { | ||
1464 : | |||
1465 : | my ($class,$dataset) = @_; | ||
1466 : | my $self = $class->SUPER::new($dataset); | ||
1467 : | $self->{cleavage_prob} = $dataset->{'cleavage_prob'}; | ||
1468 : | $self->{cleavage_loc} = $dataset->{'cleavage_loc'}; | ||
1469 : | $self->{signal_peptide_score} = $dataset->{'signal_peptide_score'}; | ||
1470 : | $self->{cello_location} = $dataset->{'cello_location'}; | ||
1471 : | $self->{cello_score} = $dataset->{'cello_score'}; | ||
1472 : | $self->{tmpred_score} = $dataset->{'tmpred_score'}; | ||
1473 : | $self->{tmpred_locations} = $dataset->{'tmpred_locations'}; | ||
1474 : | |||
1475 : | bless($self,$class); | ||
1476 : | return $self; | ||
1477 : | } | ||
1478 : | |||
1479 : | sub display { | ||
1480 : | my ($thing,$gd,$fid) = @_; | ||
1481 : | |||
1482 : | my $fig= new FIG; | ||
1483 : | my $length = length($fig->get_translation($fid)); | ||
1484 : | |||
1485 : | my $cleavage_prob; | ||
1486 : | if($thing->cleavage_prob){$cleavage_prob = $thing->cleavage_prob;} | ||
1487 : | my ($cleavage_loc_begin,$cleavage_loc_end) = split("-",$thing->cleavage_loc); | ||
1488 : | my $signal_peptide_score = $thing->signal_peptide_score; | ||
1489 : | my $cello_location = $thing->cello_location; | ||
1490 : | my $cello_score = $thing->cello_score; | ||
1491 : | my $tmpred_score = $thing->tmpred_score; | ||
1492 : | my @tmpred_locations = split(",",$thing->tmpred_locations); | ||
1493 : | |||
1494 : | my $lines = []; | ||
1495 : | my $line_config = { 'title' => 'Localization Evidence', | ||
1496 : | 'short_title' => 'Local', | ||
1497 : | 'basepair_offset' => '1' }; | ||
1498 : | |||
1499 : | #color is | ||
1500 : | my $color = "5"; | ||
1501 : | |||
1502 : | my $line_data = []; | ||
1503 : | |||
1504 : | if($cello_location){ | ||
1505 : | my $cello_descriptions = []; | ||
1506 : | my $description_cello_location = {"title" => 'Best Cello Location', | ||
1507 : | "value" => $cello_location}; | ||
1508 : | |||
1509 : | push(@$cello_descriptions,$description_cello_location); | ||
1510 : | |||
1511 : | my $description_cello_score = {"title" => 'Cello Score', | ||
1512 : | "value" => $cello_score}; | ||
1513 : | |||
1514 : | push(@$cello_descriptions,$description_cello_score); | ||
1515 : | |||
1516 : | my $element_hash = { | ||
1517 : | "title" => "CELLO", | ||
1518 : | "start" => "1", | ||
1519 : | "end" => $length + 1, | ||
1520 : | "color"=> $color, | ||
1521 : | "type" => 'box', | ||
1522 : | "zlayer" => '2', | ||
1523 : | "description" => $cello_descriptions}; | ||
1524 : | |||
1525 : | push(@$line_data,$element_hash); | ||
1526 : | } | ||
1527 : | |||
1528 : | my $color = "6"; | ||
1529 : | #if(0){ | ||
1530 : | if($tmpred_score){ | ||
1531 : | foreach my $tmpred (@tmpred_locations){ | ||
1532 : | my $descriptions = []; | ||
1533 : | my ($begin,$end) =split("-",$tmpred); | ||
1534 : | my $description_tmpred_score = {"title" => 'TMPRED score', | ||
1535 : | "value" => $tmpred_score}; | ||
1536 : | |||
1537 : | push(@$descriptions,$description_tmpred_score); | ||
1538 : | |||
1539 : | my $element_hash = { | ||
1540 : | "title" => "transmembrane location", | ||
1541 : | "start" => $begin + 1, | ||
1542 : | "end" => $end + 1, | ||
1543 : | "color"=> $color, | ||
1544 : | "zlayer" => '5', | ||
1545 : | "type" => 'smallbox', | ||
1546 : | "description" => $descriptions}; | ||
1547 : | |||
1548 : | push(@$line_data,$element_hash); | ||
1549 : | } | ||
1550 : | } | ||
1551 : | |||
1552 : | my $color = "1"; | ||
1553 : | if($signal_peptide_score){ | ||
1554 : | my $descriptions = []; | ||
1555 : | my $description_signal_peptide_score = {"title" => 'signal peptide score', | ||
1556 : | "value" => $signal_peptide_score}; | ||
1557 : | |||
1558 : | push(@$descriptions,$description_signal_peptide_score); | ||
1559 : | |||
1560 : | my $description_cleavage_prob = {"title" => 'cleavage site probability', | ||
1561 : | "value" => $cleavage_prob}; | ||
1562 : | |||
1563 : | push(@$descriptions,$description_cleavage_prob); | ||
1564 : | |||
1565 : | my $element_hash = { | ||
1566 : | "title" => "SignalP", | ||
1567 : | "start" => $cleavage_loc_begin - 2, | ||
1568 : | "end" => $cleavage_loc_end + 3, | ||
1569 : | "type" => 'bigbox', | ||
1570 : | "color"=> $color, | ||
1571 : | "zlayer" => '10', | ||
1572 : | "description" => $descriptions}; | ||
1573 : | |||
1574 : | push(@$line_data,$element_hash); | ||
1575 : | } | ||
1576 : | |||
1577 : | $gd->add_line($line_data, $line_config); | ||
1578 : | |||
1579 : | return ($gd); | ||
1580 : | |||
1581 : | } | ||
1582 : | |||
1583 : | sub cleavage_loc { | ||
1584 : | my ($self) = @_; | ||
1585 : | |||
1586 : | return $self->{cleavage_loc}; | ||
1587 : | } | ||
1588 : | |||
1589 : | sub cleavage_prob { | ||
1590 : | my ($self) = @_; | ||
1591 : | |||
1592 : | return $self->{cleavage_prob}; | ||
1593 : | } | ||
1594 : | |||
1595 : | sub signal_peptide_score { | ||
1596 : | my ($self) = @_; | ||
1597 : | |||
1598 : | return $self->{signal_peptide_score}; | ||
1599 : | } | ||
1600 : | |||
1601 : | sub tmpred_score { | ||
1602 : | my ($self) = @_; | ||
1603 : | |||
1604 : | return $self->{tmpred_score}; | ||
1605 : | } | ||
1606 : | |||
1607 : | sub tmpred_locations { | ||
1608 : | my ($self) = @_; | ||
1609 : | |||
1610 : | return $self->{tmpred_locations}; | ||
1611 : | } | ||
1612 : | |||
1613 : | sub cello_location { | ||
1614 : | my ($self) = @_; | ||
1615 : | |||
1616 : | return $self->{cello_location}; | ||
1617 : | } | ||
1618 : | |||
1619 : | sub cello_score { | ||
1620 : | my ($self) = @_; | ||
1621 : | |||
1622 : | return $self->{cello_score}; | ||
1623 : | } | ||
1624 : | |||
1625 : | |||
1626 : | ######################################### | ||
1627 : | ######################################### | ||
1628 : | arodri7 | 1.10 | package Observation::Sims; |
1629 : | |||
1630 : | use base qw(Observation); | ||
1631 : | |||
1632 : | sub new { | ||
1633 : | |||
1634 : | my ($class,$dataset) = @_; | ||
1635 : | my $self = $class->SUPER::new($dataset); | ||
1636 : | arodri7 | 1.11 | $self->{identity} = $dataset->{'identity'}; |
1637 : | arodri7 | 1.10 | $self->{acc} = $dataset->{'acc'}; |
1638 : | $self->{evalue} = $dataset->{'evalue'}; | ||
1639 : | arodri7 | 1.11 | $self->{qstart} = $dataset->{'qstart'}; |
1640 : | $self->{qstop} = $dataset->{'qstop'}; | ||
1641 : | $self->{hstart} = $dataset->{'hstart'}; | ||
1642 : | $self->{hstop} = $dataset->{'hstop'}; | ||
1643 : | $self->{database} = $dataset->{'database'}; | ||
1644 : | $self->{organism} = $dataset->{'organism'}; | ||
1645 : | $self->{function} = $dataset->{'function'}; | ||
1646 : | $self->{qlength} = $dataset->{'qlength'}; | ||
1647 : | $self->{hlength} = $dataset->{'hlength'}; | ||
1648 : | arodri7 | 1.10 | |
1649 : | bless($self,$class); | ||
1650 : | return $self; | ||
1651 : | } | ||
1652 : | |||
1653 : | =head3 display() | ||
1654 : | |||
1655 : | If available use the function specified here to display the "raw" observation. | ||
1656 : | This code will display a table for the similarities protein | ||
1657 : | |||
1658 : | B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evidence. | ||
1659 : | |||
1660 : | =cut | ||
1661 : | |||
1662 : | sub display { | ||
1663 : | my ($self,$cgi,$dataset) = @_; | ||
1664 : | |||
1665 : | my $data = []; | ||
1666 : | my $count = 0; | ||
1667 : | my $content; | ||
1668 : | arodri7 | 1.11 | my $fig = new FIG; |
1669 : | arodri7 | 1.10 | |
1670 : | foreach my $thing (@$dataset) { | ||
1671 : | my $single_domain = []; | ||
1672 : | next if ($thing->class ne "SIM"); | ||
1673 : | $count++; | ||
1674 : | |||
1675 : | arodri7 | 1.11 | my $id = $thing->acc; |
1676 : | |||
1677 : | # add the subsystem information | ||
1678 : | my @in_sub = $fig->peg_to_subsystems($id); | ||
1679 : | my $in_sub; | ||
1680 : | |||
1681 : | if (@in_sub > 0) { | ||
1682 : | $in_sub = @in_sub; | ||
1683 : | |||
1684 : | # RAE: add a javascript popup with all the subsystems | ||
1685 : | my $ss_list=join "<br>", map { my $g = $_; $g =~ s/\_/ /g; $_ = $g } sort {$a cmp $b} @in_sub; | ||
1686 : | $in_sub = $cgi->a( {id=>"subsystems", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Subsystems', '$ss_list', ''); this.tooltip.addHandler(); return false;"}, $in_sub); | ||
1687 : | } else { | ||
1688 : | $in_sub = " "; | ||
1689 : | } | ||
1690 : | |||
1691 : | # add evidence code with tool tip | ||
1692 : | my $ev_codes=" "; | ||
1693 : | my @ev_codes = ""; | ||
1694 : | if ($id =~ /^fig\|\d+\.\d+\.peg\.\d+$/) { | ||
1695 : | my @codes = grep { $_->[1] =~ /^evidence_code/i } $fig->get_attributes($id); | ||
1696 : | @ev_codes = (); | ||
1697 : | foreach my $code (@codes) { | ||
1698 : | my $pretty_code = $code->[2]; | ||
1699 : | if ($pretty_code =~ /;/) { | ||
1700 : | my ($cd, $ss) = split(";", $code->[2]); | ||
1701 : | $ss =~ s/_/ /g; | ||
1702 : | $pretty_code = $cd;# . " in " . $ss; | ||
1703 : | } | ||
1704 : | push(@ev_codes, $pretty_code); | ||
1705 : | } | ||
1706 : | } | ||
1707 : | |||
1708 : | if (scalar(@ev_codes) && $ev_codes[0]) { | ||
1709 : | my $ev_code_help=join("<br />", map {&HTML::evidence_codes_explain($_)} @ev_codes); | ||
1710 : | $ev_codes = $cgi->a( | ||
1711 : | { | ||
1712 : | id=>"evidence_codes", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Evidence Codes', '$ev_code_help', ''); this.tooltip.addHandler(); return false;"}, join("<br />", @ev_codes)); | ||
1713 : | } | ||
1714 : | |||
1715 : | # add the aliases | ||
1716 : | my $aliases = undef; | ||
1717 : | $aliases = &html_enc( join( ", ", $fig->feature_aliases($id) ) ); | ||
1718 : | $aliases = &HTML::set_prot_links( $cgi, $aliases ); | ||
1719 : | $aliases ||= " "; | ||
1720 : | |||
1721 : | my $iden = $thing->identity; | ||
1722 : | my $ln1 = $thing->qlength; | ||
1723 : | my $ln2 = $thing->hlength; | ||
1724 : | my $b1 = $thing->qstart; | ||
1725 : | my $e1 = $thing->qstop; | ||
1726 : | my $b2 = $thing->hstart; | ||
1727 : | my $e2 = $thing->hstop; | ||
1728 : | my $d1 = abs($e1 - $b1) + 1; | ||
1729 : | my $d2 = abs($e2 - $b2) + 1; | ||
1730 : | my $reg1 = "$b1-$e1 (<b>$d1/$ln1</b>)"; | ||
1731 : | my $reg2 = "$b2-$e2 (<b>$d2/$ln2</b>)"; | ||
1732 : | |||
1733 : | |||
1734 : | push(@$single_domain,$thing->database); | ||
1735 : | arodri7 | 1.10 | push(@$single_domain,&HTML::set_prot_links($cgi,$thing->acc)); |
1736 : | push(@$single_domain,$thing->evalue); | ||
1737 : | arodri7 | 1.11 | push(@$single_domain,"$iden\%"); |
1738 : | push(@$single_domain,$reg1); | ||
1739 : | push(@$single_domain,$reg2); | ||
1740 : | push(@$single_domain,$in_sub); | ||
1741 : | push(@$single_domain,$ev_codes); | ||
1742 : | push(@$single_domain,$thing->organism); | ||
1743 : | push(@$single_domain,$thing->function); | ||
1744 : | push(@$single_domain,$aliases); | ||
1745 : | arodri7 | 1.10 | push(@$data,$single_domain); |
1746 : | } | ||
1747 : | |||
1748 : | if ($count >0){ | ||
1749 : | $content = $data; | ||
1750 : | } | ||
1751 : | else | ||
1752 : | { | ||
1753 : | $content = "<p>This PEG does not have any similarities</p>"; | ||
1754 : | } | ||
1755 : | return ($content); | ||
1756 : | } | ||
1757 : | arodri7 | 1.11 | |
1758 : | sub html_enc { $_ = $_[0]; s/\&/&/g; s/\>/>/g; s/\</</g; $_ } | ||
1759 : | mkubal | 1.12 | |
1760 : | arodri7 | 1.13 | |
1761 : | |||
1762 : | ############################ | ||
1763 : | package Observation::Cluster; | ||
1764 : | |||
1765 : | use base qw(Observation); | ||
1766 : | |||
1767 : | sub new { | ||
1768 : | |||
1769 : | my ($class,$dataset) = @_; | ||
1770 : | my $self = $class->SUPER::new($dataset); | ||
1771 : | |||
1772 : | bless($self,$class); | ||
1773 : | return $self; | ||
1774 : | } | ||
1775 : | |||
1776 : | sub display { | ||
1777 : | arodri7 | 1.23 | my ($self,$gd, $fid, $gd_window_size, $compare_or_coupling) = @_; |
1778 : | mkubal | 1.17 | |
1779 : | arodri7 | 1.13 | my $fig = new FIG; |
1780 : | mkubal | 1.14 | my $all_regions = []; |
1781 : | arodri7 | 1.13 | |
1782 : | #get the organism genome | ||
1783 : | mkubal | 1.14 | my $target_genome = $fig->genome_of($fid); |
1784 : | arodri7 | 1.13 | |
1785 : | # get location of the gene | ||
1786 : | my $data = $fig->feature_location($fid); | ||
1787 : | my ($contig, $beg, $end); | ||
1788 : | arodri7 | 1.22 | my %reverse_flag; |
1789 : | arodri7 | 1.13 | |
1790 : | if ($data =~ /(.*)_(\d+)_(\d+)$/){ | ||
1791 : | $contig = $1; | ||
1792 : | $beg = $2; | ||
1793 : | $end = $3; | ||
1794 : | } | ||
1795 : | |||
1796 : | arodri7 | 1.22 | my $offset; |
1797 : | arodri7 | 1.13 | my ($region_start, $region_end); |
1798 : | if ($beg < $end) | ||
1799 : | { | ||
1800 : | $region_start = $beg - 4000; | ||
1801 : | $region_end = $end+4000; | ||
1802 : | arodri7 | 1.22 | $offset = ($2+(($3-$2)/2))-($gd_window_size/2); |
1803 : | arodri7 | 1.13 | } |
1804 : | else | ||
1805 : | { | ||
1806 : | arodri7 | 1.21 | $region_start = $end-4000; |
1807 : | $region_end = $beg+4000; | ||
1808 : | arodri7 | 1.22 | $offset = ($3+(($2-$3)/2))-($gd_window_size/2); |
1809 : | $reverse_flag{$target_genome} = 1; | ||
1810 : | arodri7 | 1.21 | } |
1811 : | arodri7 | 1.13 | |
1812 : | # call genes in region | ||
1813 : | arodri7 | 1.16 | my ($target_gene_features, $reg_beg, $reg_end) = $fig->genes_in_region($target_genome, $contig, $region_start, $region_end); |
1814 : | mkubal | 1.14 | push(@$all_regions,$target_gene_features); |
1815 : | arodri7 | 1.16 | my (@start_array_region); |
1816 : | arodri7 | 1.22 | push (@start_array_region, $offset); |
1817 : | mkubal | 1.14 | |
1818 : | my %all_genes; | ||
1819 : | my %all_genomes; | ||
1820 : | mkubal | 1.17 | foreach my $feature (@$target_gene_features){ $all_genes{$feature} = 1;} |
1821 : | arodri7 | 1.16 | |
1822 : | arodri7 | 1.23 | if ($compare_or_coupling == 0) |
1823 : | arodri7 | 1.21 | { |
1824 : | my @coup = grep { $_->[1]} $fig->coupling_and_evidence($fid,5000,1e-10,4,1); | ||
1825 : | |||
1826 : | my $coup_count = 0; | ||
1827 : | |||
1828 : | foreach my $pair (@{$coup[0]->[2]}) { | ||
1829 : | # last if ($coup_count > 10); | ||
1830 : | my ($peg1,$peg2) = @$pair; | ||
1831 : | arodri7 | 1.22 | |
1832 : | my ($pair_contig,$pair_beg,$pair_end,$pair_region_start,$pair_region_stop,$pair_genome); | ||
1833 : | $pair_genome = $fig->genome_of($peg1); | ||
1834 : | arodri7 | 1.21 | |
1835 : | my $location = $fig->feature_location($peg1); | ||
1836 : | if($location =~/(.*)_(\d+)_(\d+)$/){ | ||
1837 : | $pair_contig = $1; | ||
1838 : | $pair_beg = $2; | ||
1839 : | $pair_end = $3; | ||
1840 : | if ($pair_beg < $pair_end) | ||
1841 : | { | ||
1842 : | $pair_region_start = $pair_beg - 4000; | ||
1843 : | $pair_region_stop = $pair_end+4000; | ||
1844 : | arodri7 | 1.22 | $offset = ($2+(($3-$2)/2))-($gd_window_size/2); |
1845 : | arodri7 | 1.21 | } |
1846 : | else | ||
1847 : | { | ||
1848 : | $pair_region_start = $pair_end-4000; | ||
1849 : | $pair_region_stop = $pair_beg+4000; | ||
1850 : | arodri7 | 1.22 | $offset = ($3+(($2-$3)/2))-($gd_window_size/2); |
1851 : | $reverse_flag{$pair_genome} = 1; | ||
1852 : | arodri7 | 1.21 | } |
1853 : | |||
1854 : | arodri7 | 1.22 | push (@start_array_region, $offset); |
1855 : | arodri7 | 1.21 | |
1856 : | $all_genomes{$pair_genome} = 1; | ||
1857 : | my ($pair_features) = $fig->genes_in_region($pair_genome, $pair_contig, $pair_region_start, $pair_region_stop); | ||
1858 : | push(@$all_regions,$pair_features); | ||
1859 : | foreach my $pair_feature (@$pair_features){ $all_genes{$pair_feature} = 1;} | ||
1860 : | } | ||
1861 : | $coup_count++; | ||
1862 : | } | ||
1863 : | } | ||
1864 : | arodri7 | 1.16 | |
1865 : | arodri7 | 1.23 | elsif ($compare_or_coupling == 1) |
1866 : | arodri7 | 1.21 | { |
1867 : | # make a hash of genomes that are phylogenetically close | ||
1868 : | #my $close_threshold = ".26"; | ||
1869 : | #my @genomes = $fig->genomes('complete'); | ||
1870 : | #my %close_genomes = (); | ||
1871 : | #foreach my $compared_genome (@genomes) | ||
1872 : | #{ | ||
1873 : | # my $dist = $fig->crude_estimate_of_distance($target_genome,$compared_genome); | ||
1874 : | # #$close_genomes{$compared_genome} = $dist; | ||
1875 : | # if ($dist <= $close_threshold) | ||
1876 : | # { | ||
1877 : | # $all_genomes{$compared_genome} = 1; | ||
1878 : | # } | ||
1879 : | #} | ||
1880 : | $all_genomes{"216592.1"} = 1; | ||
1881 : | $all_genomes{"79967.1"} = 1; | ||
1882 : | $all_genomes{"199310.1"} = 1; | ||
1883 : | $all_genomes{"216593.1"} = 1; | ||
1884 : | $all_genomes{"155864.1"} = 1; | ||
1885 : | $all_genomes{"83334.1"} = 1; | ||
1886 : | $all_genomes{"316407.3"} = 1; | ||
1887 : | |||
1888 : | foreach my $comp_genome (keys %all_genomes){ | ||
1889 : | my $return = $fig->bbh_list($comp_genome,[$fid]); | ||
1890 : | my $feature_list = $return->{$fid}; | ||
1891 : | foreach my $peg1 (@$feature_list){ | ||
1892 : | my $location = $fig->feature_location($peg1); | ||
1893 : | my ($pair_contig,$pair_beg,$pair_end,$pair_region_start,$pair_region_stop,$pair_genome); | ||
1894 : | arodri7 | 1.22 | $pair_genome = $fig->genome_of($peg1); |
1895 : | |||
1896 : | arodri7 | 1.21 | if($location =~/(.*)_(\d+)_(\d+)$/){ |
1897 : | $pair_contig = $1; | ||
1898 : | $pair_beg = $2; | ||
1899 : | $pair_end = $3; | ||
1900 : | if ($pair_beg < $pair_end) | ||
1901 : | { | ||
1902 : | $pair_region_start = $pair_beg - 4000; | ||
1903 : | $pair_region_stop = $pair_end + 4000; | ||
1904 : | arodri7 | 1.22 | $offset = ($2+(($3-$2)/2))-($gd_window_size/2); |
1905 : | arodri7 | 1.21 | } |
1906 : | else | ||
1907 : | { | ||
1908 : | $pair_region_start = $pair_end-4000; | ||
1909 : | $pair_region_stop = $pair_beg+4000; | ||
1910 : | arodri7 | 1.22 | $offset = ($3+(($2-$3)/2))-($gd_window_size/2); |
1911 : | $reverse_flag{$pair_genome} = 1; | ||
1912 : | arodri7 | 1.21 | } |
1913 : | |||
1914 : | arodri7 | 1.22 | push (@start_array_region, $offset); |
1915 : | arodri7 | 1.21 | $all_genomes{$pair_genome} = 1; |
1916 : | my ($pair_features) = $fig->genes_in_region($pair_genome, $pair_contig, $pair_region_start, $pair_region_stop); | ||
1917 : | push(@$all_regions,$pair_features); | ||
1918 : | foreach my $pair_feature (@$pair_features){ $all_genes{$pair_feature} = 1;} | ||
1919 : | } | ||
1920 : | mkubal | 1.14 | } |
1921 : | arodri7 | 1.16 | } |
1922 : | mkubal | 1.14 | } |
1923 : | |||
1924 : | arodri7 | 1.21 | # get the PCH to each of the genes |
1925 : | my $pch_sets = []; | ||
1926 : | my %pch_already; | ||
1927 : | foreach my $gene_peg (keys %all_genes) | ||
1928 : | { | ||
1929 : | if ($pch_already{$gene_peg}){next;}; | ||
1930 : | my $gene_set = [$gene_peg]; | ||
1931 : | foreach my $pch_peg ($fig->in_pch_pin_with($gene_peg)) { | ||
1932 : | $pch_peg =~ s/,.*$//; | ||
1933 : | my $pch_genome = $fig->genome_of($pch_peg); | ||
1934 : | if ( ($gene_peg ne $pch_peg) && ($all_genomes{$pch_genome})) { | ||
1935 : | push(@$gene_set,$pch_peg); | ||
1936 : | $pch_already{$pch_peg}=1; | ||
1937 : | mkubal | 1.14 | } |
1938 : | arodri7 | 1.21 | $pch_already{$gene_peg}=1; |
1939 : | mkubal | 1.14 | } |
1940 : | arodri7 | 1.21 | push(@$pch_sets,$gene_set); |
1941 : | mkubal | 1.14 | } |
1942 : | arodri7 | 1.21 | |
1943 : | #create a rank of the pch's | ||
1944 : | my %pch_set_rank; | ||
1945 : | mkubal | 1.14 | my $order = 0; |
1946 : | arodri7 | 1.21 | foreach my $set (@$pch_sets){ |
1947 : | mkubal | 1.14 | my $count = scalar(@$set); |
1948 : | arodri7 | 1.21 | $pch_set_rank{$order} = $count; |
1949 : | mkubal | 1.14 | $order++; |
1950 : | } | ||
1951 : | arodri7 | 1.21 | |
1952 : | mkubal | 1.14 | my %peg_rank; |
1953 : | my $counter = 1; | ||
1954 : | arodri7 | 1.21 | foreach my $pch_order (sort {$pch_set_rank{$b} <=> $pch_set_rank{$a}} keys %pch_set_rank){ |
1955 : | my $good_set = @$pch_sets[$pch_order]; | ||
1956 : | arodri7 | 1.18 | my $flag_set = 0; |
1957 : | if (scalar (@$good_set) > 1) | ||
1958 : | { | ||
1959 : | foreach my $peg (@$good_set){ | ||
1960 : | if ((!$peg_rank{$peg})){ | ||
1961 : | $peg_rank{$peg} = $counter; | ||
1962 : | $flag_set = 1; | ||
1963 : | } | ||
1964 : | } | ||
1965 : | $counter++ if ($flag_set == 1); | ||
1966 : | } | ||
1967 : | else | ||
1968 : | { | ||
1969 : | foreach my $peg (@$good_set){ | ||
1970 : | $peg_rank{$peg} = 100; | ||
1971 : | mkubal | 1.17 | } |
1972 : | mkubal | 1.14 | } |
1973 : | } | ||
1974 : | arodri7 | 1.21 | |
1975 : | |||
1976 : | # my $bbh_sets = []; | ||
1977 : | # my %already; | ||
1978 : | # foreach my $gene_key (keys(%all_genes)){ | ||
1979 : | # if($already{$gene_key}){next;} | ||
1980 : | # my $gene_set = [$gene_key]; | ||
1981 : | # | ||
1982 : | # my $gene_key_genome = $fig->genome_of($gene_key); | ||
1983 : | # | ||
1984 : | # foreach my $genome_key (keys(%all_genomes)){ | ||
1985 : | # #next if ($gene_key_genome eq $genome_key); | ||
1986 : | # my $return = $fig->bbh_list($genome_key,[$gene_key]); | ||
1987 : | # | ||
1988 : | # my $feature_list = $return->{$gene_key}; | ||
1989 : | # foreach my $fl (@$feature_list){ | ||
1990 : | # push(@$gene_set,$fl); | ||
1991 : | # } | ||
1992 : | # } | ||
1993 : | # $already{$gene_key} = 1; | ||
1994 : | # push(@$bbh_sets,$gene_set); | ||
1995 : | # } | ||
1996 : | # | ||
1997 : | # my %bbh_set_rank; | ||
1998 : | # my $order = 0; | ||
1999 : | # foreach my $set (@$bbh_sets){ | ||
2000 : | # my $count = scalar(@$set); | ||
2001 : | # $bbh_set_rank{$order} = $count; | ||
2002 : | # $order++; | ||
2003 : | # } | ||
2004 : | # | ||
2005 : | # my %peg_rank; | ||
2006 : | # my $counter = 1; | ||
2007 : | # foreach my $bbh_order (sort {$bbh_set_rank{$b} <=> $bbh_set_rank{$a}} keys %bbh_set_rank){ | ||
2008 : | # my $good_set = @$bbh_sets[$bbh_order]; | ||
2009 : | # my $flag_set = 0; | ||
2010 : | # if (scalar (@$good_set) > 1) | ||
2011 : | # { | ||
2012 : | # foreach my $peg (@$good_set){ | ||
2013 : | # if ((!$peg_rank{$peg})){ | ||
2014 : | # $peg_rank{$peg} = $counter; | ||
2015 : | # $flag_set = 1; | ||
2016 : | # } | ||
2017 : | # } | ||
2018 : | # $counter++ if ($flag_set == 1); | ||
2019 : | # } | ||
2020 : | # else | ||
2021 : | # { | ||
2022 : | # foreach my $peg (@$good_set){ | ||
2023 : | # $peg_rank{$peg} = 100; | ||
2024 : | # } | ||
2025 : | # } | ||
2026 : | # } | ||
2027 : | arodri7 | 1.18 | |
2028 : | mkubal | 1.14 | foreach my $region (@$all_regions){ |
2029 : | my $sample_peg = @$region[0]; | ||
2030 : | my $region_genome = $fig->genome_of($sample_peg); | ||
2031 : | my $region_gs = $fig->genus_species($region_genome); | ||
2032 : | arodri7 | 1.18 | my $abbrev_name = $fig->abbrev($region_gs); |
2033 : | arodri7 | 1.16 | my $line_config = { 'title' => $region_gs, |
2034 : | arodri7 | 1.18 | 'short_title' => $abbrev_name, |
2035 : | arodri7 | 1.16 | 'basepair_offset' => '0' |
2036 : | }; | ||
2037 : | |||
2038 : | arodri7 | 1.22 | my $offsetting = shift @start_array_region; |
2039 : | arodri7 | 1.16 | |
2040 : | mkubal | 1.14 | my $line_data = []; |
2041 : | arodri7 | 1.16 | foreach my $fid1 (@$region){ |
2042 : | mkubal | 1.14 | my $element_hash; |
2043 : | my $links_list = []; | ||
2044 : | my $descriptions = []; | ||
2045 : | |||
2046 : | arodri7 | 1.16 | my $color = $peg_rank{$fid1}; |
2047 : | arodri7 | 1.18 | |
2048 : | # get subsystem information | ||
2049 : | my $function = $fig->function_of($fid1); | ||
2050 : | my $url_link = "http://seed-viewer.theseed.org/index.cgi?action=ShowAnnotation&prot=".$fid1; | ||
2051 : | |||
2052 : | my $link; | ||
2053 : | $link = {"link_title" => $fid1, | ||
2054 : | "link" => $url_link}; | ||
2055 : | push(@$links_list,$link); | ||
2056 : | |||
2057 : | my @subsystems = $fig->peg_to_subsystems($fid1); | ||
2058 : | foreach my $subsystem (@subsystems){ | ||
2059 : | my $link; | ||
2060 : | $link = {"link" => "http://seed-viewer.theseed.org/index.cgi?action=ShowSubsystem&subsystem_name=$subsystem", | ||
2061 : | "link_title" => $subsystem}; | ||
2062 : | push(@$links_list,$link); | ||
2063 : | } | ||
2064 : | |||
2065 : | my $description_function; | ||
2066 : | $description_function = {"title" => "function", | ||
2067 : | "value" => $function}; | ||
2068 : | push(@$descriptions,$description_function); | ||
2069 : | |||
2070 : | my $description_ss; | ||
2071 : | my $ss_string = join (",", @subsystems); | ||
2072 : | $description_ss = {"title" => "subsystems", | ||
2073 : | "value" => $ss_string}; | ||
2074 : | push(@$descriptions,$description_ss); | ||
2075 : | |||
2076 : | arodri7 | 1.16 | |
2077 : | my $fid_location = $fig->feature_location($fid1); | ||
2078 : | mkubal | 1.14 | if($fid_location =~/(.*)_(\d+)_(\d+)$/){ |
2079 : | my($start,$stop); | ||
2080 : | arodri7 | 1.22 | $start = $2 - $offsetting; |
2081 : | $stop = $3 - $offsetting; | ||
2082 : | |||
2083 : | if (defined($reverse_flag{$region_genome})){ | ||
2084 : | $start = $gd_window_size - $start; | ||
2085 : | $stop = $gd_window_size - $stop; | ||
2086 : | } | ||
2087 : | |||
2088 : | mkubal | 1.14 | $element_hash = { |
2089 : | arodri7 | 1.16 | "title" => $fid1, |
2090 : | mkubal | 1.14 | "start" => $start, |
2091 : | "end" => $stop, | ||
2092 : | "type"=> 'arrow', | ||
2093 : | "color"=> $color, | ||
2094 : | arodri7 | 1.18 | "zlayer" => "2", |
2095 : | "links_list" => $links_list, | ||
2096 : | "description" => $descriptions | ||
2097 : | mkubal | 1.14 | }; |
2098 : | push(@$line_data,$element_hash); | ||
2099 : | } | ||
2100 : | } | ||
2101 : | $gd->add_line($line_data, $line_config); | ||
2102 : | } | ||
2103 : | return $gd; | ||
2104 : | } | ||
2105 : | |||
2106 : |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |