[Bio] / FigKernelPackages / Observation.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Observation.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : mkubal 1.1 package Observation;
2 :    
3 :     require Exporter;
4 :     @EXPORT_OK = qw(get_objects);
5 :    
6 :     use strict;
7 :     use warnings;
8 :    
9 :     1;
10 :    
11 : mkubal 1.3 # $Id: Observation.pm,v 1.2 2007/06/12 16:51:53 paczian Exp $
12 : mkubal 1.1
13 :     =head1 NAME
14 :    
15 :     Observation -- A presentation layer for observations in SEED.
16 :    
17 :     =head1 DESCRIPTION
18 :    
19 :     The SEED environment contains various sources of information for sequence features. The purpose of this library is to provide a
20 :     single interface to this data.
21 :    
22 :     The data can be used to display information for a given sequence feature (protein or other, but primarily information is computed for proteins).
23 :    
24 :     Example:
25 :    
26 :     use FIG;
27 :     use Observation;
28 :    
29 : paczian 1.2 my $fig = new FIG;
30 :     my $fid = "fig|83333.1.peg.3";
31 :    
32 :     my $observations = Observation->get_objects($fid);
33 :     foreach my $observation (@$observations) {
34 :     print "ID: " . $fid . "\n";
35 :     print "Start: " . $observation->start() . "\n";
36 :     ...
37 :     }
38 : mkubal 1.1
39 :     B<return an array of objects>
40 :    
41 :    
42 :     print "$Observation->acc\n" prints the Accession number if present for the Observation
43 :    
44 :     =cut
45 :    
46 :     =head1 BACKGROUND
47 :    
48 :     =head2 Data incorporated in the Observations
49 :    
50 :     As the goal of this library is to provide an integrated view, we combine diverse sources of evidence.
51 :    
52 :     =head3 SEED core evidence
53 :    
54 :     The core SEED data structures provided by FIG.pm. These are Similarities, BBHs and PCHs.
55 :    
56 :     =head3 Attribute based Evidence
57 :    
58 :     We use the SEED attribute infrastructure to store information computed by a variety of computational procedures.
59 :    
60 :     These are e.g. InterPro hits via InterProScan (ipr), NCBI Conserved Domain Database Hits via PSSM(cdd),
61 :     PFAM hits via HMM(pfam), SignalP results(signalp), and various others.
62 :    
63 :     =head1 METHODS
64 :    
65 :     The public methods this package provides are listed below:
66 :    
67 :     =head3 acc()
68 :    
69 :     A valid accession or remote ID (in the style of a db_xref) or a valid local ID (FID) in case this is supported.
70 :    
71 :     =cut
72 :    
# Accessor for the 'acc' slot: the accession / remote ID (or local FID)
# recorded for this observation.
sub acc {
    my $observation = shift;

    return $observation->{acc};
}
78 :    
79 :     =head3 description()
80 :    
81 :     The description of the hit. Taken from the data or from our Ontology database for some cases e.g. IPR or PFAM.
82 :    
83 :     B<Please note:>
84 :     Either remoteid or description is required.
85 :    
86 :     =cut
87 :    
# Accessor for the 'description' slot: the human-readable description of
# the hit.
#
# Fix: the previous implementation returned the 'acc' slot, so callers
# asking for the description silently received the accession instead.
sub description {
    my ($self) = @_;

    return $self->{description};
}
93 :    
94 :     =head3 class()
95 :    
96 :     The class of evidence (required). This is usually simply the name of the tool or the name of the SEED data structure.
97 :     B<Please note> the connection of class and display_method and URL.
98 :    
99 :     Current valid classes are:
100 :    
101 :     =over 9
102 :    
103 : mkubal 1.3 =item SIM (seq)
104 : mkubal 1.1
105 : mkubal 1.3 =item BBH (seq)
106 : mkubal 1.1
107 : mkubal 1.3 =item PCH (fc)
108 : mkubal 1.1
109 : mkubal 1.3 =item FIGFAM (seq)
110 : mkubal 1.1
111 : mkubal 1.3 =item IPR (dom)
112 : mkubal 1.1
113 : mkubal 1.3 =item CDD (dom)
114 : mkubal 1.1
115 : mkubal 1.3 =item PFAM (dom)
116 : mkubal 1.1
117 : mkubal 1.3 =item SIGNALP (dom)
118 : mkubal 1.1
119 : mkubal 1.3 =item CELLO(loc)
120 : mkubal 1.1
121 : mkubal 1.3 =item TMHMM (loc)
122 : mkubal 1.1
123 : mkubal 1.3 =item HMMTOP (loc)
124 : mkubal 1.1
125 :     =back
126 :    
127 :     =cut
128 :    
# Accessor for the 'class' slot: the class of evidence (e.g. the tool or
# SEED data structure name) stored on this observation.
sub class {
    my $observation = shift;

    return $observation->{class};
}
134 :    
135 :     =head3 type()
136 :    
137 :     The type of evidence (required).
138 :    
139 :     Where type is one of the following:
140 :    
141 :     =over 8
142 :    
143 :     =item seq=Sequence similarity
144 :    
145 :     =item dom=domain based match
146 :    
147 :     =item loc=Localization of the feature
148 :    
149 :     =item fc=Functional coupling.
150 :    
151 :     =back
152 :    
153 :     =cut
154 :    
# Accessor for the 'type' slot: the type of evidence (seq, dom, loc or fc
# according to the POD for this method).
#
# Fix: the previous implementation returned the 'acc' slot, so the
# evidence type written by the dataset builders was never visible.
sub type {
    my ($self) = @_;

    return $self->{type};
}
160 :    
161 :     =head3 start()
162 :    
163 :     Start of hit in query sequence.
164 :    
165 :     =cut
166 :    
# Accessor for the 'start' slot (start coordinate of the hit).
sub start {
    return $_[0]->{start};
}
172 :    
173 :     =head3 stop()
174 :    
175 :     End of the hit in query sequence.
176 :    
177 :     =cut
178 :    
# Accessor for the 'stop' slot (end coordinate of the hit).
sub stop {
    my $observation = shift;

    return $observation->{stop};
}
184 :    
185 :     =head3 evalue()
186 :    
187 :     E-value or P-Value if present.
188 :    
189 :     =cut
190 :    
# Accessor for the 'evalue' slot (E-value or P-value, when one was stored).
sub evalue {
    my $observation = shift;

    return $observation->{evalue};
}
196 :    
197 :     =head3 score()
198 :    
199 :     Score if present.
200 :    
201 :     B<Please note: >
202 :     Either score or evalue is required.
203 :    
204 :     =cut
205 :    
# Accessor for the 'score' slot (score of the hit, when one was stored).
sub score {
    return $_[0]->{score};
}
211 :    
212 :    
213 :     =head3 display_method()
214 :    
215 :     If available use the function specified here to display the "raw" observation.
216 :     In the case of a BLAST alignment of fid1 and fid2 a cgi script
217 :     will be called to display the results of running the command "bl2seq fid1 fid2".
218 :    
219 :     B<Please note> that the URL linked to in display_method() is an external component and needs to be added to the code for every class of evidence.
220 :    
221 :     =cut
222 :    
# Accessor for the 'display_method' slot.
#
# No display logic is implemented yet: whatever value is stored on the
# object is handed back unchanged.
sub display_method {
    my $observation = shift;

    return $observation->{display_method};
}
230 :    
231 :     =head3 rank()
232 :    
233 :     Returns an integer from 1 - 10 indicating the importance of this observation.
234 :    
235 :     Currently always returns 1.
236 :    
237 :     =cut
238 :    
# Importance of this observation.
#
# The stored 'rank' slot is deliberately ignored for now; the method is a
# placeholder that reports 1 for every observation.
sub rank {
    my $observation = shift;    # unused until real ranking is implemented

    return 1;
}
246 :    
247 :     =head3 supports_annotation()
248 :    
249 :     Does this observation support the annotation of its feature?
250 :    
251 :     Returns
252 :    
253 :     =over 3
254 :    
255 :     =item 10, if feature annotation is identical to $self->description
256 :    
257 :     =item 1, Feature annotation is similar to $self->annotation; this is computed using FIG::SameFunc()
258 :    
259 :     =item undef
260 :    
261 :     =back
262 :    
263 :     =cut
264 :    
# Accessor for the 'supports_annotation' slot.
#
# Nothing is computed here yet; the stored value is returned as-is.
sub supports_annotation {
    my $observation = shift;

    return $observation->{supports_annotation};
}
272 :    
273 :     =head3 url()
274 :    
275 :     URL describing the subject. In case of a BLAST hit against a sequence, this URL will lead to a page displaying the sequence record for the sequence. In case of an HMM hit, the URL will be to the URL description.
276 :    
277 :     =cut
278 :    
# URL describing the subject of this observation.
#
# Delegates to get_url(), which unpacks a single argument (the observation
# itself).
#
# Fix: the previous call passed ($self->type, $self->acc), so get_url()
# received the type string where it expects the observation.
#
# Returns whatever get_url() returns (currently always undef).
sub url {
    my ($self) = @_;

    my $url = get_url($self);

    return $url;
}
286 :    
287 :     =head3 get_objects()
288 :    
289 :     This is the B<REAL WORKHORSE> method of this Package.
290 :    
291 :     It will probably have to:
292 :    
293 :     - get all sims for the feature
294 :     - get all bbhs for the feature
295 :     - copy information from sim to bbh (bbh have no match location etc)
296 :     - get pchs (difficult)
297 :     - get attributes (there is code for this in get_attribute_based_observations)
298 :     - get_attribute_based_observations returns an array of arrays of hashes like this:
299 :    
300 :     my $datasets =
301 :     [
302 :     [ { name => 'acc', value => '1234' },
303 :     { name => 'from', value => '4' },
304 :     { name => 'to', value => '400' },
305 :     ....
306 :     ],
307 :     [ { name => 'acc', value => '456' },
308 :     { name => 'from', value => '1' },
309 :     { name => 'to', value => '100' },
310 :     ....
311 :     ],
312 :     ...
313 :     ];
314 :     return $datasets;
315 :     }
316 :    
317 :     It will invoke the required calls to the SEED API to retrieve the information required.
318 :    
319 :     =cut
320 :    
# Build the list of Observation objects for one feature.
#
# Both call styles are accepted:
#   Observation->get_objects($fid)    method call   (invocant, fid)
#   Observation::get_objects($fid)    function call (fid only), the form
#                                     shown in the POD example and implied
#                                     by exporting get_objects
# A single argument is therefore always interpreted as the feature ID.
# Fix: previously the function-call form put the fid into $self, left $fid
# undefined, and died on $self->new().
#
# Parameters:
#   $fid - feature ID whose observations are collected
#
# Returns: reference to an array of objects, one per dataset collected by
# get_attribute_based_observations() and get_sims_observations(). Each
# object carries the dataset's name/value pairs as slots.
#
# Fix: feature_id is now set to $fid on every object, so feature_id()
# reports which feature the observation belongs to. A dataset entry named
# 'feature_id' still takes precedence.
#
# Sources still to be added: BBHs enriched with sims coordinates, PCHs,
# and FIGfam match data.
sub get_objects {
    my @args = @_;

    # With two or more arguments the first one is the invocant; with one
    # argument we were called as a plain function.
    my $invocant = @args > 1 ? shift @args : __PACKAGE__;
    my ($fid) = @args;

    # Each collector appends array-of-hash datasets to @matched_datasets.
    my @matched_datasets;
    get_attribute_based_observations($fid, \@matched_datasets);
    get_sims_observations($fid, \@matched_datasets);

    my @objects;
    for my $dataset (@matched_datasets) {
        my $object = $invocant->new();
        $object->{feature_id} = $fid;

        for my $attribute (@{$dataset}) {
            $object->{ $attribute->{name} } = $attribute->{value};
        }

        push @objects, $object;
    }

    return \@objects;
}
352 :    
353 :     =head1 Internal Methods
354 :    
355 :     These methods are not meant to be used outside of this package.
356 :    
357 :     B<Please do not use them outside of this package!>
358 :    
359 :     =cut
360 :    
361 :    
362 :     =head3 get_url (internal)
363 :    
364 :     get_url() return a valid URL or undef for any observation.
365 :    
366 :     URLs are constructed by looking at the Accession acc() and name()
367 :    
368 :     Info from both attributes is combined with a table of base URLs stored in this function.
369 :    
370 :     =cut
371 :    
# Internal: URL for an observation. Not implemented yet; always undef.
#
# Planned design (kept from the original sketch): look the observation up
# in a table of base URLs and append its accession, e.g.
#
#   PFAM   => "http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?"
#   IPR    => "http://www.ebi.ac.uk/interpro/DisplayIproEntry?ac="
#   CDD    => "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid="
#   PIR    => "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid="
#   FIGFAM => ''
#   sim    => "http://www.theseed.org/linkin.cgi?id="
#   bbh    => "http://www.theseed.org/linkin.cgi?id="
#
# i.e. $url = $URL{ $self->name } . $self->acc when the key is known.
sub get_url {
    my $observation = shift;    # unused until the lookup table is enabled

    # An explicit undef (not a bare return) is kept so that list-context
    # callers still receive exactly one element, as before.
    return undef;
}
394 :    
395 :     =head3 get_display_method (internal)
396 :    
397 :     get_display_method() return a valid URL or undef for any observation.
398 :    
399 :     URLs are constructed by looking at the Accession acc() and name()
400 :     and Info from both attributes is combined with a table of base URLs stored in this function.
401 :    
402 :     =cut
403 :    
# Internal: URL of the page that displays the raw observation. Not
# implemented yet; always undef.
#
# Planned design (kept from the original sketch): a table of base URLs
#
#   sim => "http://www.theseed.org/featalign.cgi?id1="
#   bbh => "http://www.theseed.org/featalign.cgi?id1="
#
# combined as $URL{ $self->name } . $self->feature_id . "&id2=" . $self->acc
# when the key is known.
sub get_display_method {
    my $observation = shift;    # unused until the lookup table is enabled

    # Explicit undef keeps the one-element result in list context.
    return undef;
}
420 :    
421 :     =head3 get_attribute_based_observations (internal)
422 :    
423 :     This method retrieves evidence from the attribute server
424 :    
425 :     =cut
426 :    
# Internal: collect domain-style observations (PFAM, IPR, CDD) from the
# attributes of a feature.
#
# Parameters:
#   $fid          - feature ID passed to FIG's get_attributes()
#   $datasets_ref - reference to an array; one dataset (array ref of
#                   { name => ..., value => ... } hashes) is appended to
#                   it per matching attribute
#
# The FIG class must already be loaded by the caller; it is not imported
# here.
#
# Changes relative to the previous version, none of which alter the
# datasets produced:
#   * FIG->new replaces the indirect-object form "new FIG", which is
#     fragile in a package that defines its own new().
#   * attributes whose stringified form does not yield a key are skipped
#     up front instead of triggering uninitialized-value warnings.
#   * the dead e-value parsing locals and the always-true inner
#     "if ($key =~ /::/)" test were removed.
#
# TODO (FM->TD, from the original): handle each observation type properly
# (e.g. CELLO has a score but no e-value or coordinates), driven by a
# configurable table of types and patterns.
sub get_attribute_based_observations {
    my ($fid, $datasets_ref) = @_;

    my $fig = FIG->new;

    ATTRIBUTE:
    for my $attr_ref ($fig->get_attributes($fid)) {

        # Flatten the attribute record into one space-separated string and
        # take its second and third whitespace-delimited fields.
        my $string = "@{$attr_ref}";
        my ($key, $val) = $string =~ /\S+\s(\S+)\s(\S+)/;

        next ATTRIBUTE if !defined $key;
        next ATTRIBUTE if $key !~ /(?:PFAM|IPR|CDD)::/;

        # Keys are composite, e.g. CDD::1233244: split off the tool name and
        # fold the remainder into the value so it parses as the accession.
        my ($tool, $rest) = $key =~ /([a-zA-Z0-9]+)::(.*)/;
        $val = $rest . ';' . $val;
        $key = $tool;

        my ($acc, $raw_evalue, $from, $to) = $val =~ /(\S+);(\S+);(\d+)-(\d+)/;

        # Known limitation kept from the original: the raw e-value is not
        # converted yet. A fixed placeholder is reported when one is present,
        # and 255 when the value did not parse.
        my $evalue = defined $raw_evalue ? "0.01" : 255;

        # Unroll into the name/value list that get_objects() consumes.
        # 'type' is hard-wired to "dom" for every attribute handled here.
        push @{$datasets_ref}, [
            { name => 'class',  value => $key },
            { name => 'acc',    value => $acc },
            { name => 'type',   value => "dom" },
            { name => 'evalue', value => $evalue },
            { name => 'start',  value => $from },
            { name => 'stop',   value => $to },
        ];
    }
}
492 :    
493 : mkubal 1.3 =head3 get_sims_observations() (internal)
494 :    
495 :     This method retrieves sims and fills the internal data structures.
496 :    
497 :     =cut
498 :    
# Internal: collect similarity ("SIM") observations for a feature.
#
# Parameters:
#   $fid          - feature ID passed to FIG's nsims()
#   $datasets_ref - reference to an array; one dataset (array ref of
#                   { name => ..., value => ... } hashes) is appended to
#                   it per similarity
#
# The FIG class must already be loaded by the caller.
# NOTE(review): nsims($fid, 100, 1e-5) is kept exactly as written;
# presumably the arguments are a hit limit and an e-value cutoff -- confirm
# against FIG.pm.
#
# Fixes relative to the previous version, which did not compile:
#   * "my $fig =" had no right-hand side; a FIG object is now created.
#   * $hit/$evalue/$from/$to were undeclared under "use strict".
#   * the push sat outside the loop, where $dataset was out of scope, so
#     at most one dataset could ever have been stored; every sim is now
#     recorded.
#   * start/stop were read from columns 8/9, the subject coordinates in the
#     m8 layout documented later in this file (id1, id2, %ident, align len,
#     mismatches, gaps, q.start, q.stop, s.start, s.stop, eval, bit). The
#     start()/stop() accessors are documented as positions in the query
#     sequence, so columns 6/7 are used instead.
sub get_sims_observations {
    my ($fid, $datasets_ref) = @_;

    my $fig  = FIG->new;
    my @sims = $fig->nsims($fid, 100, 1e-5);

    for my $sim (@sims) {
        # id2, q.start, q.stop, e-value
        my ($hit, $from, $to, $evalue) = @{$sim}[ 1, 6, 7, 10 ];

        push @{$datasets_ref}, [
            { name => 'class',  value => "SIM" },
            { name => 'acc',    value => $hit },
            { name => 'type',   value => "seq" },
            { name => 'evalue', value => $evalue },
            { name => 'start',  value => $from },
            { name => 'stop',   value => $to },
        ];
    }

    return;
}
520 :    
521 : mkubal 1.1 =head3 get_sims_and_bbhs() (internal)
522 :    
523 :     This method retrieves sims and also BBHs and fills the internal data structures.
524 :    
525 :     =cut
526 :    
527 :     # sub get_sims_and_bbhs{
528 :    
529 :     # # blast m8 output format
530 :     # # id1, id2, %ident, align len, mismatches, gaps, q.start, q.stop, s. start, s.stop, eval, bit
531 :    
532 :     # my $Sims=();
533 :     # @sims_src = $fig->sims($fid,80,500,"fig",0);
534 :     # print "found $#sims_src SIMs\n";
535 :     # foreach $sims (@sims_src) {
536 :     # my ($sims_string) = "@$sims";
537 :     # # print "$sims_string\n";
538 :     # my ($rfid,$start,$stop,$eval) = ( $sims_string =~ /\S+\s+(\S+)\s+\S+\s\S+\s+(\S+)\s+(\S+)\s+
539 :     # \S+\s+\S+\s+\S+\s+\S+\s+(\S+)+.*/);
540 :     # # print "ID: $rfid, E:$eval, Start:$start stop:$stop\n";
541 :     # $Sims{$rfid}{'eval'}=$eval;
542 :     # $Sims{$rfid}{'start'}=$start;
543 :     # $Sims{$rfid}{'stop'}=$stop;
544 :     # print "$rfid $Sims{$rfid}{'eval'}\n";
545 :     # }
546 :    
547 :     # # BBHs
548 :     # my $BBHs=();
549 :    
550 :     # @bbhs_src = $fig->bbhs($fid,1.0e-10);
551 :     # print "found $#bbhs_src BBHs\n";
552 :     # foreach $bbh (@bbhs_src) {
553 :     # #print "@$bbh\n";
554 :     # my ($bbh_string) = "@$bbh";
555 :     # my ($rfid,$eval,$score) = ( $bbh_string =~ /(\S+)\s(\S+)\s(\S+)/);
556 :     # #print "ID: $rfid, E:$eval, S:$score\n";
557 :     # $BBHs{$rfid}{'eval'}=$eval;
558 :     # $BBHs{$rfid}{'score'}=$score;
559 :     # #print "$rfid $BBHs{$rfid}{'eval'}\n";
560 :     # }
561 :    
562 :     # }
563 :    
564 :    
565 :    
566 :     =head3 new (internal)
567 :    
568 :     Instantiate a new object.
569 :    
570 :     =cut
571 :    
# Internal constructor: returns a fresh object with every known slot
# present and set to the empty string.
#
# The invocant is ignored; the object is always blessed into the
# 'Observation' class.
sub new {
    my @slot_names = qw(
        acc description class type start stop evalue score
        display_method feature_id rank supports_annotation
    );

    my %blank_slots = map { $_ => '' } @slot_names;

    return bless \%blank_slots, 'Observation';
}
593 :    
594 :     =head3 feature_id (internal)
595 :    
596 :     Returns the ID of the feature these Observations belong to.
597 :    
598 :     =cut
599 :    
# Accessor for the 'feature_id' slot: the ID of the feature this
# observation belongs to.
sub feature_id {
    my $observation = shift;

    return $observation->{feature_id};
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3