Parent Directory
|
Revision Log
Revision 1.33 - (view) (download) (as text)
1 : | mkubal | 1.1 | package Observation; |
2 : | |||
3 : | mkubal | 1.19 | use lib '/vol/ontologies'; |
4 : | use DBMaster; | ||
5 : | |||
6 : | mkubal | 1.1 | require Exporter; |
7 : | @EXPORT_OK = qw(get_objects); | ||
8 : | |||
9 : | arodri7 | 1.16 | use FIG_Config; |
10 : | mkubal | 1.30 | #use strict; |
11 : | arodri7 | 1.16 | #use warnings; |
12 : | arodri7 | 1.9 | use HTML; |
13 : | mkubal | 1.1 | |
14 : | 1; | ||
15 : | |||
16 : | arodri7 | 1.33 | # $Id: Observation.pm,v 1.32 2007/08/21 22:45:54 arodri7 Exp $ |
17 : | mkubal | 1.1 | |
18 : | =head1 NAME | ||
19 : | |||
20 : | Observation -- A presentation layer for observations in SEED. | ||
21 : | |||
22 : | =head1 DESCRIPTION | ||
23 : | |||
24 : | The SEED environment contains various sources of information for sequence features. The purpose of this library is to provide a | ||
25 : | single interface to this data. | ||
26 : | |||
27 : | The data can be used to display information for a given sequence feature (protein or other, but primarily information is computed for proteins). | ||
28 : | |||
29 : | =cut | ||
30 : | |||
31 : | =head1 BACKGROUND | ||
32 : | |||
33 : | =head2 Data incorporated in the Observations | ||
34 : | |||
35 : | As the goal of this library is to provide an integrated view, we combine diverse sources of evidence. | ||
36 : | |||
37 : | =head3 SEED core evidence | ||
38 : | |||
39 : | The core SEED data structures provided by FIG.pm. These are Similarities, BBHs and PCHs. | ||
40 : | |||
41 : | =head3 Attribute based Evidence | ||
42 : | |||
43 : | We use the SEED attribute infrastructure to store information computed by a variety of computational procedures. | ||
44 : | |||
45 : | These are e.g. InterPro hits via InterProScan (ipr), NCBI Conserved Domain Database Hits via PSSM(cdd), | ||
46 : | PFAM hits via HMM(pfam), SignalP results(signalp), and various others. | ||
47 : | |||
48 : | =head1 METHODS | ||
49 : | |||
50 : | The public methods this package provides are listed below: | ||
51 : | |||
52 : | |||
=head3 context()

Returns the stored genomic-context setting (documented values: close or
diverse) used when displaying genomic context.

=cut

sub context {
    my $self = shift;

    return $self->{'context'};
}
64 : | |||
=head3 rows()

Returns the stored rows, i.e. each row in a displayed table.

=cut

sub rows {
    my $self = shift;

    return $self->{'rows'};
}
76 : | |||
=head3 acc()

A valid accession or remote ID (in the style of a db_xref) or a valid local ID (FID) in case this is supported.

=cut

sub acc {
    my $self = shift;

    return $self->{'acc'};
}
87 : | |||
=head3 class()

The class of evidence (required). This is usually simply the name of the tool or the name of the SEED data structure.
B<Please note> the connection of class and display_method and URL.

Current valid classes are:

=over 9

=item IDENTICAL (seq)

=item SIM (seq)

=item BBH (seq)

=item PCH (fc)

=item FIGFAM (seq)

=item IPR (dom)

=item CDD (dom)

=item PFAM (dom)

=item SIGNALP_CELLO_TMPRED (loc)

=item PDB (seq)

=item TMHMM (loc)

=item HMMTOP (loc)

=back

=cut

sub class {
    my $self = shift;

    return $self->{'class'};
}
130 : | |||
=head3 type()

The type of evidence (required).

Where type is one of the following:

=over 8

=item seq=Sequence similarity

=item dom=domain based match

=item loc=Localization of the feature

=item fc=Functional coupling.

=back

=cut

sub type {
    my $self = shift;

    return $self->{'type'};
}
156 : | |||
=head3 start()

Start of hit in query sequence.

=cut

sub start {
    my $self = shift;

    return $self->{'start'};
}
168 : | |||
=head3 stop()

End of the hit in query sequence.

=cut

sub stop {
    my $self = shift;

    return $self->{'stop'};
}
180 : | |||
=head3 qstart()

Start of hit in query sequence.

=cut

sub qstart {
    my $self = shift;

    return $self->{'qstart'};
}
192 : | |||
=head3 qstop()

End of the hit in query sequence.

=cut

sub qstop {
    my $self = shift;

    return $self->{'qstop'};
}
204 : | |||
=head3 hstart()

Start of hit in hit sequence.

=cut

sub hstart {
    my $self = shift;

    return $self->{'hstart'};
}
216 : | |||
=head3 hstop()

End of the hit in hit sequence.

=cut

sub hstop {
    my $self = shift;

    return $self->{'hstop'};
}
228 : | |||
=head3 qlength()

Length of the query sequence in similarities.

=cut

sub qlength {
    my $self = shift;

    return $self->{'qlength'};
}
240 : | |||
=head3 hlength()

Length of the hit sequence in similarities.

=cut

sub hlength {
    my $self = shift;

    return $self->{'hlength'};
}
252 : | |||
=head3 evalue()

E-value or P-Value if present.

=cut

sub evalue {
    my $self = shift;

    return $self->{'evalue'};
}
264 : | |||
=head3 score()

Score if present.

=cut

sub score {
    my $self = shift;

    return $self->{'score'};
}
275 : | |||
=head3 display()

Abstract in this base package: the implementation here always dies.
The display will be different for each type.

=cut

sub display {
    my $message = "Abstract Method Called\n";

    die $message;
}
287 : | |||
=head3 display_table()

Abstract in this base package: the implementation here always dies.
The table display will be different for each type.

=cut

sub display_table {
    my $message = "Abstract Table Method Called\n";

    die $message;
}
299 : | |||
=head3 get_objects()

This is the B<REAL WORKHORSE> method of this Package.

Collects the observation datasets for the feature C<$fid> and wraps each
one in the matching C<Observation::*> object.

When C<$scope> is true only a single cluster observation (carrying
C<$scope> as its context) is produced.  Otherwise the identical
proteins, CDD domain hits, similarities, functional coupling, location
predictions and PDB hits are gathered.

Returns a reference to an array of observation objects.  A dataset whose
class/type has no known Observation package is skipped rather than
stored as an undefined entry.

=cut

sub get_objects {
    my ($self, $fid, $scope) = @_;

    my @matched_datasets;

    if ($scope) {
        get_cluster_observations($fid, \@matched_datasets, $scope);
    }
    else {
        # The FIG handle is only needed on this path; the attributes are
        # fetched once and shared by the attribute based collectors.
        my $fig            = FIG->new;
        my @attributes     = $fig->get_attributes($fid);
        my %domain_classes = ('CDD' => 1);

        get_identical_proteins($fid, \@matched_datasets);
        get_attribute_based_domain_observations($fid, \%domain_classes, \@matched_datasets, \@attributes);
        get_sims_observations($fid, \@matched_datasets);
        get_functional_coupling($fid, \@matched_datasets);
        get_attribute_based_location_observations($fid, \@matched_datasets, \@attributes);
        get_pdb_observations($fid, \@matched_datasets, \@attributes);
    }

    # A class specific package takes precedence over the generic "dom"
    # type mapping, as it did in the original chain of if statements.
    my %package_for_class = (
        'PCH'                  => 'Observation::FC',
        'IDENTICAL'            => 'Observation::Identical',
        'SIGNALP_CELLO_TMPRED' => 'Observation::Location',
        'SIM'                  => 'Observation::Sims',
        'CLUSTER'              => 'Observation::Cluster',
        'PDB'                  => 'Observation::PDB',
    );

    my $objects = [];
    foreach my $dataset (@matched_datasets) {
        my $class = defined $dataset->{'class'} ? $dataset->{'class'} : '';
        my $type  = defined $dataset->{'type'}  ? $dataset->{'type'}  : '';

        my $package = $package_for_class{$class};
        $package = 'Observation::Domain' if !$package && $type eq "dom";

        # Nothing knows how to present this dataset; do not hand an
        # undefined object to the caller.
        next unless $package;

        push(@$objects, $package->new($dataset));
    }

    return $objects;
}
361 : | |||
=head3 display_housekeeping

Returns the housekeeping data for a given peg as an HTML table: FIG ID,
organism name and ID, taxonomy, gene location and strand, function, any
EC numbers found in the function, subsystems and aliases grouped by the
database that get_database() assigns to them.

=cut

sub display_housekeeping {
    my ($self, $fid) = @_;
    my $fig = FIG->new;

    my $org_name = $fig->org_of($fid);
    my $org_id   = $fig->orgid_of_orgname($org_name);
    my $loc      = $fig->feature_location($fid);
    my ($contig, $beg, $end) = $fig->boundaries_of($loc);
    my $strand     = ($beg <= $end) ? '+' : '-';
    my @subsystems = $fig->subsystems_for_peg($fid);
    my $function   = $fig->function_of($fid);
    my @aliases    = $fig->feature_aliases($fid);
    my $taxonomy   = $fig->taxonomy_of($org_id);

    # EC numbers are embedded in the function text as "(EC n.n.n.n)".
    my @ecs = ($function =~ /\(EC\s(\d+\.[-\d+]+\.[-\d+]+\.[-\d+]+)\)/g);

    my $content = '';
    $content .= qq(<b>General Protein Data</b><br><br><br><table border="0">);
    $content .= qq(<tr width=15%><td >FIG ID</td><td>$fid</td></tr>\n);
    $content .= qq(<tr width=15%><td >Organism Name</td><td>$org_name, $org_id</td></tr>\n);
    $content .= qq(<tr><td width=15%>Taxonomy</td><td>$taxonomy</td></tr>\n);
    $content .= qq(<tr width=15%><td>FIG Organism ID</td><td>$org_id</td></tr>\n);
    $content .= qq(<tr width=15%><td>Gene Location</td><td>Contig $contig [$beg,$end], Strand $strand</td></tr>\n);
    $content .= qq(<tr width=15%><td>Function</td><td>$function</td></tr>\n);

    if (@ecs) {
        $content .= qq(<tr><td>EC:</td><td>);
        foreach my $ec (@ecs) {
            my $ec_name = $fig->ec_name($ec);
            $content .= join(" -- ", $ec, $ec_name) . "<br>\n";
        }
        $content .= qq(</td></tr>\n);
    }

    if (@subsystems) {
        $content .= qq(<tr><td>Subsystems</td><td>);
        foreach my $subsystem (@subsystems) {
            $content .= join(" -- ", @$subsystem) . "<br>\n";
        }
        # The original never closed this cell/row, leaving broken markup.
        $content .= qq(</td></tr>\n);
    }

    if (@aliases) {
        # Group each distinct alias under its database, keeping the
        # aliases in the order in which they were reported.
        my (%seen, %aliases_for_db);
        foreach my $alias (@aliases) {
            next if $seen{$alias}++;
            my $db = get_database($alias);
            $db = '' unless defined $db;    # unrecognised id style
            push(@{ $aliases_for_db{$db} }, $alias);
        }

        $content .= qq(<tr><td>Aliases</td><td><table border="0">);
        foreach my $db (sort keys %aliases_for_db) {
            $content .= qq(<tr><td>$db:</td><td>) . join(", ", @{ $aliases_for_db{$db} }) . qq(</td></tr>\n);
        }
        # Close the nested table before the cell and row that contain it.
        $content .= qq(</table></td></tr>\n);
    }

    $content .= qq(</table><p>\n);

    return ($content);
}
430 : | |||
=head3 get_sims_summary

Uses the similarities of a peg as input and builds a tree view of the
taxonomy of the FIG hits.

Returns a reference to a hash with three sub-hashes:

=over 4

=item children

taxon => reference to the list of distinct taxa seen directly below it
(the top level is stored under "Root")

=item parent

taxon => the taxon directly above it

=item count

taxon => number of times a child was recorded below it, counted before
duplicates are removed from the children list

=back

=cut

sub get_sims_summary {
    my ($observation, $fid) = @_;
    my $fig = FIG->new;
    my %families;
    my @sims = $fig->nsims($fid, 20000, 10, "all");

    foreach my $sim (@sims) {
        my $hit = $sim->[1];
        next if ($hit !~ /fig\|/);

        my $genome   = $fig->genome_of($hit);
        my $taxonomy = $fig->taxonomy_of($genome);
        next unless defined $taxonomy;

        # Walk the lineage from the root down, linking each taxon to the
        # one before it.
        my $parent_tax = "Root";
        foreach my $tax (split(/\; /, $taxonomy)) {
            push(@{ $families{children}{$parent_tax} }, $tax);
            $families{parent}{$tax} = $parent_tax;
            $parent_tax = $tax;
        }
    }

    foreach my $key (keys %{ $families{children} }) {
        # Record the raw number of entries first, then reduce the child
        # list to its distinct members (first occurrence order).
        $families{count}{$key} = scalar @{ $families{children}{$key} };

        my %saw;
        my @distinct = grep { !$saw{$_}++ } @{ $families{children}{$key} };
        $families{children}{$key} = \@distinct;
    }

    return (\%families);
}
463 : | |||
464 : | mkubal | 1.1 | =head1 Internal Methods |
465 : | |||
466 : | These methods are not meant to be used outside of this package. | ||
467 : | |||
468 : | B<Please do not use them outside of this package!> | ||
469 : | |||
470 : | =cut | ||
471 : | |||
# Turns attribute rows into "dom" datasets.
#
# Arguments:
#   $fid            - FIG ID of the feature; stored as 'fig_id'
#   $domain_classes - hash reference; an attribute is used only when the
#                     part of its key before "::" is a true entry here
#   $datasets_ref   - array reference that receives one hash per hit
#   $attributes_ref - array reference of attribute rows; element 1 is read
#                     as the key and element 2 as the value
#
# A value is used only when it starts with "<number>.<number>;<from>-<to>".
# The leading number is kept verbatim as 'score' and is also decoded into
# 'evalue': "X.Y" becomes (Y/100) . "e-" . (1000 - X), while the literal
# "0.0" stays "0.0".
sub get_attribute_based_domain_observations {
    my ($fid, $domain_classes, $datasets_ref, $attributes_ref) = @_;

    foreach my $attr_ref (@$attributes_ref) {
        my $key = $attr_ref->[1];
        my $val = $attr_ref->[2];
        next unless defined $key && defined $val;

        my ($class) = split(/::/, $key);
        next unless defined $class && $domain_classes->{$class};

        next unless $val =~ /^(\d+\.\d+);(\d+)-(\d+)/;
        my ($raw_evalue, $from, $to) = ($1, $2, $3);

        my $evalue;
        if ($raw_evalue eq '0.0') {
            # The general decoding below would turn this into "0e-1000".
            $evalue = "0.0";
        }
        else {
            my ($exponent_part, $mantissa_part) = split(/\./, $raw_evalue);
            $evalue = ($mantissa_part / 100) . "e-" . (1000 - $exponent_part);
        }

        my $dataset = {
            'class'  => $class,
            'acc'    => $key,
            'type'   => "dom",
            'evalue' => $evalue,
            'start'  => $from,
            'stop'   => $to,
            'fig_id' => $fid,
            'score'  => $raw_evalue,
        };

        push(@{$datasets_ref}, $dataset);
    }
}
516 : | mkubal | 1.12 | |
# Builds the single "loc" dataset (class SIGNALP_CELLO_TMPRED) for a
# feature from its SignalP, CELLO, Phobius and TMPRED attributes.
#
# Arguments:
#   $fid            - FIG ID of the feature; stored as 'fig_id'
#   $datasets_ref   - array reference that receives the dataset
#   $attributes_ref - array reference of attribute rows; element 1 is read
#                     as the key ("Tool::sub_key") and element 2 as the value
#
# The dataset is always pushed, even when no location attribute is found.
sub get_attribute_based_location_observations {
    my ($fid, $datasets_ref, $attributes_ref) = @_;

    my $dataset = {
        'type'   => "loc",
        'class'  => 'SIGNALP_CELLO_TMPRED',
        'fig_id' => $fid,
    };

    foreach my $attr_ref (@$attributes_ref) {
        my $key = $attr_ref->[1];
        next unless defined $key && $key =~ /SignalP|CELLO|TMPRED|Phobius/;

        my ($sub_class, $sub_key) = split(/::/, $key);
        $sub_class = '' unless defined $sub_class;
        my $sub_name = defined $sub_key ? $sub_key : '';
        my $value    = $attr_ref->[2];

        if ($sub_class eq "SignalP") {
            if ($sub_name eq "cleavage_site") {
                # Value layout: "<probability>;<location>".
                my @value_parts = defined $value ? split(/;/, $value) : ();
                $dataset->{'cleavage_prob'} = $value_parts[0];
                $dataset->{'cleavage_loc'}  = $value_parts[1];
            }
            elsif ($sub_name eq "signal_peptide") {
                $dataset->{'signal_peptide_score'} = $value;
            }
        }
        elsif ($sub_class eq "CELLO") {
            # For CELLO the part after "::" is itself the predicted location.
            $dataset->{'cello_location'} = $sub_key;
            $dataset->{'cello_score'}    = $value;
        }
        elsif ($sub_class eq "Phobius") {
            if ($sub_name eq "transmembrane") {
                $dataset->{'phobius_tm_locations'} = $value;
            }
            elsif ($sub_name eq "signal") {
                $dataset->{'phobius_signal_location'} = $value;
            }
        }
        elsif ($sub_class eq "TMPRED") {
            # Value layout: "<score>;<locations>".
            my @value_parts = defined $value ? split(/;/, $value) : ();
            $dataset->{'tmpred_score'}     = $value_parts[0];
            $dataset->{'tmpred_locations'} = $value_parts[1];
        }
    }

    push(@{$datasets_ref}, $dataset);
}
573 : | |||
=head3 get_pdb_observations() (internal)

This method sets the type and class for PDB observations.

For every attribute row whose key (element 1) contains "PDB" one dataset
of class PDB / type seq is appended to C<$datasets_ref>.  The part of
the key after "::" becomes the accession.  The value (element 2) is read
as "<evalue>;<start>-<stop>"; an evalue of the form "X.Y" is decoded to
(Y/100) . "e-" . (1000 - X).

=cut

sub get_pdb_observations {
    my ($fid, $datasets_ref, $attributes_ref) = @_;

    foreach my $attr_ref (@$attributes_ref) {
        my $key = $attr_ref->[1];
        next unless defined $key && $key =~ /PDB/;

        my (undef, $acc) = split(/::/, $key);
        my $value = defined $attr_ref->[2] ? $attr_ref->[2] : '';
        my ($evalue, $location) = split(/;/, $value);

        if (defined $evalue && $evalue =~ /(\d+)\.(\d+)/) {
            my $part2 = 1000 - $1;
            my $part1 = $2 / 100;
            $evalue = $part1 . "e-" . $part2;
        }

        my ($start, $stop) = defined $location ? split(/-/, $location) : ();

        my $dataset = {
            'class'  => 'PDB',
            'type'   => 'seq',
            'acc'    => $acc,
            'evalue' => $evalue,
            'start'  => $start,
            'stop'   => $stop,
            'fig_id' => $fid,
        };

        push(@{$datasets_ref}, $dataset);
    }
}
615 : | |||
=head3 get_cluster_observations() (internal)

This method sets the type and class for cluster observations: it appends
one CLUSTER / fc dataset carrying the given scope as its context.

=cut

sub get_cluster_observations {
    my ($fid, $datasets_ref, $scope) = @_;

    my %cluster = (
        'class'   => 'CLUSTER',
        'type'    => 'fc',
        'context' => $scope,
        'fig_id'  => $fid,
    );

    push(@{$datasets_ref}, \%cluster);
}
632 : | |||
633 : | |||
=head3 get_sims_observations() (internal)

This method retrieves the similarities of a feature and fills the
internal data structures: one SIM / seq dataset per retained hit is
appended to C<$datasets_ref>.

A hit is dropped when its ID is listed as an alias of one of the FIG
hits, or when the part of its ID after the first "|" has already been
seen.

=cut

sub get_sims_observations {
    my ($fid, $datasets_ref) = @_;

    my $fig  = FIG->new;
    my @sims = $fig->nsims($fid, 500, 1e-20, "all");

    # Collect every alias of every FIG hit, so that a non-FIG hit naming
    # the same protein can be recognised below.
    my %id_list;
    foreach my $sim (@sims) {
        my $hit = $sim->[1];
        next if ($hit !~ /^fig\|/);
        foreach my $alias ($fig->feature_aliases($hit)) {
            $id_list{$alias} = 1;
        }
    }

    my %already;
    my @new_sims;
    foreach my $sim (@sims) {
        my $hit = $sim->[1];
        my ($id) = $hit =~ /\|(.*)/;

        # IDs without a "|" have no prefix to strip.  Without this
        # fallback they would all share one undefined key and only the
        # first of them would be kept.
        $id = $hit unless defined $id;

        next if (defined($already{$id}));
        next if (defined($id_list{$hit}));
        push(@new_sims, $sim);
        $already{$id} = 1;
    }

    foreach my $sim (@new_sims) {
        my $hit = $sim->[1];

        my $dataset = {
            'class'    => 'SIM',
            'acc'      => $hit,
            'identity' => $sim->[2],
            'type'     => 'seq',
            'evalue'   => $sim->[10],
            'qstart'   => $sim->[6],
            'qstop'    => $sim->[7],
            'hstart'   => $sim->[8],
            'hstop'    => $sim->[9],
            'database' => scalar get_database($hit),
            'organism' => scalar $fig->org_of($hit),
            'function' => scalar $fig->function_of($hit),
            'qlength'  => $sim->[12],
            'hlength'  => $sim->[13],
            'fig_id'   => $fid,
        };

        push(@{$datasets_ref}, $dataset);
    }
}
703 : | |||
=head3 get_database (internal)

This method gets the database association from the sequence id.

Returns the database name for the first rule the id matches, or undef
when the id is undefined or matches no rule.

=cut

sub get_database {
    my ($id) = @_;

    my $db;
    return $db unless defined $id;

    # Checked in order; the first matching rule wins.
    my @rules = (
        [ qr/^fig\|/         => "FIG" ],
        [ qr/^gi\|/          => "NCBI" ],
        [ qr/^[NXYZA]P_/     => "RefSeq" ],
        [ qr/^sp\|/          => "SwissProt" ],
        [ qr/^uni\|/         => "UniProt" ],
        [ qr/^tigr\|/        => "TIGR" ],
        [ qr/^pir\|/         => "PIR" ],
        [ qr/^kegg\||Spy/    => "KEGG" ],    # "Spy" may occur anywhere in the id
        [ qr/^tr\|/          => "TrEMBL" ],
        [ qr/^eric\|/        => "ASAP" ],
        [ qr/^img\|/         => "JGI" ],
    );

    foreach my $rule (@rules) {
        my ($pattern, $name) = @$rule;
        if ($id =~ $pattern) {
            $db = $name;
            last;
        }
    }

    return $db;
}
728 : | |||
729 : | mkubal | 1.24 | |
=head3 get_identical_proteins() (internal)

This method retrieves the protein IDs that FIG maps to the given feature
and fills the internal data structures: one IDENTICAL / seq dataset is
appended to C<$datasets_ref>.

Its 'rows' entry holds one [id, database, function] triple per mapped ID
that has a function, and is undefined when there is no such ID.  The
feature itself and IDs starting with "xxx" are left out.

=cut

sub get_identical_proteins {
    my ($fid, $datasets_ref) = @_;

    my $fig = FIG->new;

    my @maps_to = grep { $_ ne $fid and $_ !~ /^xxx/ }
                  map  { $_->[0] } $fig->mapped_prot_ids($fid);

    my @rows;
    foreach my $id (@maps_to) {
        my $function = $fig->function_of($id);
        next unless $function;
        push(@rows, [ $id, scalar get_database($id), $function ]);
    }

    my $dataset = {
        'class'  => 'IDENTICAL',
        'type'   => 'seq',
        'fig_id' => $fid,
        # Left undefined when nothing matched, as before.
        'rows'   => (@rows ? \@rows : undef),
    };

    push(@{$datasets_ref}, $dataset);
}
769 : | |||
=head3 get_functional_coupling() (internal)

This method retrieves the functional coupling of a protein given a peg
ID and appends one PCH / fc dataset to C<$datasets_ref>.

Its 'rows' entry is a reference to a list of [score, neighbour,
function of the neighbour] triples, one per coupling reported by FIG.

=cut

sub get_functional_coupling {
    my ($fid, $datasets_ref) = @_;

    my $fig = FIG->new;

    # default parameters for coupling and evidence
    my ($bound, $sim_cutoff, $coupling_cutoff) = (5000, 1.0e-10, 4);

    my @fc_data = $fig->coupling_and_evidence($fid, $bound, $sim_cutoff, $coupling_cutoff, 1);

    my @rows = map {
        my ($sc, $neigh) = @$_;
        [ $sc, $neigh, scalar $fig->function_of($neigh) ]
    } @fc_data;

    my $dataset = {
        'class'  => 'PCH',
        'type'   => 'fc',
        'fig_id' => $fid,
        'rows'   => \@rows,
    };

    push(@{$datasets_ref}, $dataset);
}
806 : | arodri7 | 1.5 | |
=head3 new (internal)

Instantiate a new object.  The class, type, fig_id and score entries of
the dataset are copied into the object, which is blessed into the
invoking class.

=cut

sub new {
    my ($class, $dataset) = @_;

    my %fields = map { $_ => $dataset->{$_} } qw(class type fig_id score);

    return bless({%fields}, $class);
}
826 : | |||
=head3 identity (internal)

Returns the % identity of the similar sequence

=cut

sub identity {
    my $self = shift;

    return $self->{'identity'};
}
838 : | |||
=head3 fig_id (internal)

Returns the stored fig_id entry.

=cut

sub fig_id {
    my $self = shift;

    return $self->{'fig_id'};
}
847 : | |||
=head3 feature_id (internal)

Returns the stored feature_id entry.

=cut

sub feature_id {
    my $self = shift;

    return $self->{'feature_id'};
}
858 : | arodri7 | 1.5 | |
=head3 id (internal)

Returns the ID of the identical sequence

=cut

sub id {
    my $self = shift;

    return $self->{'id'};
}
870 : | |||
=head3 organism (internal)

Returns the organism of the identical sequence

=cut

sub organism {
    my $self = shift;

    return $self->{'organism'};
}
882 : | |||
=head3 function (internal)

Returns the function of the identical sequence

=cut

sub function {
    my $self = shift;

    return $self->{'function'};
}
894 : | |||
=head3 database (internal)

Returns the database of the identical sequence

=cut

sub database {
    my $self = shift;

    return $self->{'database'};
}
906 : | |||
# Returns the stored score entry.
# NOTE(review): this package already defines an identical score() earlier
# in the file; this later definition is the one Perl keeps.
sub score {
    my $self = shift;

    return $self->{'score'};
}
912 : | |||
913 : | mkubal | 1.20 | ############################################################ |
914 : | ############################################################ | ||
915 : | package Observation::PDB; | ||
916 : | |||
917 : | use base qw(Observation); | ||
918 : | |||
# Constructor for PDB observations: builds the object through the parent
# constructor, then copies the acc, evalue, start and stop entries of the
# dataset into it.
sub new {
    my ($class, $dataset) = @_;

    my $self = $class->SUPER::new($dataset);

    foreach my $field (qw(acc evalue start stop)) {
        $self->{$field} = $dataset->{$field};
    }

    return bless($self, $class);
}
930 : | |||
931 : | =head3 display() | ||
932 : | |||
933 : | displays data stored in best_PDB attribute and in Ontology server for given PDB id | ||
934 : | |||
935 : | =cut | ||
936 : | |||
# Add the PDB hit of this feature to a drawer object as one line.
#
# Parameters:
#   $self - the PDB observation; fig_id, acc, evalue, start and stop are read
#   $gd   - drawer object; only add_line($line_data, $line_config) is called
# Returns:
#   $gd, after one line with two elements has been added: the full-length
#   protein (1 .. length of its translation) and the region hit by the PDB
#   entry, which carries the descriptions and the link to rcsb.org.
sub display{
    my ($self,$gd) = @_;

    my $fid = $self->fig_id;
    my $acc = $self->acc;

    # Descriptive text for the PDB id is looked up in the Ontology database.
    # Every field keeps the "not available" placeholder when the lookup
    # yields nothing (including no result reference at all).
    my ($pdb_description,$pdb_source,$pdb_ligand) = ("not available") x 3;
    my $dbmaster = DBMaster->new(-database =>'Ontology');
    my $pdb_objs = $dbmaster->pdb->get_objects( { 'id' => $acc } );
    if ($pdb_objs && @$pdb_objs) {
        my $pdb_obj = $pdb_objs->[0];
        $pdb_description = $pdb_obj->description;
        $pdb_source      = $pdb_obj->source;
        $pdb_ligand      = $pdb_obj->ligand;
    }

    # Direct method call instead of the indirect-object form "new FIG", which
    # Perl has to guess at because this package defines its own new().
    my $fig = FIG->new;
    my $fid_stop = length($fig->get_translation($fid));

    my $line_config = { 'title' => "PDB hit for $fid",
                        'short_title' => "best PDB",
                        'basepair_offset' => '1' };

    # Background element: the whole protein.
    my $fid_element_hash = {
        "title" => $fid,
        "start" => '1',
        "end" => $fid_stop,
        "color"=> '1',
        "zlayer" => '1'
        };

    my $start_stop_value = $self->start."_".$self->stop;
    my $descriptions = [
        { "title" => 'id',              "value" => $acc },
        { "title" => 'pdb description', "value" => $pdb_description },
        { "title" => "score",           "value" => $self->evalue },
        { "title" => "start-stop",      "value" => $start_stop_value },
        { "title" => "source",          "value" => $pdb_source },
        { "title" => "pdb ligand",      "value" => $pdb_ligand },
    ];

    my $link_url ="http://www.rcsb.org/pdb/explore/explore.do?structureId=".$acc;
    my $links_list = [ { "link_title" => $acc,
                         "link" => $link_url } ];

    # Foreground element: the region covered by the PDB hit.
    my $pdb_element_hash = {
        "title" => "PDB homology",
        "start" => $self->start,
        "end" => $self->stop,
        "color"=> '6',
        "zlayer" => '3',
        "links_list" => $links_list,
        "description" => $descriptions};

    $gd->add_line([ $fid_element_hash, $pdb_element_hash ], $line_config);

    return $gd;
}
1034 : | |||
1035 : | 1; | ||
1036 : | arodri7 | 1.11 | |
1037 : | arodri7 | 1.9 | ############################################################ |
1038 : | ############################################################ | ||
1039 : | package Observation::Identical; | ||
1040 : | |||
1041 : | use base qw(Observation); | ||
1042 : | |||
# Constructor: lets the parent class build the object from $dataset, stores
# the dataset's 'rows' entry on it and blesses it into the requested class.
sub new {
    my ($class,$dataset) = @_;

    my $self = $class->SUPER::new($dataset);
    $self->{rows} = $dataset->{rows};

    return bless($self,$class);
}
1052 : | |||
1053 : | mkubal | 1.24 | =head3 display_table() |
1054 : | arodri7 | 1.6 | |
If available, use the function specified here to display the "raw" observation.
This code will display a table of the essentially identical proteins.

B<Please note> that the URL linked to in display_method() is an external component and needs to be added to the code for every class of evidence.
1061 : | arodri7 | 1.6 | |
1062 : | =cut | ||
1063 : | |||
1064 : | |||
# Build the table content for the essentially identical proteins.
#
# Reads the rows stored on the object; each row is an array reference whose
# first three entries are used as (id, who, assignment).
#
# Returns:
#   an array reference with one [who, linked id, organism, assignment] entry
#   per row, or - when there are no rows - an HTML paragraph saying so.
sub display_table{
    my ($self) = @_;

    # Direct method calls instead of the indirect-object forms "new FIG" /
    # "new CGI", which Perl has to guess at because this package defines its
    # own new().
    my $fig = FIG->new;
    my $cgi = CGI->new;
    my $rows = $self->rows;

    my @all_domains;
    foreach my $row (@$rows) {
        my $id = $row->[0];
        my $who = $row->[1];
        my $assignment = $row->[2];
        my $organism = $fig->org_of($id);
        push(@all_domains,
             [ $who, HTML::set_prot_links($cgi,$id), $organism, $assignment ]);
    }

    return (\@all_domains) if @all_domains;
    return ("<p>This PEG does not have any essentially identical proteins</p>");
}
1097 : | mkubal | 1.7 | |
1098 : | arodri7 | 1.9 | 1; |
1099 : | |||
1100 : | ######################################### | ||
1101 : | ######################################### | ||
1102 : | package Observation::FC; | ||
1103 : | 1; | ||
1104 : | |||
1105 : | use base qw(Observation); | ||
1106 : | |||
# Constructor: lets the parent class build the object from $dataset, stores
# the dataset's 'rows' entry on it and blesses it into the requested class.
sub new {
    my ($class,$dataset) = @_;

    my $self = $class->SUPER::new($dataset);
    $self->{rows} = $dataset->{rows};

    return bless($self,$class);
}
1116 : | |||
1117 : | mkubal | 1.24 | =head3 display_table() |
1118 : | arodri7 | 1.9 | |
If available, use the function specified here to display the "raw" observation.
This code will display a table of the functional coupling evidence.

B<Please note> that the URL linked to in display_method() is an external component and needs to be added to the code for every class of evidence.
1125 : | |||
1126 : | =cut | ||
1127 : | |||
# Build the table content for the functional coupling evidence.
#
# Parameters:
#   $self    - the FC observation; fig_id and rows are read
#   $dataset - accepted for interface compatibility, not used here
#
# Each row is an array reference; entry 0 is used as the score and entry 1
# as the id of the coupled feature.
#
# Returns:
#   an array reference with one [linked score, row->[1], row->[2]] entry per
#   row, or - when there are no rows - an HTML paragraph saying so.
sub display_table {
    my ($self,$dataset) = @_;

    my $fid = $self->fig_id;
    my $rows = $self->rows;
    my $cgi = CGI->new;

    # The script URL is the same for every row, so ask for it once.
    my $script_url = $cgi->url(-relative => 1);

    my @functional_data;
    foreach my $row (@$rows) {
        my $score = $row->[0];
        my $toid = $row->[1];

        # The score links to the coupling-evidence page.  The href is quoted
        # because the URL contains '?', '&' and '=' plus interpolated ids.
        my $link = $script_url . "?user=master&request=show_coupling_evidence&prot=$fid&to=$toid&SPROUT=";
        my $sc_link = qq(<a href="$link">$score</a>);

        push(@functional_data, [ $sc_link, $row->[1], $row->[2] ]);
    }

    return (\@functional_data) if @functional_data;
    return ("<p>This PEG does not have any functional coupling</p>");
}
1163 : | |||
1164 : | |||
1165 : | ######################################### | ||
1166 : | ######################################### | ||
1167 : | mkubal | 1.7 | package Observation::Domain; |
1168 : | |||
1169 : | use base qw(Observation); | ||
1170 : | |||
# Constructor: lets the parent class build the object from $dataset, then
# copies the domain-hit fields (evalue, acc, start, stop) out of the dataset
# hash and blesses the result into the requested class.
sub new {
    my ($class,$dataset) = @_;

    my $self = $class->SUPER::new($dataset);
    foreach my $field (qw(evalue acc start stop)) {
        $self->{$field} = $dataset->{$field};
    }

    return bless($self,$class);
}
1183 : | |||
# Add one domain hit to a drawer object as a line of its own.
#
# Parameters:
#   $thing - the domain observation; acc, evalue, class, type, start and
#            stop are read.  acc is split on "::" into (database, id).
#   $gd    - drawer object; only add_line($line_data, $line_config) is called
# Returns:
#   $gd, after one line with a single element has been added.
sub display {
    my ($thing,$gd) = @_;

    my $color = "4";

    my $acc = $thing->acc;
    my ($db,$id) = split("::",$acc);

    my $dbmaster = DBMaster->new(-database =>'Ontology');

    # Name and description are only looked up for CDD accessions.  Start from
    # placeholders so that other databases, and CDD ids that are not found,
    # still produce labelled entries instead of undefined titles and values.
    my ($name_title,$description_title) = ("name","description");
    my ($name_value,$description_value) = ("not available","not available");
    if($db eq "CDD"){
        my $cdd_objs = $dbmaster->cdd->get_objects( { 'id' => $id } );
        if($cdd_objs && @$cdd_objs){
            my $cdd_obj = $cdd_objs->[0];
            $name_value = $cdd_obj->term;
            $description_value = $cdd_obj->description;
        }
    }

    my $line_config = { 'title' => $acc,
                        'short_title' => $name_value,
                        'basepair_offset' => '1' };

    my $descriptions = [
        { "title" => $name_title,        "value" => $name_value },
        { "title" => $description_title, "value" => $description_value },
        { "title" => "score",            "value" => $thing->evalue },
    ];

    # The numeric part after "::" is preferred as link id.  When the id part
    # does not start with digits, fall back to the id itself rather than
    # building a link with an empty id.
    # NOTE(review): Pfam-style ids look like the case this matters for - confirm.
    my $link_id = ($acc =~ /\w+::(\d+)/) ? $1 : $id;

    # The CDD link points at the NCBI host directly; it used to go through a
    # third-party library proxy host name.
    my $link_url;
    if ($thing->class eq "CDD"){$link_url = "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid=$link_id"}
    elsif($thing->class eq "PFAM"){$link_url = "http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?$link_id"}
    else{$link_url = "NO_URL"}

    my $links_list = [ { "link_title" => $acc,
                         "link" => $link_url } ];

    my $element_hash = {
        "title" => $thing->type,
        "start" => $thing->start,
        "end" =>  $thing->stop,
        "color"=> $color,
        "zlayer" => '2',
        "links_list" => $links_list,
        "description" => $descriptions};

    $gd->add_line([ $element_hash ], $line_config);

    return $gd;
}
1268 : | arodri7 | 1.28 | |
# Build the table content for the domain hits in a list of observations.
#
# Parameters:
#   $self    - not read
#   $dataset - array reference of observation objects; only those whose
#              type matches /dom/ are used
#
# Returns:
#   an array reference with one
#   [database, accession, name, "start - stop", evalue, description]
#   entry per domain hit, or - when there is none - an HTML paragraph saying
#   so.  Name and description are only filled in for CDD accessions.
sub display_table {
    my ($self,$dataset) = @_;

    # Not used below.  NOTE(review): kept in case constructing the CGI object
    # here matters to callers - confirm before removing.
    my $cgi = CGI->new;

    my @data;
    my $dbmaster;

    foreach my $thing (@$dataset) {
        next if ($thing->type !~ /dom/);

        my $acc = $thing->acc;
        my ($db,$id) = split("::",$acc);

        # One handle serves all rows; it is created when the first domain
        # hit is seen instead of once per row.
        $dbmaster ||= DBMaster->new(-database =>'Ontology');

        my ($name_value,$description_value);
        if($db eq "CDD"){
            my $cdd_objs = $dbmaster->cdd->get_objects( { 'id' => $id } );
            if($cdd_objs && @$cdd_objs){
                my $cdd_obj = $cdd_objs->[0];
                $name_value = $cdd_obj->term;
                $description_value = $cdd_obj->description;
            }
            else{
                $name_value = "not available";
                $description_value = "not available";
            }
        }

        my $location = $thing->start . " - " . $thing->stop;

        push(@data,
             [ $db, $acc, $name_value, $location, $thing->evalue, $description_value ]);
    }

    # Explicit returns, like the other display_table methods in this file,
    # instead of relying on the value of a trailing if/else.
    return (\@data) if @data;
    return ("<p>This PEG does not have any similarities to domains</p>");
}
1323 : | |||
1324 : | mkubal | 1.7 | |
1325 : | arodri7 | 1.10 | ######################################### |
1326 : | ######################################### | ||
1327 : | mkubal | 1.12 | package Observation::Location; |
1328 : | |||
1329 : | use base qw(Observation); | ||
1330 : | |||
# Constructor: lets the parent class build the object from $dataset, then
# copies the localization fields (SignalP, CELLO, TMPRED and Phobius
# results) out of the dataset hash and blesses the result into the
# requested class.
sub new {
    my ($class,$dataset) = @_;

    my $self = $class->SUPER::new($dataset);

    my @location_fields = qw(
        cleavage_prob
        cleavage_loc
        signal_peptide_score
        cello_location
        cello_score
        tmpred_score
        tmpred_locations
        phobius_signal_location
        phobius_tm_locations
    );
    foreach my $field (@location_fields) {
        $self->{$field} = $dataset->{$field};
    }

    return bless($self,$class);
}
1348 : | |||
# Add the localization evidence of a feature to a drawer object.
#
# Parameters:
#   $thing - the Location observation; fig_id and the localization
#            accessors (cello_*, tmpred_*, phobius_*, signal_peptide_score,
#            cleavage_*) are read
#   $gd    - drawer object; only add_line($line_data, $line_config) is called
#
# Up to four lines are added, in this order and each only when its data is
# present: CELLO, Transmembrane (TMPRED), Phobius, SignalP.  Location
# strings are "begin-end" ranges; lists of them are comma separated.
#
# Returns:
#   $gd
sub display {
    my ($thing,$gd) = @_;

    my $fid = $thing->fig_id;

    # Direct method call instead of the indirect-object form "new FIG", which
    # Perl has to guess at because this package defines its own new().
    my $fig = FIG->new;
    my $length = length($fig->get_translation($fid));

    # Every line shares the same configuration apart from its short title.
    # A fresh hash is built per line so that lines never share one.
    my $line_config_for = sub {
        my ($short_title) = @_;
        return { 'title' => 'Localization Evidence',
                 'short_title' => $short_title,
                 'basepair_offset' => '1' };
    };

    # Splits a comma separated list; an undefined value gives an empty list.
    my $split_list = sub {
        my ($list) = @_;
        return defined($list) ? split(",",$list) : ();
    };

    # --- CELLO: one box spanning the whole protein ---
    my $cello_location = $thing->cello_location;
    if($cello_location){
        my $cello_score = $thing->cello_score;
        my $cello_descriptions = [
            { "title" => 'Best Cello Location', "value" => $cello_location },
            { "title" => 'Cello Score',         "value" => $cello_score },
        ];

        my $element_hash = {
            "title" => "CELLO",
            "start" => "1",
            "end" =>  $length + 1,
            "color"=> "6",
            "type" => 'box',
            "zlayer" => '1',
            "description" => $cello_descriptions};

        $gd->add_line([ $element_hash ], $line_config_for->('CELLO'));
    }

    # --- TMPRED: one small box per predicted transmembrane region ---
    my $tmpred_score = $thing->tmpred_score;
    if($tmpred_score){
        my $line_data = [];

        foreach my $tmpred ($split_list->($thing->tmpred_locations)){
            my ($begin,$end) = split("-",$tmpred);
            my $descriptions = [ { "title" => 'TMPRED score',
                                   "value" => $tmpred_score } ];

            push(@$line_data, {
                "title" => "transmembrane location",
                "start" => $begin + 1,
                "end" =>  $end + 1,
                "color"=> "2",
                "zlayer" => '5',
                "type" => 'smallbox',
                "description" => $descriptions});
        }

        $gd->add_line($line_data, $line_config_for->('Transmembrane'));
    }

    # --- Phobius: transmembrane regions plus an optional signal region ---
    my $phobius_signal_location = $thing->phobius_signal_location;
    my @phobius_tm_locations = $split_list->($thing->phobius_tm_locations);
    if(@phobius_tm_locations || $phobius_signal_location){
        my $line_data = [];

        foreach my $tm_loc (@phobius_tm_locations){
            my $descriptions = [ { "title" => 'Phobius TM Location',
                                   "value" => $tm_loc } ];
            my ($begin,$end) = split("-",$tm_loc);

            push(@$line_data, {
                "title" => "phobius transmembrane location",
                "start" => $begin + 1,
                "end" =>  $end + 1,
                "color"=> '6',
                "zlayer" => '4',
                "type" => 'bigbox',
                "description" => $descriptions});
        }

        if($phobius_signal_location){
            my $descriptions = [ { "title" => 'Phobius Signal Location',
                                   "value" => $phobius_signal_location } ];
            my ($begin,$end) = split("-",$phobius_signal_location);

            push(@$line_data, {
                "title" => "phobius signal locations",
                "start" => $begin + 1,
                "end" =>  $end + 1,
                "color"=> '1',
                "zlayer" => '5',
                "type" => 'box',
                "description" => $descriptions});
        }

        $gd->add_line($line_data, $line_config_for->('Phobius'));
    }

    # --- SignalP: one big box around the cleavage location ---
    my $signal_peptide_score = $thing->signal_peptide_score;
    if($signal_peptide_score){
        # As before, a false probability (0 or empty) is passed on as undef.
        my $cleavage_prob = $thing->cleavage_prob || undef;

        my $cleavage_loc = $thing->cleavage_loc;
        my ($cleavage_loc_begin,$cleavage_loc_end) =
            defined($cleavage_loc) ? split("-",$cleavage_loc) : ();

        my $descriptions = [
            { "title" => 'signal peptide score',      "value" => $signal_peptide_score },
            { "title" => 'cleavage site probability', "value" => $cleavage_prob },
        ];

        my $element_hash = {
            "title" => "SignalP",
            "start" => $cleavage_loc_begin - 2,
            "end" =>  $cleavage_loc_end + 1,
            "type" => 'bigbox',
            "color"=> "1",
            "zlayer" => '10',
            "description" => $descriptions};

        $gd->add_line([ $element_hash ], $line_config_for->('SignalP'));
    }

    return ($gd);
}
1521 : | |||
# Accessor: hands back whatever is stored under 'cleavage_loc'.
sub cleavage_loc {
    my $self = shift;
    return $self->{cleavage_loc};
}
1527 : | |||
# Accessor: hands back whatever is stored under 'cleavage_prob'.
sub cleavage_prob {
    my $self = shift;
    return $self->{cleavage_prob};
}
1533 : | |||
# Accessor: hands back whatever is stored under 'signal_peptide_score'.
sub signal_peptide_score {
    my $self = shift;
    return $self->{signal_peptide_score};
}
1539 : | |||
# Accessor: hands back whatever is stored under 'tmpred_score'.
sub tmpred_score {
    my $self = shift;
    return $self->{tmpred_score};
}
1545 : | |||
# Accessor: hands back whatever is stored under 'tmpred_locations'.
sub tmpred_locations {
    my $self = shift;
    return $self->{tmpred_locations};
}
1551 : | |||
# Accessor: hands back whatever is stored under 'cello_location'.
sub cello_location {
    my $self = shift;
    return $self->{cello_location};
}
1557 : | |||
# Accessor: hands back whatever is stored under 'cello_score'.
sub cello_score {
    my $self = shift;
    return $self->{cello_score};
}
1563 : | |||
# Accessor: hands back whatever is stored under 'phobius_signal_location'.
sub phobius_signal_location {
    my $self = shift;
    return $self->{phobius_signal_location};
}
1568 : | |||
# Accessor: hands back whatever is stored under 'phobius_tm_locations'.
sub phobius_tm_locations {
    my $self = shift;
    return $self->{phobius_tm_locations};
}
1573 : | |||
1574 : | |||
1575 : | mkubal | 1.12 | |
1576 : | ######################################### | ||
1577 : | ######################################### | ||
1578 : | arodri7 | 1.10 | package Observation::Sims; |
1579 : | |||
1580 : | use base qw(Observation); | ||
1581 : | |||
# Constructor: lets the parent class build the object from $dataset, then
# copies the similarity fields (identity, accession, e-value, query and hit
# coordinates and lengths, database, organism, function) out of the dataset
# hash and blesses the result into the requested class.
sub new {
    my ($class,$dataset) = @_;

    my $self = $class->SUPER::new($dataset);

    my @sim_fields = qw(
        identity acc evalue
        qstart qstop hstart hstop
        database organism function
        qlength hlength
    );
    foreach my $field (@sim_fields) {
        $self->{$field} = $dataset->{$field};
    }

    return bless($self,$class);
}
1602 : | |||
1603 : | arodri7 | 1.25 | =head3 display() |
1604 : | |||
1605 : | If available use the function specified here to display a graphical observation. | ||
1606 : | This code will display a graphical view of the similarities using the genome drawer object | ||
1607 : | |||
1608 : | =cut | ||
1609 : | |||
# Add one similarity hit to a drawer object as a line of its own.
#
# Parameters:
#   $self - the Sims observation; acc, organism, function, qstart, qstop,
#           hstart, hstop, evalue and identity are read
#   $gd   - drawer object; only add_line($line_data, $line_config) is called
#
# The line is titled with the organism and the part of the hit's genome id
# before the last '.', and its title links to the NCBI taxonomy page for
# that id.  The single element spans the query coordinates of the alignment
# and is coloured by color() from the e-value.
#
# Returns:
#   $gd
sub display {
    my ($self,$gd) = @_;

    # Direct method call instead of the indirect-object form "new FIG", which
    # Perl has to guess at because this package defines its own new().
    my $fig = FIG->new;
    my $peg = $self->acc;

    my $organism = $self->organism;
    my $genome = $fig->genome_of($peg);
    my ($org_tax) = ($genome) =~ /(.*)\./;
    my $function = $self->function;
    my $abbrev_name = $fig->abbrev($organism);
    my $align_start = $self->qstart;
    my $align_stop = $self->qstop;
    my $hit_start = $self->hstart;
    my $hit_stop = $self->hstop;

    my $tax_link = "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=" . $org_tax;

    # title_link carries the URL itself; it used to be the single-quoted
    # literal '$tax_link', which is never interpolated.
    my $line_config = { 'title' => "$organism [$org_tax]",
                        'short_title' => "$abbrev_name",
                        'title_link' => $tax_link,
                        'basepair_offset' => '0'
                        };

    # Links: the annotation page of the hit, then one per subsystem.
    my $url_link = "http://seed-viewer.theseed.org/index.cgi?action=ShowAnnotation&prot=".$peg;
    my $links_list = [ { "link_title" => $peg,
                         "link" => $url_link } ];

    my @subsystems = $fig->peg_to_subsystems($peg);
    foreach my $subsystem (@subsystems){
        push(@$links_list,
             { "link" => "http://seed-viewer.theseed.org/index.cgi?action=ShowSubsystem&subsystem_name=$subsystem",
               "link_title" => $subsystem });
    }

    # Strip the zeros that directly follow the '-' (e.g. 2e-05 becomes 2e-5)
    # before the value is coloured and shown.
    my $evalue = $self->evalue;
    $evalue =~ s/-0+/-/ if defined($evalue);

    my $color = &color($evalue);

    my $identity = $self->identity;
    my $ss_string = join (",", @subsystems);

    my $descriptions = [
        { "title" => "function",       "value" => $function },
        { "title" => "subsystems",     "value" => $ss_string },
        { "title" => "location start", "value" => $hit_start },
        { "title" => "location stop",  "value" => $hit_stop },
        { "title" => "E-Value",        "value" => $evalue },
        { "title" => "Identity",       "value" => $identity },
    ];

    my $element_hash = {
        "title" => $peg,
        "start" => $align_start,
        "end" =>  $align_stop,
        "type"=> 'box',
        "color"=> $color,
        "zlayer" => "2",
        "links_list" => $links_list,
        "description" => $descriptions
        };

    $gd->add_line([ $element_hash ], $line_config);

    return ($gd);
}
1711 : | |||
1712 : | mkubal | 1.24 | =head3 display_table() |
1713 : | arodri7 | 1.10 | |
If available, use the function specified here to display the "raw" observation.
This code will display a table of the similar proteins.

B<Please note> that the URL linked to in display_method() is an external component and needs to be added to the code for every class of evidence.
1718 : | |||
1719 : | =cut | ||
1720 : | |||
1721 : | mkubal | 1.24 | sub display_table { |
1722 : | arodri7 | 1.31 | my ($self,$dataset, $columns, $query_fid) = @_; |
1723 : | mkubal | 1.24 | |
1724 : | arodri7 | 1.10 | my $data = []; |
1725 : | my $count = 0; | ||
1726 : | my $content; | ||
1727 : | arodri7 | 1.11 | my $fig = new FIG; |
1728 : | mkubal | 1.24 | my $cgi = new CGI; |
1729 : | arodri7 | 1.28 | my @ids; |
1730 : | arodri7 | 1.10 | foreach my $thing (@$dataset) { |
1731 : | arodri7 | 1.28 | next if ($thing->class ne "SIM"); |
1732 : | push (@ids, $thing->acc); | ||
1733 : | } | ||
1734 : | |||
1735 : | arodri7 | 1.31 | my (%box_column, %subsystems_column, %evidence_column, %e_identical); |
1736 : | arodri7 | 1.29 | foreach my $col (@$columns){ |
1737 : | # get the column for the subsystems | ||
1738 : | if ($col eq "subsystem"){ | ||
1739 : | %subsystems_column = &get_subsystems_column(\@ids); | ||
1740 : | } | ||
1741 : | # get the column for the evidence codes | ||
1742 : | elsif ($col eq "evidence"){ | ||
1743 : | %evidence_column = &get_evidence_column(\@ids); | ||
1744 : | } | ||
1745 : | arodri7 | 1.33 | # get the column for pfam_domain |
1746 : | elsif ($col eq "pfam_domains"){ | ||
1747 : | %pfam_column = &get_pfam_column(\@ids); | ||
1748 : | } | ||
1749 : | arodri7 | 1.28 | } |
1750 : | |||
1751 : | arodri7 | 1.31 | my %e_identical = &get_essentially_identical($query_fid); |
1752 : | arodri7 | 1.33 | my $all_aliases = $fig->feature_aliases_bulk(\@ids); |
1753 : | arodri7 | 1.31 | |
1754 : | arodri7 | 1.28 | foreach my $thing (@$dataset) { |
1755 : | next if ($thing->class ne "SIM"); | ||
1756 : | arodri7 | 1.10 | my $single_domain = []; |
1757 : | $count++; | ||
1758 : | |||
1759 : | arodri7 | 1.11 | my $id = $thing->acc; |
1760 : | |||
1761 : | my $iden = $thing->identity; | ||
1762 : | my $ln1 = $thing->qlength; | ||
1763 : | my $ln2 = $thing->hlength; | ||
1764 : | my $b1 = $thing->qstart; | ||
1765 : | my $e1 = $thing->qstop; | ||
1766 : | my $b2 = $thing->hstart; | ||
1767 : | my $e2 = $thing->hstop; | ||
1768 : | my $d1 = abs($e1 - $b1) + 1; | ||
1769 : | my $d2 = abs($e2 - $b2) + 1; | ||
1770 : | my $reg1 = "$b1-$e1 (<b>$d1/$ln1</b>)"; | ||
1771 : | my $reg2 = "$b2-$e2 (<b>$d2/$ln2</b>)"; | ||
1772 : | |||
1773 : | arodri7 | 1.29 | # checkbox column |
1774 : | my $field_name = "tables_" . $id; | ||
1775 : | my $pair_name = "visual_" . $id; | ||
1776 : | my $box_col = qq(<input type=checkbox name=seq value="$id" id="$field_name" onClick="VisualCheckPair('$field_name', '$pair_name');">); | ||
1777 : | arodri7 | 1.31 | |
1778 : | # get the linked fig id | ||
1779 : | my $fig_col; | ||
1780 : | if (defined ($e_identical{$id})){ | ||
1781 : | $fig_col = &HTML::set_prot_links($cgi,$id) . "*"; | ||
1782 : | } | ||
1783 : | else{ | ||
1784 : | $fig_col = &HTML::set_prot_links($cgi,$id); | ||
1785 : | arodri7 | 1.28 | } |
1786 : | |||
1787 : | arodri7 | 1.29 | push(@$single_domain,$box_col); # permanent column |
1788 : | arodri7 | 1.31 | push(@$single_domain,$fig_col); # permanent column |
1789 : | arodri7 | 1.29 | push(@$single_domain,$thing->evalue); # permanent column |
1790 : | push(@$single_domain,"$iden\%"); # permanent column | ||
1791 : | push(@$single_domain,$reg1); # permanent column | ||
1792 : | push(@$single_domain,$reg2); # permanent column | ||
1793 : | push(@$single_domain,$thing->organism); # permanent column | ||
1794 : | push(@$single_domain,$thing->function); # permanent column | ||
1795 : | arodri7 | 1.32 | foreach my $col (@$columns){ |
1796 : | (push(@$single_domain,$subsystems_column{$id}) && (next)) if ($col eq "subsystem"); | ||
1797 : | (push(@$single_domain,$evidence_column{$id}) && (next)) if ($col eq "evidence"); | ||
1798 : | arodri7 | 1.33 | (push(@$single_domain,$pfam_column{$id}) && (next)) if ($col eq "pfam_domains"); |
1799 : | # (push(@$single_domain,@{$$all_aliases{$id}}[0]) && (next)) if ($col eq "ncbi_id"); | ||
1800 : | (push(@$single_domain,&get_prefer($thing->acc, 'NCBI', $all_aliases)) && (next)) if ($col eq "ncbi_id"); | ||
1801 : | (push(@$single_domain,&get_prefer($thing->acc, 'RefSeq', $all_aliases)) && (next)) if ($col eq "refseq_id"); | ||
1802 : | (push(@$single_domain,&get_prefer($thing->acc, 'SwissProt', $all_aliases)) && (next)) if ($col eq "swissprot_id"); | ||
1803 : | (push(@$single_domain,&get_prefer($thing->acc, 'UniProt', $all_aliases)) && (next)) if ($col eq "uniprot_id"); | ||
1804 : | (push(@$single_domain,&get_prefer($thing->acc, 'TIGR', $all_aliases)) && (next)) if ($col eq "tigr_id"); | ||
1805 : | (push(@$single_domain,&get_prefer($thing->acc, 'PIR', $all_aliases)) && (next)) if ($col eq "pir_id"); | ||
1806 : | (push(@$single_domain,&get_prefer($thing->acc, 'KEGG', $all_aliases)) && (next)) if ($col eq "kegg_id"); | ||
1807 : | (push(@$single_domain,&get_prefer($thing->acc, 'TrEMBL', $all_aliases)) && (next)) if ($col eq "trembl_id"); | ||
1808 : | (push(@$single_domain,&get_prefer($thing->acc, 'ASAP', $all_aliases)) && (next)) if ($col eq "asap_id"); | ||
1809 : | (push(@$single_domain,&get_prefer($thing->acc, 'JGI', $all_aliases)) && (next)) if ($col eq "jgi_id"); | ||
1810 : | arodri7 | 1.32 | } |
1811 : | arodri7 | 1.10 | push(@$data,$single_domain); |
1812 : | } | ||
1813 : | |||
1814 : | arodri7 | 1.26 | if ($count >0 ){ |
1815 : | $content = $data; | ||
1816 : | arodri7 | 1.10 | } |
1817 : | arodri7 | 1.26 | else{ |
1818 : | arodri7 | 1.10 | $content = "<p>This PEG does not have any similarities</p>"; |
1819 : | } | ||
1820 : | return ($content); | ||
1821 : | } | ||
1822 : | arodri7 | 1.11 | |
# Build the checkbox cell for every id.
#
# Parameters:
#   $ids - array ref of ids
# Returns:
#   hash (as a flat list) mapping id => HTML <input type=checkbox> element.
#   The element's id is "tables_<id>"; its onClick handler passes that name
#   and "visual_<id>" to the JavaScript function VisualCheckPair().
sub get_box_column{
    my ($ids) = @_;

    my %checkbox_for = map {
        my $table_key  = "tables_" . $_;
        my $visual_key = "visual_" . $_;
        ($_ => qq(<input type=checkbox name=seq value="$_" id="$table_key" onClick="VisualCheckPair('$table_key', '$visual_key');">));
    } @{ $ids || [] };

    return (%checkbox_for);
}
1833 : | |||
# Build the "subsystem" column.
#
# Parameters:
#   $ids - array ref of feature ids
# Returns:
#   hash (as a flat list) mapping id => HTML cell content: a numbered,
#   <br>-separated list built from element 0 of every entry that
#   FIG::subsystems_for_pegs reports for the id, or a blank placeholder when
#   there is none.
sub get_subsystems_column{
    my ($ids) = @_;

    my $fig = FIG->new;
    my %in_subs = $fig->subsystems_for_pegs($ids);

    my %column;
    foreach my $id (@$ids){
        # Declared unconditionally: "my ... if COND" has undefined behaviour.
        my @in_sub = defined $in_subs{$id} ? @{ $in_subs{$id} } : ();

        if (@in_sub > 0) {
            my $count = 0;
            my @subsystems = map { ++$count . ". " . $$_[0] } @in_sub;
            $column{$id} = join ("<br>", @subsystems);
        } else {
            # blank placeholder cell
            $column{$id} = "&nbsp;";
        }
    }
    return (%column);
}
1859 : | |||
# Collect the ids that FIG::mapped_prot_ids reports for a feature and that
# have an assigned function.
#
# Parameters:
#   $fid - feature id to look up
# Returns:
#   hash (as a flat list) mapping id => 1 for every mapped id that
#     * differs from $fid,
#     * does not start with "xxx", and
#     * has a true FIG::function_of value.
sub get_essentially_identical{
    my ($fid) = @_;
    my $fig = FIG->new;

    my %id_list;
    foreach my $mapped ($fig->mapped_prot_ids($fid)) {
        # Element 0 of each returned entry is the mapped id.
        my $id = $mapped->[0];
        next if ($id eq $fid or $id =~ /^xxx/);
        $id_list{$id} = 1 if ($fig->function_of($id));
    }
    return(%id_list);
}
1874 : | |||
1875 : | |||
# Build the "evidence" column.
#
# Parameters:
#   $ids - array ref of feature ids
# Returns:
#   hash (as a flat list) mapping id => HTML cell content. For ids of the
#   form fig|<n>.<n>.peg.<n> that carry evidence-code attributes, the cell is
#   an <a> element listing the codes (<br />-separated) with a Popup_Tooltip
#   mouse-over built from HTML::evidence_codes_explain; every other id gets a
#   blank placeholder.
sub get_evidence_column{
    my ($ids) = @_;
    my $fig = FIG->new;
    my $cgi = CGI->new;

    # Group the evidence-code attributes by feature id. Element 0 of an
    # attribute is used as the feature id, element 1 is matched against
    # /^evidence_code/i and element 2 holds the value that is displayed.
    my %code_attributes;
    foreach my $attribute (grep { $_->[1] =~ /^evidence_code/i } $fig->get_attributes($ids)){
        push (@{$code_attributes{$$attribute[0]}}, $attribute);
    }

    my %column;
    foreach my $id (@$ids){
        my @ev_codes;

        if ($id =~ /^fig\|\d+\.\d+\.peg\.\d+$/) {
            # "|| []" replaces defined(@array), which is fatal on Perl >= 5.22.
            foreach my $code (@{ $code_attributes{$id} || [] }) {
                my $pretty_code = $code->[2];
                # Keep only the part before the first ";".
                ($pretty_code) = split(";", $pretty_code) if ($pretty_code =~ /;/);
                push(@ev_codes, $pretty_code);
            }
        }

        # blank placeholder cell unless there is at least one usable code
        my $ev_codes = "&nbsp;";
        if (scalar(@ev_codes) && $ev_codes[0]) {
            # NOTE(review): $ev_code_help is interpolated into a single-quoted
            # JavaScript string without escaping -- confirm that
            # HTML::evidence_codes_explain never returns a single quote.
            my $ev_code_help=join("<br />", map {HTML::evidence_codes_explain($_)} @ev_codes);
            $ev_codes = $cgi->a(
                {
                    id=>"evidence_codes", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Evidence Codes', '$ev_code_help', ''); this.tooltip.addHandler(); return false;"}, join("<br />", @ev_codes));
        }
        $column{$id}=$ev_codes;
    }
    return (%column);
}
1917 : | |||
# Build the "pfam_domains" column.
#
# Parameters:
#   $ids - array ref of feature ids
# Returns:
#   hash (as a flat list) mapping id => HTML cell content: one
#   "<term> (<pfam accession>)" entry per PFAM attribute of the id, joined
#   with "<br><br>". Ids that are not of the form fig|<n>.<n>.peg.<n>, or
#   that have no PFAM attribute, map to the empty string.
sub get_pfam_column{
    my ($ids) = @_;
    my $fig = FIG->new;
    my $dbmaster = DBMaster->new(-database =>'Ontology');

    # Group the PFAM attribute keys (element 1) by feature id (element 0).
    my %code_attributes;
    foreach my $attribute (grep { $_->[1] =~ /^PFAM/i } $fig->get_attributes($ids)){
        push (@{$code_attributes{$$attribute[0]}}, $$attribute[1]);
    }

    my %column;
    foreach my $id (@$ids){
        my @pfam_codes;
        my %description_codes;    # term cache, valid for this id only

        if ($id =~ /^fig\|\d+\.\d+\.peg\.\d+$/) {
            # "|| []" replaces defined(@array), which is fatal on Perl >= 5.22.
            foreach my $code (@{ $code_attributes{$id} || [] }) {
                # The accession is the part after the first "::" of the key.
                my @parts = split("::",$code);
                my $pfam_link = "<a href=http://www.sanger.ac.uk//cgi-bin/Pfam/getacc?" . $parts[1] . ">$parts[1]</a>";

                if (defined ($description_codes{$parts[1]})){
                    # NOTE(review): a repeated accession is shown without the
                    # hyperlink, unlike its first occurrence below -- confirm
                    # that this is intended.
                    push(@pfam_codes, "$description_codes{$parts[1]} ($parts[1])");
                }
                else {
                    my $description = $dbmaster->pfam->get_objects( { 'id' => $parts[1] } );
                    # Guard against a lookup that returns nothing.
                    my $term = $description ? $description->[0]{term} : undef;
                    $description_codes{$parts[1]} = $term;
                    $term = '' unless defined $term;
                    push(@pfam_codes, "$term ($pfam_link)");
                }
            }
        }

        $column{$id}=join("<br><br>", @pfam_codes);
    }
    return (%column);
}
1959 : | mkubal | 1.12 | |
# Return the linked alias of a feature that belongs to the requested database.
#
# Parameters:
#   $fid         - feature id
#   $db          - database name, compared with the result of
#                  Observation::get_database for each alias (e.g. 'NCBI')
#   $all_aliases - hash ref mapping feature id => array ref of aliases
# Returns:
#   the HTML::set_prot_links markup for the first alias whose database equals
#   $db, or a blank placeholder when no alias matches.
sub get_prefer {
    my ($fid, $db, $all_aliases) = @_;
    my $cgi = CGI->new;

    # "|| []" keeps the loop from creating empty entries in the caller's hash.
    foreach my $alias (@{ $all_aliases->{$fid} || [] }){
        my $id_db = Observation::get_database($alias);
        next unless ($id_db eq $db);

        my $acc_col = HTML::set_prot_links($cgi,$alias);
        return ($acc_col);
    }
    # blank placeholder cell
    return ("&nbsp;");
}
1974 : | |||
# Escape the HTML metacharacters "&", ">" and "<" in a string.
#
# Parameters:
#   $_[0] - text to escape
# Returns:
#   the escaped copy; the argument and the global $_ are left untouched.
#   An undefined argument is returned unchanged.
sub html_enc {
    my ($text) = @_;
    return $text unless defined $text;

    # Ampersand first, so the entities produced below are not escaped again.
    $text =~ s/&/&amp;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/</&lt;/g;
    return $text;
}
1976 : | |||
# Map an e-value to a colour code.
#
# Parameters:
#   $evalue - numeric e-value
# Returns:
#   51..59 for the first bin whose inclusive upper bound is >= $evalue
#   (bounds: 1e-170, 1e-120, 1e-90, 1e-70, 1e-40, 1e-20, 1e-5, 1, 10),
#   and 60 for anything larger than 10.
sub color {
    my ($evalue) = @_;

    # [inclusive upper bound, colour code], in ascending order of the bound
    my @bins = (
        [1e-170, 51],
        [1e-120, 52],
        [1e-90,  53],
        [1e-70,  54],
        [1e-40,  55],
        [1e-20,  56],
        [1e-5,   57],
        [1,      58],
        [10,     59],
    );

    foreach my $bin (@bins) {
        my ($upper_bound, $code) = @$bin;
        return ($code) if ($evalue <= $upper_bound);
    }
    return (60);
}
2015 : | arodri7 | 1.13 | |
2016 : | |||
2017 : | ############################ | ||
2018 : | package Observation::Cluster; | ||
2019 : | |||
2020 : | use base qw(Observation); | ||
2021 : | |||
# Constructor.
#
# Parameters:
#   $class   - class name
#   $dataset - hash ref handed to the parent constructor; its 'context'
#              entry is copied onto the new object
# Returns:
#   the object, blessed into $class.
sub new {
    my $class   = shift;
    my $dataset = shift;

    # Let the parent class build the object, then attach the context.
    my $self = $class->SUPER::new($dataset);
    $self->{context} = $dataset->{'context'};

    return bless($self, $class);
}
2030 : | |||
# Split a location string of the form "<contig>_<beg>_<end>".
# Returns (contig, beg, end), or an empty list when the string does not match.
sub _parse_location {
    my ($location) = @_;
    return unless defined $location;
    return ($1, $2, $3) if ($location =~ /(.*)_(\d+)_(\d+)$/);
    return;
}

# Compute the region to fetch around a gene and its drawing offset.
# Returns (region_start, region_end, offset, reversed):
#   region_start / region_end - the gene extended by 4000 on either side
#   offset   - gene midpoint minus half of $window_size
#   reversed - 1 unless $beg < $end
sub _region_window {
    my ($beg, $end, $window_size) = @_;

    my $reversed = ($beg < $end) ? 0 : 1;
    my ($low, $high) = $reversed ? ($end, $beg) : ($beg, $end);
    my $offset = ($low + (($high - $low) / 2)) - ($window_size / 2);

    return ($low - 4000, $high + 4000, $offset, $reversed);
}

# Draw the chromosomal region around this observation's feature, plus the
# regions around related features from other genomes, onto a graphics object.
#
# Parameters:
#   $self - the observation; fig_id and context are read from it
#   $gd   - drawing object providing window_size() and add_line()
# Returns:
#   $gd, after one add_line() call per region (and a second one per region
#   when overlapping genes were moved to a second line).
#
# Related regions are chosen by the context:
#   "diverse" - the first peg of every evidence pair of the first coupling
#               returned by FIG::coupling_and_evidence
#   "close"   - the FIG::bbh_list hits of the feature in a fixed genome list
# Genes are coloured by the rank of the PCH set (FIG::in_pch_pin_with) they
# fall into; genes in a set of their own get colour "20".
sub display {
    my ($self,$gd) = @_;

    my $fid = $self->fig_id;
    my $compare_or_coupling = $self->context;
    my $gd_window_size = $gd->window_size;
    my $fig = FIG->new;

    # genome and location of the target gene
    my $target_genome = $fig->genome_of($fid);
    my $data = $fig->feature_location($fid);

    # NOTE(review): an unparsable target location is not rejected; as before,
    # the computation then proceeds with undefined coordinates.
    my ($contig, $beg, $end) = _parse_location($data);
    my ($region_start, $region_end, $offset, $reversed) = _region_window($beg, $end, $gd_window_size);

    # genome => peg whose region has to be drawn mirrored
    my %reverse_flag;
    $reverse_flag{$target_genome} = $fid if ($reversed);

    # genes in the target region
    my ($target_gene_features) = $fig->genes_in_region($target_genome, $contig, $region_start, $region_end);
    my $all_regions = [$target_gene_features];
    my @start_array_region = ($offset);    # one offset per entry of $all_regions

    my %all_genes;      # gene => peg the gene's region was built around
    my %all_genomes;    # genomes of the related regions
    foreach my $feature (@$target_gene_features){ $all_genes{$feature} = $fid; }

    # Register the region around a related peg; pegs whose location cannot be
    # parsed are skipped.
    my $add_pair_region = sub {
        my ($peg) = @_;

        my $pair_genome = $fig->genome_of($peg);
        my $location = $fig->feature_location($peg);
        my ($pair_contig, $pair_beg, $pair_end) = _parse_location($location);
        return unless defined $pair_contig;

        my ($pair_start, $pair_stop, $pair_offset, $pair_reversed) = _region_window($pair_beg, $pair_end, $gd_window_size);
        $reverse_flag{$pair_genome} = $peg if ($pair_reversed);

        push (@start_array_region, $pair_offset);
        $all_genomes{$pair_genome} = 1;

        my ($pair_features) = $fig->genes_in_region($pair_genome, $pair_contig, $pair_start, $pair_stop);
        push(@$all_regions,$pair_features);
        foreach my $pair_feature (@$pair_features){ $all_genes{$pair_feature} = $peg; }
        return;
    };

    if ($compare_or_coupling eq "diverse")
    {
        my @coup = grep { $_->[1] } $fig->coupling_and_evidence($fid,5000,1e-10,4,1);

        # Only the evidence pairs (element 2) of the first coupling are used.
        my $evidence = @coup ? $coup[0]->[2] : undef;
        foreach my $pair (@{ $evidence || [] }) {
            my ($peg1) = @$pair;
            $add_pair_region->($peg1);
        }
    }
    elsif ($compare_or_coupling eq "close")
    {
        # Hard-coded genome list. Selecting the genomes through
        # FIG::crude_estimate_of_distance with a 0.26 threshold existed only
        # as disabled code.
        my @close_genomes = qw(216592.1 79967.1 199310.1 216593.1 155864.1 83334.1 316407.3);
        foreach my $genome (@close_genomes){ $all_genomes{$genome} = 1; }

        # Iterate in the listed order so that the regions are drawn in a
        # reproducible order (the hash key order used before is arbitrary).
        foreach my $comp_genome (@close_genomes){
            my $return = $fig->bbh_list($comp_genome,[$fid]);
            my $feature_list = $return ? $return->{$fid} : undef;
            foreach my $peg1 (@{ $feature_list || [] }){
                $add_pair_region->($peg1);
            }
        }
    }

    # get the PCH set of each gene, restricted to the related genomes
    my @pch_sets;
    my %pch_already;
    foreach my $gene_peg (keys %all_genes)
    {
        next if ($pch_already{$gene_peg});

        my @gene_set = ($gene_peg);
        foreach my $entry ($fig->in_pch_pin_with($gene_peg)) {
            # keep only the part before the first comma
            (my $pch_peg = $entry) =~ s/,.*$//;
            my $pch_genome = $fig->genome_of($pch_peg);
            if ( ($gene_peg ne $pch_peg) && ($all_genomes{$pch_genome})) {
                push(@gene_set,$pch_peg);
                $pch_already{$pch_peg}=1;
            }
        }
        push(@pch_sets,\@gene_set);
    }

    # Rank the sets, largest first. Every set with more than one member that
    # contains a not yet ranked peg takes the next colour; single-member sets
    # get "20". A BBH-based ranking existed only as disabled code.
    my %peg_rank;
    my $counter = 1;
    foreach my $set (sort { scalar(@$b) <=> scalar(@$a) } @pch_sets){
        if (scalar (@$set) > 1)
        {
            my $ranked_new = 0;
            foreach my $peg (@$set){
                next if ($peg_rank{$peg});
                $peg_rank{$peg} = $counter;
                $ranked_new = 1;
            }
            $counter++ if ($ranked_new);
        }
        else
        {
            foreach my $peg (@$set){ $peg_rank{$peg} = "20"; }
        }
    }

    foreach my $region (@$all_regions){
        my $genes = $region || [];    # tolerate a region without gene list

        my $sample_peg = $genes->[0];
        my $region_genome = $fig->genome_of($sample_peg);
        my $region_gs = $fig->genus_species($region_genome);
        my $abbrev_name = $fig->abbrev($region_gs);

        my $line_config = { 'title' => $region_gs,
                            'short_title' => $abbrev_name,
                            'basepair_offset' => '0'
                          };
        my $second_line_config = { 'title' => "$region_gs",
                                   'short_title' => "",
                                   'basepair_offset' => '0'
                                 };

        my $offsetting = shift @start_array_region;

        my @line_data;
        my @second_line_data;

        # state used to detect overlap with the previously drawn gene
        my ($prev_start, $prev_stop);
        my $major_line_flag = 0;     # set once any gene went to the second line
        my $prev_second_flag = 0;    # previous gene went to the second line

        foreach my $fid1 (@$genes){
            my $color = $peg_rank{$fid1};

            # links: the annotation page first, then one per subsystem
            my $function = $fig->function_of($fid1);
            my $url_link = "http://seed-viewer.theseed.org/index.cgi?action=ShowAnnotation&prot=".$fid1;
            my @links_list = ({"link_title" => $fid1,
                               "link" => $url_link});

            my @subsystems = $fig->peg_to_subsystems($fid1);
            foreach my $subsystem (@subsystems){
                push(@links_list, {"link" => "http://seed-viewer.theseed.org/index.cgi?action=ShowSubsystem&subsystem_name=$subsystem",
                                   "link_title" => $subsystem});
            }

            my @descriptions = (
                {"title" => "function",
                 "value" => $function},
                {"title" => "subsystems",
                 "value" => join (",", @subsystems)},
            );

            # genes without a parsable location are not drawn
            my $fid_location = $fig->feature_location($fid1);
            my (undef, $gene_beg, $gene_end) = _parse_location($fid_location);
            next unless defined $gene_beg;

            my $start = $gene_beg - $offsetting;
            my $stop  = $gene_end - $offsetting;

            # A gene starting or ending before the previous gene's coordinates
            # goes to the second line, unless the previous gene already did.
            my $second_line_flag = 0;
            if ( $prev_start && $prev_stop && ($prev_second_flag == 0) &&
                 ( ($start < $prev_start) || ($start < $prev_stop) ||
                   ($stop < $prev_start) || ($stop < $prev_stop) )){
                $second_line_flag = 1;
                $major_line_flag = 1;
            }
            $prev_start = $start;
            $prev_stop = $stop;

            # mirror the coordinates of a region built around a reversed peg
            if ((defined($reverse_flag{$region_genome})) && ($reverse_flag{$region_genome} eq $all_genes{$fid1})){
                $start = $gd_window_size - $start;
                $stop = $gd_window_size - $stop;
            }

            my $element_hash = {
                "title" => $fid1,
                "start" => $start,
                "end" =>  $stop,
                "type"=> 'arrow',
                "color"=> $color,
                "zlayer" => "2",
                "links_list" => \@links_list,
                "description" => \@descriptions
            };

            if ($second_line_flag == 1){
                push(@second_line_data,$element_hash);
                $prev_second_flag = 1;
            }
            else{
                push(@line_data,$element_hash);
                $prev_second_flag = 0;
            }
        }

        $gd->add_line(\@line_data, $line_config);
        $gd->add_line(\@second_line_data, $second_line_config) if ($major_line_flag == 1);
    }
    return $gd;
}
2393 : | |||
2394 : |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |