Parent Directory
|
Revision Log
modified get_objects
package Observation; require Exporter; @EXPORT_OK = qw(get_objects); use strict; use warnings; use HTML; 1; # $Id: Observation.pm,v 1.15 2007/06/25 16:51:02 arodri7 Exp $ =head1 NAME Observation -- A presentation layer for observations in SEED. =head1 DESCRIPTION The SEED environment contains various sources of information for sequence features. The purpose of this library is to provide a single interface to this data. The data can be used to display information for a given sequence feature (protein or other, but primarily information is computed for proteins). Example: use FIG; use Observation; my $fig = new FIG; my $fid = "fig|83333.1.peg.3"; my $observations = Observation::get_objects($fid); foreach my $observation (@$observations) { print "ID: " . $fid . "\n"; print "Start: " . $observation->start() . "\n"; ... } B<return an array of objects> print "$Observation->acc\n" prints the Accession number if present for the Observation =cut =head1 BACKGROUND =head2 Data incorporated in the Observations As the goal of this library is to provide an integrated view, we combine diverse sources of evidence. =head3 SEED core evidence The core SEED data structures provided by FIG.pm. These are Similarities, BBHs and PCHs. =head3 Attribute based Evidence We use the SEED attribute infrastructure to store information computed by a variety of computational procedures. These are e.g. InterPro hits via InterProScan (ipr), NCBI Conserved Domain Database Hits via PSSM(cdd), PFAM hits via HMM(pfam), SignalP results(signalp), and various others. =head1 METHODS The public methods this package provides are listed below: =head3 acc() A valid accession or remote ID (in the style of a db_xref) or a valid local ID (FID) in case this is supported. =cut sub acc { my ($self) = @_; return $self->{acc}; } =head3 description() The description of the hit. Taken from the data or from the our Ontology database for some cases e.g. IPR or PFAM. B<Please note:> Either remoteid or description is required. =cut sub description { my ($self) = @_; return $self->{description}; } =head3 class() The class of evidence (required). This is usually simply the name of the tool or the name of the SEED data structure. B<Please note> the connection of class and display_method and URL. Current valid classes are: =over 9 =item IDENTICAL (seq) =item SIM (seq) =item BBH (seq) =item PCH (fc) =item FIGFAM (seq) =item IPR (dom) =item CDD (dom) =item PFAM (dom) =item SIGNALP_CELLO_TMPRED (loc) =item TMHMM (loc) =item HMMTOP (loc) =back =cut sub class { my ($self) = @_; return $self->{class}; } =head3 type() The type of evidence (required). Where type is one of the following: =over 8 =item seq=Sequence similarity =item dom=domain based match =item loc=Localization of the feature =item fc=Functional coupling. =back =cut sub type { my ($self) = @_; return $self->{acc}; } =head3 start() Start of hit in query sequence. =cut sub start { my ($self) = @_; return $self->{start}; } =head3 end() End of the hit in query sequence. =cut sub stop { my ($self) = @_; return $self->{stop}; } =head3 start() Start of hit in query sequence. =cut sub qstart { my ($self) = @_; return $self->{qstart}; } =head3 qstop() End of the hit in query sequence. =cut sub qstop { my ($self) = @_; return $self->{qstop}; } =head3 hstart() Start of hit in hit sequence. =cut sub hstart { my ($self) = @_; return $self->{hstart}; } =head3 end() End of the hit in hit sequence. =cut sub hstop { my ($self) = @_; return $self->{hstop}; } =head3 qlength() length of the query sequence in similarities =cut sub qlength { my ($self) = @_; return $self->{qlength}; } =head3 hlength() length of the hit sequence in similarities =cut sub hlength { my ($self) = @_; return $self->{hlength}; } =head3 evalue() E-value or P-Value if present. =cut sub evalue { my ($self) = @_; return $self->{evalue}; } =head3 score() Score if present. B<Please note: > Either score or eval are required. =cut sub score { my ($self) = @_; return $self->{score}; } =head3 display() will be different for each type =cut sub display { die "Abstract Method Called\n"; } =head3 rank() Returns an integer from 1 - 10 indicating the importance of this observations. Currently always returns 1. =cut sub rank { my ($self) = @_; # return $self->{rank}; return 1; } =head3 supports_annotation() Does a this observation support the annotation of its feature? Returns =over 3 =item 10, if feature annotation is identical to $self->description =item 1, Feature annotation is similar to $self->annotation; this is computed using FIG::SameFunc() =item undef =back =cut sub supports_annotation { my ($self) = @_; # no code here so far return $self->{supports_annotation}; } =head3 url() URL describing the subject. In case of a BLAST hit against a sequence, this URL will lead to a page displaying the sequence record for the sequence. In case of an HMM hit, the URL will be to the URL description. =cut sub url { my ($self) = @_; my $url = get_url($self->type, $self->acc); return $url; } =head3 get_objects() This is the B<REAL WORKHORSE> method of this Package. It will probably have to: - get all sims for the feature - get all bbhs for the feature - copy information from sim to bbh (bbh have no match location etc) - get pchs (difficult) - get attributes (there is code for this that in get_attribute_based_observations - get_attributes_based_observations returns an array of arrays of hashes like this" my $dataset [ [ { name => 'acc', value => '1234' }, { name => 'from', value => '4' }, { name => 'to', value => '400' }, .... ], [ { name => 'acc', value => '456' }, { name => 'from', value => '1' }, { name => 'to', value => '100' }, .... ], ... ]; return $datasets; } It will invoke the required calls to the SEED API to retrieve the information required. =cut sub get_objects { my ($self,$fid,$classes) = @_; my $objects = []; my @matched_datasets=(); # call function that fetches attribute based observations # returns an array of arrays of hashes if(scalar(@$classes) < 1){ get_attribute_based_observations($fid,\@matched_datasets); get_sims_observations($fid,\@matched_datasets); get_identical_proteins($fid,\@matched_datasets); get_functional_coupling($fid,\@matched_datasets); } else{ my %domain_classes; my $identical_flag=0; my $pch_flag=0; my $location_flag = 0; my $sims_flag=0; my $cluster_flag = 0; foreach my $class (@$classes){ if($class =~ /(IPR|CDD|PFAM)/){ $domain_classes{$class} = 1; } elsif ($class eq "IDENTICAL") { $identical_flag = 1; } elsif ($class eq "PCH") { $pch_flag = 1; } elsif ($class =~/(SIGNALP_CELLO_TMPRED)/) { $location_flag = 1; } elsif ($class eq "SIM") { $sims_flag = 1; } elsif ($class eq "CLUSTER") { $cluster_flag = 1; } } if ($identical_flag ==1) { get_identical_proteins($fid,\@matched_datasets); } if ( (defined($domain_classes{IPR})) || (defined($domain_classes{CDD})) || (defined($domain_classes{PFAM})) ) { get_attribute_based_domain_observations($fid,\%domain_classes,\@matched_datasets); } if ($pch_flag == 1) { get_functional_coupling($fid,\@matched_datasets); } if ($sims_flag == 1) { get_sims_observations($fid,\@matched_datasets); } if ($location_flag == 1) { get_attribute_based_location_observations($fid,\@matched_datasets); } if ($cluster_flag == 1) { get_cluster_observations($fid,\@matched_datasets); } } foreach my $dataset (@matched_datasets) { my $object; if($dataset->{'type'} eq "dom"){ $object = Observation::Domain->new($dataset); } if($dataset->{'class'} eq "PCH"){ $object = Observation::FC->new($dataset); } if ($dataset->{'class'} eq "IDENTICAL"){ $object = Observation::Identical->new($dataset); } if ($dataset->{'class'} eq "SIGNALP_CELLO_TMPRED"){ $object = Observation::Location->new($dataset); } if ($dataset->{'class'} eq "SIM"){ $object = Observation::Sims->new($dataset); } if ($dataset->{'class'} eq "CLUSTER"){ $object = Observation::Cluster->new($dataset); } push (@$objects, $object); } return $objects; } =head1 Internal Methods These methods are not meant to be used outside of this package. B<Please do not use them outside of this package!> =cut =head3 get_url (internal) get_url() return a valid URL or undef for any observation. URLs are constructed by looking at the Accession acc() and name() Info from both attributes is combined with a table of base URLs stored in this function. =cut sub get_url { my ($self) = @_; my $url=''; # a hash with a URL for each observation; identified by name() #my $URL => { 'PFAM' => "http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?" ,\ # 'IPR' => "http://www.ebi.ac.uk/interpro/DisplayIproEntry?ac=" ,\ # 'CDD' => "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid=",\ # 'PIR' => "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid=",\ # 'FIGFAM' => '',\ # 'sim'=> "http://www.theseed.org/linkin.cgi?id=",\ # 'bbh'=> "http://www.theseed.org/linkin.cgi?id=" #}; # if (defined $URL{$self->name}) { # $url = $URL{$self->name}.$self->acc; # return $url; # } # else return undef; } =head3 get_display_method (internal) get_display_method() return a valid URL or undef for any observation. URLs are constructed by looking at the Accession acc() and name() and Info from both attributes is combined with a table of base URLs stored in this function. =cut sub get_display_method { my ($self) = @_; # a hash with a URL for each observation; identified by name() #my $URL => { 'sim'=> "http://www.theseed.org/featalign.cgi?id1=",\ # 'bbh'=> "http://www.theseed.org/featalign.cgi?id1=" # }; #if (defined $URL{$self->name}) { # $url = $URL{$self->name}.$self->feature_id."&id2=".$self->acc; # return $url; # } # else return undef; } sub get_attribute_based_domain_observations{ # we read a FIG ID and a reference to an array (of arrays of hashes, see above) my ($fid,$domain_classes,$datasets_ref) = (@_); my $fig = new FIG; foreach my $attr_ref ($fig->get_attributes($fid)) { my $key = @$attr_ref[1]; my @parts = split("::",$key); my $class = $parts[0]; if($domain_classes->{$parts[0]}){ my $val = @$attr_ref[2]; if($val =~/^(\d+\.\d+|0\.0);(\d+)-(\d+)/){ my $raw_evalue = $1; my $from = $2; my $to = $3; my $evalue; if($raw_evalue =~/(\d+)\.(\d+)/){ my $part2 = 1000 - $1; my $part1 = $2/100; $evalue = $part1."e-".$part2; } else{ $evalue = "0.0"; } my $dataset = {'class' => $class, 'acc' => $key, 'type' => "dom" , 'evalue' => $evalue, 'start' => $from, 'stop' => $to }; push (@{$datasets_ref} ,$dataset); } } } } sub get_attribute_based_location_observations{ my ($fid,$datasets_ref) = (@_); my $fig = new FIG; my $location_attributes = ['SignalP','CELLO','TMPRED']; my $dataset = {'type' => "loc", 'class' => 'SIGNALP_CELLO_TMPRED'}; foreach my $attr_ref ($fig->get_attributes($fid,$location_attributes)) { my $key = @$attr_ref[1]; my @parts = split("::",$key); my $sub_class = $parts[0]; my $sub_key = $parts[1]; my $value = @$attr_ref[2]; if($sub_class eq "SignalP"){ if($sub_key eq "cleavage_site"){ my @value_parts = split(";",$value); $dataset->{'cleavage_prob'} = $value_parts[0]; $dataset->{'cleavage_loc'} = $value_parts[1]; } elsif($sub_key eq "signal_peptide"){ $dataset->{'signal_peptide_score'} = $value; } } elsif($sub_class eq "CELLO"){ $dataset->{'cello_location'} = $sub_key; $dataset->{'cello_score'} = $value; } elsif($sub_class eq "TMPRED"){ my @value_parts = split(";",$value); $dataset->{'tmpred_score'} = $value_parts[0]; $dataset->{'tmpred_locations'} = $value_parts[1]; } } push (@{$datasets_ref} ,$dataset); } =head3 get_attribute_based_evidence (internal) This method retrieves evidence from the attribute server =cut sub get_attribute_based_observations{ # we read a FIG ID and a reference to an array (of arrays of hashes, see above) my ($fid,$datasets_ref) = (@_); my $_myfig = new FIG; foreach my $attr_ref ($_myfig->get_attributes($fid)) { # convert the ref into a string for easier handling my ($string) = "@$attr_ref"; # print "S:$string\n"; my ($key,$val) = ( $string =~ /\S+\s(\S+)\s(\S+)/); # THIS SHOULD BE DONE ANOTHER WAY FM->TD # we need to do the right thing for each type, ie no evalue for CELLO and no coordinates, but a score, etc # as fas as possible this should be configured so that the type of observation and the regexp are # stored somewhere for easy expansion # if (($key =~ /PFAM::/) || ( $key =~ /IPR::/) || ( $key =~ /CDD::/) ) { # some keys are composite CDD::1233244 or PFAM:PF1233 if ( $key =~ /::/ ) { my ($firstkey,$restkey) = ( $key =~ /([a-zA-Z0-9]+)::(.*)/); $val=$restkey.";".$val; $key=$firstkey; } my ($acc,$raw_evalue, $from,$to) = ($val =~ /(\S+);(\S+);(\d+)-(\d+)/ ); my $evalue= 255; if (defined $raw_evalue) { # some of the tool do not give us an evalue my ($k,$expo) = ( $raw_evalue =~ /(\d+).(\d+)/); my ($new_k, $new_exp); # # THIS DOES NOT WORK PROPERLY # if($raw_evalue =~/(\d+).(\d+)/){ # $new_exp = (1000+$expo); # $new_k = $k / 100; } $evalue = "0.01"#new_k."e-".$new_exp; } # unroll it all into an array of hashes # this needs to be done differently for different types of observations my $dataset = [ { name => 'class', value => $key }, { name => 'acc' , value => $acc}, { name => 'type', value => "dom"} , # this clearly needs to be done properly FM->TD { name => 'evalue', value => $evalue }, { name => 'start', value => $from}, { name => 'stop' , value => $to} ]; push (@{$datasets_ref} ,$dataset); } } } =head3 get_cluster_observations() (internal) This methods sets the type and class for cluster observations =cut sub get_cluster_observations{ my ($fid,$datasets_ref) = (@_); $dataset = {'class' => 'CLUSTER', 'type' => 'fc' }; push (@{$datasets_ref} ,$dataset); } =head3 get_sims_observations() (internal) This methods retrieves sims fills the internal data structures. =cut sub get_sims_observations{ my ($fid,$datasets_ref) = (@_); my $fig = new FIG; # my @sims= $fig->nsims($fid,100,1e-20,"fig"); my @sims= $fig->nsims($fid,100,1e-20,"all"); my ($dataset); foreach my $sim (@sims){ my $hit = $sim->[1]; my $percent = $sim->[2]; my $evalue = $sim->[10]; my $qfrom = $sim->[6]; my $qto = $sim->[7]; my $hfrom = $sim->[8]; my $hto = $sim->[9]; my $qlength = $sim->[12]; my $hlength = $sim->[13]; my $db = get_database($hit); my $func = $fig->function_of($hit); my $organism = $fig->org_of($hit); $dataset = {'class' => 'SIM', 'acc' => $hit, 'identity' => $percent, 'type' => 'seq', 'evalue' => $evalue, 'qstart' => $qfrom, 'qstop' => $qto, 'hstart' => $hfrom, 'hstop' => $hto, 'database' => $db, 'organism' => $organism, 'function' => $func, 'qlength' => $qlength, 'hlength' => $hlength }; push (@{$datasets_ref} ,$dataset); } } =head3 get_database (internal) This method gets the database association from the sequence id =cut sub get_database{ my ($id) = (@_); my ($db); if ($id =~ /^fig\|/) { $db = "FIG" } elsif ($id =~ /^gi\|/) { $db = "NCBI" } elsif ($id =~ /^^[NXYZA]P_/) { $db = "RefSeq" } elsif ($id =~ /^sp\|/) { $db = "SwissProt" } elsif ($id =~ /^uni\|/) { $db = "UniProt" } elsif ($id =~ /^tigr\|/) { $db = "TIGR" } elsif ($id =~ /^pir\|/) { $db = "PIR" } elsif ($id =~ /^kegg\|/) { $db = "KEGG" } elsif ($id =~ /^tr\|/) { $db = "TrEMBL" } elsif ($id =~ /^eric\|/) { $db = "ASAP" } elsif ($id =~ /^img\|/) { $db = "JGI" } return ($db); } =head3 get_identical_proteins() (internal) This methods retrieves sims fills the internal data structures. =cut sub get_identical_proteins{ my ($fid,$datasets_ref) = (@_); my $fig = new FIG; my @funcs = (); my @maps_to = grep { $_ ne $fid and $_ !~ /^xxx/ } map { $_->[0] } $fig->mapped_prot_ids($fid); foreach my $id (@maps_to) { my ($tmp, $who); if (($id ne $fid) && ($tmp = $fig->function_of($id))) { $who = &get_database($id); push(@funcs, [$id,$who,$tmp]); } } my ($dataset); foreach my $row (@funcs){ my $id = $row->[0]; my $organism = $fig->org_of($fid); my $who = $row->[1]; my $assignment = $row->[2]; my $dataset = {'class' => 'IDENTICAL', 'id' => $id, 'organism' => $organism, 'type' => 'seq', 'database' => $who, 'function' => $assignment }; push (@{$datasets_ref} ,$dataset); } } =head3 get_functional_coupling() (internal) This methods retrieves the functional coupling of a protein given a peg ID =cut sub get_functional_coupling{ my ($fid,$datasets_ref) = (@_); my $fig = new FIG; my @funcs = (); # initialize some variables my($sc,$neigh); # set default parameters for coupling and evidence my ($bound,$sim_cutoff,$coupling_cutoff) = (5000, 1.0e-10, 4); # get the fc data my @fc_data = $fig->coupling_and_evidence($fid,$bound,$sim_cutoff,$coupling_cutoff,1); # retrieve data my @rows = map { ($sc,$neigh) = @$_; [$sc,$neigh,scalar $fig->function_of($neigh)] } @fc_data; my ($dataset); foreach my $row (@rows){ my $id = $row->[1]; my $score = $row->[0]; my $description = $row->[2]; my $dataset = {'class' => 'PCH', 'score' => $score, 'id' => $id, 'type' => 'fc', 'function' => $description }; push (@{$datasets_ref} ,$dataset); } } =head3 get_sims_and_bbhs() (internal) This methods retrieves sims and also BBHs and fills the internal data structures. =cut # sub get_sims_and_bbhs{ # # blast m8 output format # # id1, id2, %ident, align len, mismatches, gaps, q.start, q.stop, s. start, s.stop, eval, bit # my $Sims=(); # @sims_src = $fig->sims($fid,80,500,"fig",0); # print "found $#sims_src SIMs\n"; # foreach $sims (@sims_src) { # my ($sims_string) = "@$sims"; # # print "$sims_string\n"; # my ($rfid,$start,$stop,$eval) = ( $sims_string =~ /\S+\s+(\S+)\s+\S+\s\S+\s+(\S+)\s+(\S+)\s+ # \S+\s+\S+\s+\S+\s+\S+\s+(\S+)+.*/); # # print "ID: $rfid, E:$eval, Start:$start stop:$stop\n"; # $Sims{$rfid}{'eval'}=$eval; # $Sims{$rfid}{'start'}=$start; # $Sims{$rfid}{'stop'}=$stop; # print "$rfid $Sims{$rfid}{'eval'}\n"; # } # # BBHs # my $BBHs=(); # @bbhs_src = $fig->bbhs($fid,1.0e-10); # print "found $#bbhs_src BBHs\n"; # foreach $bbh (@bbhs_src) { # #print "@$bbh\n"; # my ($bbh_string) = "@$bbh"; # my ($rfid,$eval,$score) = ( $bbh_string =~ /(\S+)\s(\S+)\s(\S+)/); # #print "ID: $rfid, E:$eval, S:$score\n"; # $BBHs{$rfid}{'eval'}=$eval; # $BBHs{$rfid}{'score'}=$score; # #print "$rfid $BBHs{$rfid}{'eval'}\n"; # } # } =head3 new (internal) Instantiate a new object. =cut sub new { my ($class,$dataset) = @_; #$self = { acc => '', # description => '', # class => '', # type => '', # start => '', # stop => '', # evalue => '', # score => '', # display_method => '', # feature_id => '', # rank => '', # supports_annotation => '', # id => '', # organism => '', # who => '' # }; my $self = { class => $dataset->{'class'}, type => $dataset->{'type'} }; bless($self,$class); return $self; } =head3 identity (internal) Returns the % identity of the similar sequence =cut sub identity { my ($self) = @_; return $self->{identity}; } =head3 feature_id (internal) =cut sub feature_id { my ($self) = @_; return $self->{feature_id}; } =head3 id (internal) Returns the ID of the identical sequence =cut sub id { my ($self) = @_; return $self->{id}; } =head3 organism (internal) Returns the organism of the identical sequence =cut sub organism { my ($self) = @_; return $self->{organism}; } =head3 function (internal) Returns the function of the identical sequence =cut sub function { my ($self) = @_; return $self->{function}; } =head3 database (internal) Returns the database of the identical sequence =cut sub database { my ($self) = @_; return $self->{database}; } ############################################################ ############################################################ package Observation::Identical; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{id} = $dataset->{'id'}; $self->{organism} = $dataset->{'organism'}; $self->{function} = $dataset->{'function'}; $self->{database} = $dataset->{'database'}; bless($self,$class); return $self; } =head3 display() If available use the function specified here to display the "raw" observation. This code will display a table for the identical protein B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evi dence. =cut sub display{ my ($self, $cgi, $dataset) = @_; my $all_domains = []; my $count_identical = 0; my $content; foreach my $thing (@$dataset) { next if ($thing->class ne "IDENTICAL"); my $single_domain = []; push(@$single_domain,$thing->database); my $id = $thing->id; $count_identical++; push(@$single_domain,&HTML::set_prot_links($cgi,$id)); push(@$single_domain,$thing->organism); #push(@$single_domain,$thing->type); push(@$single_domain,$thing->function); push(@$all_domains,$single_domain); } if ($count_identical >0){ $content = $all_domains; } else{ $content = "<p>This PEG does not have any essentially identical proteins</p>"; } return ($content); } 1; ######################################### ######################################### package Observation::FC; 1; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{score} = $dataset->{'score'}; $self->{id} = $dataset->{'id'}; $self->{function} = $dataset->{'function'}; bless($self,$class); return $self; } =head3 display() If available use the function specified here to display the "raw" observation. This code will display a table for the identical protein B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evi dence. =cut sub display { my ($self,$cgi,$dataset, $fid) = @_; my $functional_data = []; my $count = 0; my $content; foreach my $thing (@$dataset) { my $single_domain = []; next if ($thing->class ne "PCH"); $count++; # construct the score link my $score = $thing->score; my $toid = $thing->id; my $link = $cgi->url(-relative => 1) . "?user=master&request=show_coupling_evidence&prot=$fid&to=$toid&SPROUT="; my $sc_link = "<a href=$link>$score</a>"; push(@$single_domain,$sc_link); push(@$single_domain,$thing->id); push(@$single_domain,$thing->function); push(@$functional_data,$single_domain); } if ($count >0){ $content = $functional_data; } else { $content = "<p>This PEG does not have any functional coupling</p>"; } return ($content); } ######################################### ######################################### package Observation::Domain; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{evalue} = $dataset->{'evalue'}; $self->{acc} = $dataset->{'acc'}; $self->{start} = $dataset->{'start'}; $self->{stop} = $dataset->{'stop'}; bless($self,$class); return $self; } sub display { my ($thing,$gd) = @_; my $lines = []; my $line_config = { 'title' => $thing->acc, 'short_title' => $thing->type, 'basepair_offset' => '1' }; my $color = "4"; my $line_data = []; my $links_list = []; my $descriptions = []; my $description_function; $description_function = {"title" => $thing->class, "value" => $thing->acc}; push(@$descriptions,$description_function); my $score; $score = {"title" => "score", "value" => $thing->evalue}; push(@$descriptions,$score); my $link_id; if ($thing->acc =~/\w+::(\d+)/){ $link_id = $1; } my $link; my $link_url; if ($thing->class eq "CDD"){$link_url = "http://0-www.ncbi.nlm.nih.gov.library.vu.edu.au:80/Structure/cdd/cddsrv.cgi?uid=$link_id"} elsif($thing->class eq "PFAM"){$link_url = "http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?$link_id"} else{$link_url = "NO_URL"} $link = {"link_title" => $thing->acc, "link" => $link_url}; push(@$links_list,$link); my $element_hash = { "title" => $thing->type, "start" => $thing->start, "end" => $thing->stop, "color"=> $color, "zlayer" => '2', "links_list" => $links_list, "description" => $descriptions}; push(@$line_data,$element_hash); $gd->add_line($line_data, $line_config); return $gd; } ######################################### ######################################### package Observation::Location; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{cleavage_prob} = $dataset->{'cleavage_prob'}; $self->{cleavage_loc} = $dataset->{'cleavage_loc'}; $self->{signal_peptide_score} = $dataset->{'signal_peptide_score'}; $self->{cello_location} = $dataset->{'cello_location'}; $self->{cello_score} = $dataset->{'cello_score'}; $self->{tmpred_score} = $dataset->{'tmpred_score'}; $self->{tmpred_locations} = $dataset->{'tmpred_locations'}; bless($self,$class); return $self; } sub display { my ($thing,$gd,$fid) = @_; my $fig= new FIG; my $length = length($fig->get_translation($fid)); my $cleavage_prob; if($thing->cleavage_prob){$cleavage_prob = $thing->cleavage_prob;} my ($cleavage_loc_begin,$cleavage_loc_end) = split("-",$thing->cleavage_loc); my $signal_peptide_score = $thing->signal_peptide_score; my $cello_location = $thing->cello_location; my $cello_score = $thing->cello_score; my $tmpred_score = $thing->tmpred_score; my @tmpred_locations = split(",",$thing->tmpred_locations); my $lines = []; my $line_config = { 'title' => 'Localization Evidence', 'short_title' => 'Local', 'basepair_offset' => '1' }; #color is my $color = "5"; my $line_data = []; if($cello_location){ my $cello_descriptions = []; my $description_cello_location = {"title" => 'Best Cello Location', "value" => $cello_location}; push(@$cello_descriptions,$description_cello_location); my $description_cello_score = {"title" => 'Cello Score', "value" => $cello_score}; push(@$cello_descriptions,$description_cello_score); my $element_hash = { "title" => "CELLO", "start" => "1", "end" => $length + 1, "color"=> $color, "type" => 'box', "zlayer" => '2', "description" => $cello_descriptions}; push(@$line_data,$element_hash); } my $color = "6"; #if(0){ if($tmpred_score){ foreach my $tmpred (@tmpred_locations){ my $descriptions = []; my ($begin,$end) =split("-",$tmpred); my $description_tmpred_score = {"title" => 'TMPRED score', "value" => $tmpred_score}; push(@$descriptions,$description_tmpred_score); my $element_hash = { "title" => "transmembrane location", "start" => $begin + 1, "end" => $end + 1, "color"=> $color, "zlayer" => '5', "type" => 'smallbox', "description" => $descriptions}; push(@$line_data,$element_hash); } } my $color = "1"; if($signal_peptide_score){ my $descriptions = []; my $description_signal_peptide_score = {"title" => 'signal peptide score', "value" => $signal_peptide_score}; push(@$descriptions,$description_signal_peptide_score); my $description_cleavage_prob = {"title" => 'cleavage site probability', "value" => $cleavage_prob}; push(@$descriptions,$description_cleavage_prob); my $element_hash = { "title" => "SignalP", "start" => $cleavage_loc_begin - 2, "end" => $cleavage_loc_end + 3, "type" => 'bigbox', "color"=> $color, "zlayer" => '10', "description" => $descriptions}; push(@$line_data,$element_hash); } $gd->add_line($line_data, $line_config); return ($gd); } sub cleavage_loc { my ($self) = @_; return $self->{cleavage_loc}; } sub cleavage_prob { my ($self) = @_; return $self->{cleavage_prob}; } sub signal_peptide_score { my ($self) = @_; return $self->{signal_peptide_score}; } sub tmpred_score { my ($self) = @_; return $self->{tmpred_score}; } sub tmpred_locations { my ($self) = @_; return $self->{tmpred_locations}; } sub cello_location { my ($self) = @_; return $self->{cello_location}; } sub cello_score { my ($self) = @_; return $self->{cello_score}; } ######################################### ######################################### package Observation::Sims; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{identity} = $dataset->{'identity'}; $self->{acc} = $dataset->{'acc'}; $self->{evalue} = $dataset->{'evalue'}; $self->{qstart} = $dataset->{'qstart'}; $self->{qstop} = $dataset->{'qstop'}; $self->{hstart} = $dataset->{'hstart'}; $self->{hstop} = $dataset->{'hstop'}; $self->{database} = $dataset->{'database'}; $self->{organism} = $dataset->{'organism'}; $self->{function} = $dataset->{'function'}; $self->{qlength} = $dataset->{'qlength'}; $self->{hlength} = $dataset->{'hlength'}; bless($self,$class); return $self; } =head3 display() If available use the function specified here to display the "raw" observation. This code will display a table for the similarities protein B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evidence. =cut sub display { my ($self,$cgi,$dataset) = @_; my $data = []; my $count = 0; my $content; my $fig = new FIG; foreach my $thing (@$dataset) { my $single_domain = []; next if ($thing->class ne "SIM"); $count++; my $id = $thing->acc; # add the subsystem information my @in_sub = $fig->peg_to_subsystems($id); my $in_sub; if (@in_sub > 0) { $in_sub = @in_sub; # RAE: add a javascript popup with all the subsystems my $ss_list=join "<br>", map { my $g = $_; $g =~ s/\_/ /g; $_ = $g } sort {$a cmp $b} @in_sub; $in_sub = $cgi->a( {id=>"subsystems", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Subsystems', '$ss_list', ''); this.tooltip.addHandler(); return false;"}, $in_sub); } else { $in_sub = " "; } # add evidence code with tool tip my $ev_codes=" "; my @ev_codes = ""; if ($id =~ /^fig\|\d+\.\d+\.peg\.\d+$/) { my @codes = grep { $_->[1] =~ /^evidence_code/i } $fig->get_attributes($id); @ev_codes = (); foreach my $code (@codes) { my $pretty_code = $code->[2]; if ($pretty_code =~ /;/) { my ($cd, $ss) = split(";", $code->[2]); $ss =~ s/_/ /g; $pretty_code = $cd;# . " in " . $ss; } push(@ev_codes, $pretty_code); } } if (scalar(@ev_codes) && $ev_codes[0]) { my $ev_code_help=join("<br />", map {&HTML::evidence_codes_explain($_)} @ev_codes); $ev_codes = $cgi->a( { id=>"evidence_codes", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Evidence Codes', '$ev_code_help', ''); this.tooltip.addHandler(); return false;"}, join("<br />", @ev_codes)); } # add the aliases my $aliases = undef; $aliases = &html_enc( join( ", ", $fig->feature_aliases($id) ) ); $aliases = &HTML::set_prot_links( $cgi, $aliases ); $aliases ||= " "; my $iden = $thing->identity; my $ln1 = $thing->qlength; my $ln2 = $thing->hlength; my $b1 = $thing->qstart; my $e1 = $thing->qstop; my $b2 = $thing->hstart; my $e2 = $thing->hstop; my $d1 = abs($e1 - $b1) + 1; my $d2 = abs($e2 - $b2) + 1; my $reg1 = "$b1-$e1 (<b>$d1/$ln1</b>)"; my $reg2 = "$b2-$e2 (<b>$d2/$ln2</b>)"; push(@$single_domain,$thing->database); push(@$single_domain,&HTML::set_prot_links($cgi,$thing->acc)); push(@$single_domain,$thing->evalue); push(@$single_domain,"$iden\%"); push(@$single_domain,$reg1); push(@$single_domain,$reg2); push(@$single_domain,$in_sub); push(@$single_domain,$ev_codes); push(@$single_domain,$thing->organism); push(@$single_domain,$thing->function); push(@$single_domain,$aliases); push(@$data,$single_domain); } if ($count >0){ $content = $data; } else { $content = "<p>This PEG does not have any similarities</p>"; } return ($content); } sub html_enc { $_ = $_[0]; s/\&/&/g; s/\>/>/g; s/\</</g; $_ } ############################ package Observation::Cluster; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); bless($self,$class); return $self; } sub display { my ($self,$gd, $fid) = @_; my $fig = new FIG; my $all_regions = []; #get the organism genome my $target_genome = $fig->genome_of($fid); # get location of the gene my $data = $fig->feature_location($fid); my ($contig, $beg, $end); if ($data =~ /(.*)_(\d+)_(\d+)$/){ $contig = $1; $beg = $2; $end = $3; } my ($region_start, $region_end); if ($beg < $end) { $region_start = $beg - 4000; $region_end = $end+4000; } else { $region_end = $end+4000; $region_start = $beg-4000; } # call genes in region my ($target_gene_features, $reg_beg, $reg_end) = $fig->genes_in_region($target_genome, $contig, $region_start, $region_stop); push(@$all_regions,$target_gene_features); my %all_genes; my %all_genomes; foreach my $feature (@$target_gene_features){ $all_genes{$feature} = 1} my @coup = $fig->coupling_and_evidence($fid,5000,1e-10,4,1); foreach my $pair (@$coup[0]->[2]){ my ($peg1,$peg2) = @$pair; my $location = $fig->feature_location($peg1); my ($pair_contig,$pair_beg,$pair_end,$pair_region_start,$pair_region_stop,$pair_genome); if($location =~/(.*)_(\d+)_(\d+)$/){ $pair_contig = $1; $pair_beg = $2; $pair_end = $3; if ($pair_beg < $pair_end) { $pair_region_start = $pair_beg - 4000; $pair_region_end = $pair_end+4000; } else { $pair_region_end = $pair_end+4000; $pair_region_start = $pair_beg-4000; } $pair_genome = $fig->genome_of($peg1); $all_genomes{$pair_genome} = 1; my ($pair_features) = $fig->genes_in_region($pair_genome, $pair_contig, $pair_region_start, $pair_region_stop); push(@$all_regions,$pair_features); foreach my $pair_feature (@$pair_features){ $all_genes{$feature} = 1} } } my $bbh_sets = []; my %already; foreach my $gene_key (keys(%all_genes)){ if($already{$gene_key}){next;} my $gene_set = [$gene_key]; foreach my $genome_key (keys(%all_genomes)){ my $return = $fig->bbh_list($genome_key,[$gene_key]); my @$feature_list = $return->{$gene_key}; foreach my $fl (@$feature_list){ push(@$gene_set,$fl); $already{$fl} = 1; } } $already{$gene_key} = 1; push(@$bbh_sets,$gene_set); } %bbh_set_rank; my $order = 0; foreach my $set (@$bbh_sets){ my $count = scalar(@$set); $bbh_rank{$order} = $count; $order++; } my %peg_rank; my $counter = 1; foreach my $bbh_order (sort {$bbh_set_rank{$b} <=> $bbh_set_rank{$a}} keys %bbh_set_rank){ my $good_set = @$bbh_sets[$bbh_order]; foreach my $peg (@$good_set){ $peg_rank{$peg} = $counter; } $counter++; } foreach my $region (@$all_regions){ my $sample_peg = @$region[0]; my $region_genome = $fig->genome_of($sample_peg); my $region_gs = $fig->genus_species($region_genome); my $line_config = { 'title' => $region_gs, 'short_title' => $region_gs, 'height' => 30, 'basepair_offset' => '0'; }; my $line_data = []; foreach my $fid (@$region){ my $element_hash; my $links_list = []; my $descriptions = []; my $color = $peg_rank{$fid}; my $fid_location = $fig->feature_location($fid); if($fid_location =~/(.*)_(\d+)_(\d+)$/){ my($start,$stop); if ($2 < $3){$start = $2; $stop = $3;} else{$stop = $2; $start = $3;} $element_hash = { "title" => $fid, "start" => $start, "end" => $stop, "type"=> 'arrow', "color"=> $color, "zlayer" => "2", }; push(@$line_data,$element_hash); } } $gd->add_line($line_data, $line_config); } return $gd; }
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |