Parent Directory
|
Revision Log
added changes to sims display
package Observation; use lib '/vol/ontologies'; use DBMaster; require Exporter; @EXPORT_OK = qw(get_objects); use FIG_Config; #use strict; #use warnings; use HTML; 1; # $Id: Observation.pm,v 1.32 2007/08/21 22:45:54 arodri7 Exp $ =head1 NAME Observation -- A presentation layer for observations in SEED. =head1 DESCRIPTION The SEED environment contains various sources of information for sequence features. The purpose of this library is to provide a single interface to this data. The data can be used to display information for a given sequence feature (protein or other, but primarily information is computed for proteins). =cut =head1 BACKGROUND =head2 Data incorporated in the Observations As the goal of this library is to provide an integrated view, we combine diverse sources of evidence. =head3 SEED core evidence The core SEED data structures provided by FIG.pm. These are Similarities, BBHs and PCHs. =head3 Attribute based Evidence We use the SEED attribute infrastructure to store information computed by a variety of computational procedures. These are e.g. InterPro hits via InterProScan (ipr), NCBI Conserved Domain Database Hits via PSSM(cdd), PFAM hits via HMM(pfam), SignalP results(signalp), and various others. =head1 METHODS The public methods this package provides are listed below: =head3 context() Returns close or diverse for purposes of displaying genomic context =cut sub context { my ($self) = @_; return $self->{context}; } =head3 rows() each row in a displayed table =cut sub rows { my ($self) = @_; return $self->{rows}; } =head3 acc() A valid accession or remote ID (in the style of a db_xref) or a valid local ID (FID) in case this is supported. =cut sub acc { my ($self) = @_; return $self->{acc}; } =head3 class() The class of evidence (required). This is usually simply the name of the tool or the name of the SEED data structure. B<Please note> the connection of class and display_method and URL. Current valid classes are: =over 9 =item IDENTICAL (seq) =item SIM (seq) =item BBH (seq) =item PCH (fc) =item FIGFAM (seq) =item IPR (dom) =item CDD (dom) =item PFAM (dom) =item SIGNALP_CELLO_TMPRED (loc) =item PDB (seq) =item TMHMM (loc) =item HMMTOP (loc) =back =cut sub class { my ($self) = @_; return $self->{class}; } =head3 type() The type of evidence (required). Where type is one of the following: =over 8 =item seq=Sequence similarity =item dom=domain based match =item loc=Localization of the feature =item fc=Functional coupling. =back =cut sub type { my ($self) = @_; return $self->{type}; } =head3 start() Start of hit in query sequence. =cut sub start { my ($self) = @_; return $self->{start}; } =head3 end() End of the hit in query sequence. =cut sub stop { my ($self) = @_; return $self->{stop}; } =head3 start() Start of hit in query sequence. =cut sub qstart { my ($self) = @_; return $self->{qstart}; } =head3 qstop() End of the hit in query sequence. =cut sub qstop { my ($self) = @_; return $self->{qstop}; } =head3 hstart() Start of hit in hit sequence. =cut sub hstart { my ($self) = @_; return $self->{hstart}; } =head3 end() End of the hit in hit sequence. =cut sub hstop { my ($self) = @_; return $self->{hstop}; } =head3 qlength() length of the query sequence in similarities =cut sub qlength { my ($self) = @_; return $self->{qlength}; } =head3 hlength() length of the hit sequence in similarities =cut sub hlength { my ($self) = @_; return $self->{hlength}; } =head3 evalue() E-value or P-Value if present. =cut sub evalue { my ($self) = @_; return $self->{evalue}; } =head3 score() Score if present. =cut sub score { my ($self) = @_; return $self->{score}; } =head3 display() will be different for each type =cut sub display { die "Abstract Method Called\n"; } =head3 display_table() will be different for each type =cut sub display_table { die "Abstract Table Method Called\n"; } =head3 get_objects() This is the B<REAL WORKHORSE> method of this Package. =cut sub get_objects { my ($self,$fid,$scope) = @_; my $objects = []; my @matched_datasets=(); my $fig = new FIG; # call function that fetches attribute based observations # returns an array of arrays of hashes if($scope){ get_cluster_observations($fid,\@matched_datasets,$scope); } else{ my %domain_classes; my @attributes = $fig->get_attributes($fid); $domain_classes{'CDD'} = 1; #get_identical_proteins($fid,\@matched_datasets); get_attribute_based_domain_observations($fid,\%domain_classes,\@matched_datasets,\@attributes); get_sims_observations($fid,\@matched_datasets); get_functional_coupling($fid,\@matched_datasets); get_attribute_based_location_observations($fid,\@matched_datasets,\@attributes); get_pdb_observations($fid,\@matched_datasets,\@attributes); } foreach my $dataset (@matched_datasets) { my $object; if($dataset->{'type'} eq "dom"){ $object = Observation::Domain->new($dataset); } if($dataset->{'class'} eq "PCH"){ $object = Observation::FC->new($dataset); } if ($dataset->{'class'} eq "IDENTICAL"){ $object = Observation::Identical->new($dataset); } if ($dataset->{'class'} eq "SIGNALP_CELLO_TMPRED"){ $object = Observation::Location->new($dataset); } if ($dataset->{'class'} eq "SIM"){ $object = Observation::Sims->new($dataset); } if ($dataset->{'class'} eq "CLUSTER"){ $object = Observation::Cluster->new($dataset); } if ($dataset->{'class'} eq "PDB"){ $object = Observation::PDB->new($dataset); } push (@$objects, $object); } return $objects; } =head3 display_housekeeping This method returns the housekeeping data for a given peg in a table format =cut sub display_housekeeping { my ($self,$fid) = @_; my $fig = new FIG; my $content; my $org_name = $fig->org_of($fid); my $org_id = $fig->orgid_of_orgname($org_name); my $loc = $fig->feature_location($fid); my($contig, $beg, $end) = $fig->boundaries_of($loc); my $strand = ($beg <= $end)? '+' : '-'; my @subsystems = $fig->subsystems_for_peg($fid); my $function = $fig->function_of($fid); my @aliases = $fig->feature_aliases($fid); my $taxonomy = $fig->taxonomy_of($org_id); my @ecs = ($function =~ /\(EC\s(\d+\.[-\d+]+\.[-\d+]+\.[-\d+]+)\)/g); $content .= qq(<b>General Protein Data</b><br><br><br><table border="0">); $content .= qq(<tr width=15%><td >FIG ID</td><td>$fid</td></tr>\n); $content .= qq(<tr width=15%><td >Organism Name</td><td>$org_name, $org_id</td></tr>\n); $content .= qq(<tr><td width=15%>Taxonomy</td><td>$taxonomy</td></tr>\n); $content .= qq(<tr width=15%><td>FIG Organism ID</td><td>$org_id</td></tr>\n); $content .= qq(<tr width=15%><td>Gene Location</td><td>Contig $contig [$beg,$end], Strand $strand</td></tr>\n);; $content .= qq(<tr width=15%><td>Function</td><td>$function</td></tr>\n); if ( @ecs ) { $content .= qq(<tr><td>EC:</td><td>); foreach my $ec ( @ecs ) { my $ec_name = $fig->ec_name($ec); $content .= join(" -- ", $ec, $ec_name) . "<br>\n"; } $content .= qq(</td></tr>\n); } if ( @subsystems ) { $content .= qq(<tr><td>Subsystems</td><td>); foreach my $subsystem ( @subsystems ) { $content .= join(" -- ", @$subsystem) . "<br>\n"; } } my %groups; if ( @aliases ) { # get the db for each alias foreach my $alias (@aliases){ $groups{$alias} = &get_database($alias); } # group ids by aliases my %db_aliases; foreach my $key (sort {$groups{$a} cmp $groups{$b}} keys %groups){ push (@{$db_aliases{$groups{$key}}}, $key); } $content .= qq(<tr><td>Aliases</td><td><table border="0">); foreach my $key (sort keys %db_aliases){ $content .= qq(<tr><td>$key:</td><td>) . join(", ", @{$db_aliases{$key}}) . qq(</td></tr\n); } $content .= qq(</td></tr></table>\n); } $content .= qq(</table><p>\n); return ($content); } =head3 get_sims_summary This method uses as input the similarities of a peg and creates a tree view of their taxonomy =cut sub get_sims_summary { my ($observation, $fid) = @_; my $fig = new FIG; my %families; my @sims= $fig->nsims($fid,20000,10,"all"); foreach my $sim (@sims){ next if ($sim->[1] !~ /fig\|/); my $genome = $fig->genome_of($sim->[1]); my $taxonomy = $fig->taxonomy_of($fig->genome_of($sim->[1])); my $parent_tax = "Root"; foreach my $tax (split(/\; /, $taxonomy)){ push (@{$families{children}{$parent_tax}}, $tax); $families{parent}{$tax} = $parent_tax; $parent_tax = $tax; } } foreach my $key (keys %{$families{children}}){ $families{count}{$key} = @{$families{children}{$key}}; my %saw; my @out = grep(!$saw{$_}++, @{$families{children}{$key}}); $families{children}{$key} = \@out; } return (\%families); } =head1 Internal Methods These methods are not meant to be used outside of this package. B<Please do not use them outside of this package!> =cut sub get_attribute_based_domain_observations{ # we read a FIG ID and a reference to an array (of arrays of hashes, see above) my ($fid,$domain_classes,$datasets_ref,$attributes_ref) = (@_); my $fig = new FIG; foreach my $attr_ref (@$attributes_ref) { # foreach my $attr_ref ($fig->get_attributes($fid)) { my $key = @$attr_ref[1]; my @parts = split("::",$key); my $class = $parts[0]; if($domain_classes->{$parts[0]}){ my $val = @$attr_ref[2]; if($val =~/^(\d+\.\d+|0\.0);(\d+)-(\d+)/){ my $raw_evalue = $1; my $from = $2; my $to = $3; my $evalue; if($raw_evalue =~/(\d+)\.(\d+)/){ my $part2 = 1000 - $1; my $part1 = $2/100; $evalue = $part1."e-".$part2; } else{ $evalue = "0.0"; } my $dataset = {'class' => $class, 'acc' => $key, 'type' => "dom" , 'evalue' => $evalue, 'start' => $from, 'stop' => $to, 'fig_id' => $fid, 'score' => $raw_evalue }; push (@{$datasets_ref} ,$dataset); } } } } sub get_attribute_based_location_observations{ my ($fid,$datasets_ref, $attributes_ref) = (@_); my $fig = new FIG; my $location_attributes = ['SignalP','CELLO','TMPRED','Phobius']; my $dataset = {'type' => "loc", 'class' => 'SIGNALP_CELLO_TMPRED', 'fig_id' => $fid }; foreach my $attr_ref (@$attributes_ref){ # foreach my $attr_ref ($fig->get_attributes($fid,$location_attributes)) { my $key = @$attr_ref[1]; next if (($key !~ /SignalP/) && ($key !~ /CELLO/) && ($key !~ /TMPRED/) && ($key !~/Phobius/) ); my @parts = split("::",$key); my $sub_class = $parts[0]; my $sub_key = $parts[1]; my $value = @$attr_ref[2]; if($sub_class eq "SignalP"){ if($sub_key eq "cleavage_site"){ my @value_parts = split(";",$value); $dataset->{'cleavage_prob'} = $value_parts[0]; $dataset->{'cleavage_loc'} = $value_parts[1]; # print STDERR "LOC: $value_parts[1]"; } elsif($sub_key eq "signal_peptide"){ $dataset->{'signal_peptide_score'} = $value; } } elsif($sub_class eq "CELLO"){ $dataset->{'cello_location'} = $sub_key; $dataset->{'cello_score'} = $value; } elsif($sub_class eq "Phobius"){ if($sub_key eq "transmembrane"){ $dataset->{'phobius_tm_locations'} = $value; } elsif($sub_key eq "signal"){ $dataset->{'phobius_signal_location'} = $value; } } elsif($sub_class eq "TMPRED"){ my @value_parts = split(/\;/,$value); $dataset->{'tmpred_score'} = $value_parts[0]; $dataset->{'tmpred_locations'} = $value_parts[1]; } } push (@{$datasets_ref} ,$dataset); } =head3 get_pdb_observations() (internal) This methods sets the type and class for pdb observations =cut sub get_pdb_observations{ my ($fid,$datasets_ref, $attributes_ref) = (@_); my $fig = new FIG; foreach my $attr_ref (@$attributes_ref){ #foreach my $attr_ref ($fig->get_attributes($fid,'PDB')) { my $key = @$attr_ref[1]; next if ( ($key !~ /PDB/)); my($key1,$key2) =split("::",$key); my $value = @$attr_ref[2]; my ($evalue,$location) = split(";",$value); if($evalue =~/(\d+)\.(\d+)/){ my $part2 = 1000 - $1; my $part1 = $2/100; $evalue = $part1."e-".$part2; } my($start,$stop) =split("-",$location); my $url = @$attr_ref[3]; my $dataset = {'class' => 'PDB', 'type' => 'seq' , 'acc' => $key2, 'evalue' => $evalue, 'start' => $start, 'stop' => $stop, 'fig_id' => $fid }; push (@{$datasets_ref} ,$dataset); } } =head3 get_cluster_observations() (internal) This methods sets the type and class for cluster observations =cut sub get_cluster_observations{ my ($fid,$datasets_ref,$scope) = (@_); my $dataset = {'class' => 'CLUSTER', 'type' => 'fc', 'context' => $scope, 'fig_id' => $fid }; push (@{$datasets_ref} ,$dataset); } =head3 get_sims_observations() (internal) This methods retrieves sims fills the internal data structures. =cut sub get_sims_observations{ my ($fid,$datasets_ref) = (@_); my $fig = new FIG; my @sims= $fig->nsims($fid,500,1e-20,"all"); my ($dataset); my %id_list; foreach my $sim (@sims){ my $hit = $sim->[1]; next if ($hit !~ /^fig\|/); my @aliases = $fig->feature_aliases($hit); foreach my $alias (@aliases){ $id_list{$alias} = 1; } } my %already; my (@new_sims, @uniprot); foreach my $sim (@sims){ my $hit = $sim->[1]; my ($id) = ($hit) =~ /\|(.*)/; next if (defined($already{$id})); next if (defined($id_list{$hit})); push (@new_sims, $sim); $already{$id} = 1; } foreach my $sim (@new_sims){ my $hit = $sim->[1]; my $percent = $sim->[2]; my $evalue = $sim->[10]; my $qfrom = $sim->[6]; my $qto = $sim->[7]; my $hfrom = $sim->[8]; my $hto = $sim->[9]; my $qlength = $sim->[12]; my $hlength = $sim->[13]; my $db = get_database($hit); my $func = $fig->function_of($hit); my $organism = $fig->org_of($hit); $dataset = {'class' => 'SIM', 'acc' => $hit, 'identity' => $percent, 'type' => 'seq', 'evalue' => $evalue, 'qstart' => $qfrom, 'qstop' => $qto, 'hstart' => $hfrom, 'hstop' => $hto, 'database' => $db, 'organism' => $organism, 'function' => $func, 'qlength' => $qlength, 'hlength' => $hlength, 'fig_id' => $fid }; push (@{$datasets_ref} ,$dataset); } } =head3 get_database (internal) This method gets the database association from the sequence id =cut sub get_database{ my ($id) = (@_); my ($db); if ($id =~ /^fig\|/) { $db = "FIG" } elsif ($id =~ /^gi\|/) { $db = "NCBI" } elsif ($id =~ /^^[NXYZA]P_/) { $db = "RefSeq" } elsif ($id =~ /^sp\|/) { $db = "SwissProt" } elsif ($id =~ /^uni\|/) { $db = "UniProt" } elsif ($id =~ /^tigr\|/) { $db = "TIGR" } elsif ($id =~ /^pir\|/) { $db = "PIR" } elsif (($id =~ /^kegg\|/) || ($id =~ /Spy/)) { $db = "KEGG" } elsif ($id =~ /^tr\|/) { $db = "TrEMBL" } elsif ($id =~ /^eric\|/) { $db = "ASAP" } elsif ($id =~ /^img\|/) { $db = "JGI" } return ($db); } =head3 get_identical_proteins() (internal) This methods retrieves sims fills the internal data structures. =cut sub get_identical_proteins{ my ($fid,$datasets_ref) = (@_); my $fig = new FIG; my $funcs_ref; my %id_list; my @maps_to = grep { $_ ne $fid and $_ !~ /^xxx/ } map { $_->[0] } $fig->mapped_prot_ids($fid); my @aliases = $fig->feature_aliases($fid); foreach my $alias (@aliases){ $id_list{$alias} = 1; } foreach my $id (@maps_to) { my ($tmp, $who); if (($id ne $fid) && ($tmp = $fig->function_of($id)) && (! defined ($id_list{$id}))) { $who = &get_database($id); push(@$funcs_ref, [$id,$who,$tmp]); } } my ($dataset); my $dataset = {'class' => 'IDENTICAL', 'type' => 'seq', 'fig_id' => $fid, 'rows' => $funcs_ref }; push (@{$datasets_ref} ,$dataset); } =head3 get_functional_coupling() (internal) This methods retrieves the functional coupling of a protein given a peg ID =cut sub get_functional_coupling{ my ($fid,$datasets_ref) = (@_); my $fig = new FIG; my @funcs = (); # initialize some variables my($sc,$neigh); # set default parameters for coupling and evidence my ($bound,$sim_cutoff,$coupling_cutoff) = (5000, 1.0e-10, 4); # get the fc data my @fc_data = $fig->coupling_and_evidence($fid,$bound,$sim_cutoff,$coupling_cutoff,1); # retrieve data my @rows = map { ($sc,$neigh) = @$_; [$sc,$neigh,scalar $fig->function_of($neigh)] } @fc_data; my ($dataset); my $dataset = {'class' => 'PCH', 'type' => 'fc', 'fig_id' => $fid, 'rows' => \@rows }; push (@{$datasets_ref} ,$dataset); } =head3 new (internal) Instantiate a new object. =cut sub new { my ($class,$dataset) = @_; my $self = { class => $dataset->{'class'}, type => $dataset->{'type'}, fig_id => $dataset->{'fig_id'}, score => $dataset->{'score'}, }; bless($self,$class); return $self; } =head3 identity (internal) Returns the % identity of the similar sequence =cut sub identity { my ($self) = @_; return $self->{identity}; } =head3 fig_id (internal) =cut sub fig_id { my ($self) = @_; return $self->{fig_id}; } =head3 feature_id (internal) =cut sub feature_id { my ($self) = @_; return $self->{feature_id}; } =head3 id (internal) Returns the ID of the identical sequence =cut sub id { my ($self) = @_; return $self->{id}; } =head3 organism (internal) Returns the organism of the identical sequence =cut sub organism { my ($self) = @_; return $self->{organism}; } =head3 function (internal) Returns the function of the identical sequence =cut sub function { my ($self) = @_; return $self->{function}; } =head3 database (internal) Returns the database of the identical sequence =cut sub database { my ($self) = @_; return $self->{database}; } sub score { my ($self) = @_; return $self->{score}; } ############################################################ ############################################################ package Observation::PDB; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{acc} = $dataset->{'acc'}; $self->{evalue} = $dataset->{'evalue'}; $self->{start} = $dataset->{'start'}; $self->{stop} = $dataset->{'stop'}; bless($self,$class); return $self; } =head3 display() displays data stored in best_PDB attribute and in Ontology server for given PDB id =cut sub display{ my ($self,$gd) = @_; my $fid = $self->fig_id; my $dbmaster = DBMaster->new(-database =>'Ontology'); my $acc = $self->acc; my ($pdb_description,$pdb_source,$pdb_ligand); my $pdb_objs = $dbmaster->pdb->get_objects( { 'id' => $acc } ); if(!scalar(@$pdb_objs)){ $pdb_description = "not available"; $pdb_source = "not available"; $pdb_ligand = "not available"; } else{ my $pdb_obj = $pdb_objs->[0]; $pdb_description = $pdb_obj->description; $pdb_source = $pdb_obj->source; $pdb_ligand = $pdb_obj->ligand; } my $lines = []; my $line_data = []; my $line_config = { 'title' => "PDB hit for $fid", 'short_title' => "best PDB", 'basepair_offset' => '1' }; my $fig = new FIG; my $seq = $fig->get_translation($fid); my $fid_stop = length($seq); my $fid_element_hash = { "title" => $fid, "start" => '1', "end" => $fid_stop, "color"=> '1', "zlayer" => '1' }; push(@$line_data,$fid_element_hash); my $links_list = []; my $descriptions = []; my $name; $name = {"title" => 'id', "value" => $acc}; push(@$descriptions,$name); my $description; $description = {"title" => 'pdb description', "value" => $pdb_description}; push(@$descriptions,$description); my $score; $score = {"title" => "score", "value" => $self->evalue}; push(@$descriptions,$score); my $start_stop; my $start_stop_value = $self->start."_".$self->stop; $start_stop = {"title" => "start-stop", "value" => $start_stop_value}; push(@$descriptions,$start_stop); my $source; $source = {"title" => "source", "value" => $pdb_source}; push(@$descriptions,$source); my $ligand; $ligand = {"title" => "pdb ligand", "value" => $pdb_ligand}; push(@$descriptions,$ligand); my $link; my $link_url ="http://www.rcsb.org/pdb/explore/explore.do?structureId=".$acc; $link = {"link_title" => $acc, "link" => $link_url}; push(@$links_list,$link); my $pdb_element_hash = { "title" => "PDB homology", "start" => $self->start, "end" => $self->stop, "color"=> '6', "zlayer" => '3', "links_list" => $links_list, "description" => $descriptions}; push(@$line_data,$pdb_element_hash); $gd->add_line($line_data, $line_config); return $gd; } 1; ############################################################ ############################################################ package Observation::Identical; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{rows} = $dataset->{'rows'}; bless($self,$class); return $self; } =head3 display_table() If available use the function specified here to display the "raw" observation. This code will display a table for the identical protein B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evi dence. =cut sub display_table{ my ($self) = @_; my $fig = new FIG; my $fid = $self->fig_id; my $rows = $self->rows; my $cgi = new CGI; my $all_domains = []; my $count_identical = 0; my $content; foreach my $row (@$rows) { my $id = $row->[0]; my $who = $row->[1]; my $assignment = $row->[2]; my $organism = $fig->org_of($id); my $single_domain = []; push(@$single_domain,$who); push(@$single_domain,&HTML::set_prot_links($cgi,$id)); push(@$single_domain,$organism); push(@$single_domain,$assignment); push(@$all_domains,$single_domain); $count_identical++; } if ($count_identical >0){ $content = $all_domains; } else{ $content = "<p>This PEG does not have any essentially identical proteins</p>"; } return ($content); } 1; ######################################### ######################################### package Observation::FC; 1; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{rows} = $dataset->{'rows'}; bless($self,$class); return $self; } =head3 display_table() If available use the function specified here to display the "raw" observation. This code will display a table for the identical protein B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evi dence. =cut sub display_table { my ($self,$dataset) = @_; my $fid = $self->fig_id; my $rows = $self->rows; my $cgi = new CGI; my $functional_data = []; my $count = 0; my $content; foreach my $row (@$rows) { my $single_domain = []; $count++; # construct the score link my $score = $row->[0]; my $toid = $row->[1]; my $link = $cgi->url(-relative => 1) . "?user=master&request=show_coupling_evidence&prot=$fid&to=$toid&SPROUT="; my $sc_link = "<a href=$link>$score</a>"; push(@$single_domain,$sc_link); push(@$single_domain,$row->[1]); push(@$single_domain,$row->[2]); push(@$functional_data,$single_domain); } if ($count >0){ $content = $functional_data; } else { $content = "<p>This PEG does not have any functional coupling</p>"; } return ($content); } ######################################### ######################################### package Observation::Domain; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{evalue} = $dataset->{'evalue'}; $self->{acc} = $dataset->{'acc'}; $self->{start} = $dataset->{'start'}; $self->{stop} = $dataset->{'stop'}; bless($self,$class); return $self; } sub display { my ($thing,$gd) = @_; my $lines = []; # my $line_config = { 'title' => $thing->acc, # 'short_title' => $thing->type, # 'basepair_offset' => '1' }; my $color = "4"; my $line_data = []; my $links_list = []; my $descriptions = []; my $db_and_id = $thing->acc; my ($db,$id) = split("::",$db_and_id); my $dbmaster = DBMaster->new(-database =>'Ontology'); my ($name_title,$name_value,$description_title,$description_value); if($db eq "CDD"){ my $cdd_objs = $dbmaster->cdd->get_objects( { 'id' => $id } ); if(!scalar(@$cdd_objs)){ $name_title = "name"; $name_value = "not available"; $description_title = "description"; $description_value = "not available"; } else{ my $cdd_obj = $cdd_objs->[0]; $name_title = "name"; $name_value = $cdd_obj->term; $description_title = "description"; $description_value = $cdd_obj->description; } } my $line_config = { 'title' => $thing->acc, 'short_title' => $name_value, 'basepair_offset' => '1' }; my $name; $name = {"title" => $name_title, "value" => $name_value}; push(@$descriptions,$name); my $description; $description = {"title" => $description_title, "value" => $description_value}; push(@$descriptions,$description); my $score; $score = {"title" => "score", "value" => $thing->evalue}; push(@$descriptions,$score); my $link_id; if ($thing->acc =~/\w+::(\d+)/){ $link_id = $1; } my $link; my $link_url; if ($thing->class eq "CDD"){$link_url = "http://0-www.ncbi.nlm.nih.gov.library.vu.edu.au:80/Structure/cdd/cddsrv.cgi?uid=$link_id"} elsif($thing->class eq "PFAM"){$link_url = "http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?$link_id"} else{$link_url = "NO_URL"} $link = {"link_title" => $thing->acc, "link" => $link_url}; push(@$links_list,$link); my $element_hash = { "title" => $thing->type, "start" => $thing->start, "end" => $thing->stop, "color"=> $color, "zlayer" => '2', "links_list" => $links_list, "description" => $descriptions}; push(@$line_data,$element_hash); $gd->add_line($line_data, $line_config); return $gd; } sub display_table { my ($self,$dataset) = @_; my $cgi = new CGI; my $data = []; my $count = 0; my $content; foreach my $thing (@$dataset) { next if ($thing->type !~ /dom/); my $single_domain = []; $count++; my $db_and_id = $thing->acc; my ($db,$id) = split("::",$db_and_id); my $dbmaster = DBMaster->new(-database =>'Ontology'); my ($name_title,$name_value,$description_title,$description_value); if($db eq "CDD"){ my $cdd_objs = $dbmaster->cdd->get_objects( { 'id' => $id } ); if(!scalar(@$cdd_objs)){ $name_title = "name"; $name_value = "not available"; $description_title = "description"; $description_value = "not available"; } else{ my $cdd_obj = $cdd_objs->[0]; $name_title = "name"; $name_value = $cdd_obj->term; $description_title = "description"; $description_value = $cdd_obj->description; } } my $location = $thing->start . " - " . $thing->stop; push(@$single_domain,$db); push(@$single_domain,$thing->acc); push(@$single_domain,$name_value); push(@$single_domain,$location); push(@$single_domain,$thing->evalue); push(@$single_domain,$description_value); push(@$data,$single_domain); } if ($count >0){ $content = $data; } else { $content = "<p>This PEG does not have any similarities to domains</p>"; } } ######################################### ######################################### package Observation::Location; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{cleavage_prob} = $dataset->{'cleavage_prob'}; $self->{cleavage_loc} = $dataset->{'cleavage_loc'}; $self->{signal_peptide_score} = $dataset->{'signal_peptide_score'}; $self->{cello_location} = $dataset->{'cello_location'}; $self->{cello_score} = $dataset->{'cello_score'}; $self->{tmpred_score} = $dataset->{'tmpred_score'}; $self->{tmpred_locations} = $dataset->{'tmpred_locations'}; $self->{phobius_signal_location} = $dataset->{'phobius_signal_location'}; $self->{phobius_tm_locations} = $dataset->{'phobius_tm_locations'}; bless($self,$class); return $self; } sub display { my ($thing,$gd) = @_; my $fid = $thing->fig_id; my $fig= new FIG; my $length = length($fig->get_translation($fid)); my $cleavage_prob; if($thing->cleavage_prob){$cleavage_prob = $thing->cleavage_prob;} my ($cleavage_loc_begin,$cleavage_loc_end) = split("-",$thing->cleavage_loc); my $signal_peptide_score = $thing->signal_peptide_score; my $cello_location = $thing->cello_location; my $cello_score = $thing->cello_score; my $tmpred_score = $thing->tmpred_score; my @tmpred_locations = split(",",$thing->tmpred_locations); my $phobius_signal_location = $thing->phobius_signal_location; my @phobius_tm_locations = split(",",$thing->phobius_tm_locations); my $lines = []; #color is my $color = "6"; if($cello_location){ my $cello_descriptions = []; my $line_data =[]; my $line_config = { 'title' => 'Localization Evidence', 'short_title' => 'CELLO', 'basepair_offset' => '1' }; my $description_cello_location = {"title" => 'Best Cello Location', "value" => $cello_location}; push(@$cello_descriptions,$description_cello_location); my $description_cello_score = {"title" => 'Cello Score', "value" => $cello_score}; push(@$cello_descriptions,$description_cello_score); my $element_hash = { "title" => "CELLO", "start" => "1", "end" => $length + 1, "color"=> $color, "type" => 'box', "zlayer" => '1', "description" => $cello_descriptions}; push(@$line_data,$element_hash); $gd->add_line($line_data, $line_config); } $color = "2"; if($tmpred_score){ my $line_data =[]; my $line_config = { 'title' => 'Localization Evidence', 'short_title' => 'Transmembrane', 'basepair_offset' => '1' }; foreach my $tmpred (@tmpred_locations){ my $descriptions = []; my ($begin,$end) =split("-",$tmpred); my $description_tmpred_score = {"title" => 'TMPRED score', "value" => $tmpred_score}; push(@$descriptions,$description_tmpred_score); my $element_hash = { "title" => "transmembrane location", "start" => $begin + 1, "end" => $end + 1, "color"=> $color, "zlayer" => '5', "type" => 'smallbox', "description" => $descriptions}; push(@$line_data,$element_hash); } $gd->add_line($line_data, $line_config); } if((scalar(@phobius_tm_locations) > 0) || $phobius_signal_location){ my $line_data =[]; my $line_config = { 'title' => 'Localization Evidence', 'short_title' => 'Phobius', 'basepair_offset' => '1' }; foreach my $tm_loc (@phobius_tm_locations){ my $descriptions = []; my $description_phobius_tm_locations = {"title" => 'Phobius TM Location', "value" => $tm_loc}; push(@$descriptions,$description_phobius_tm_locations); my ($begin,$end) =split("-",$tm_loc); my $element_hash = { "title" => "phobius transmembrane location", "start" => $begin + 1, "end" => $end + 1, "color"=> '6', "zlayer" => '4', "type" => 'bigbox', "description" => $descriptions}; push(@$line_data,$element_hash); } if($phobius_signal_location){ my $descriptions = []; my $description_phobius_signal_location = {"title" => 'Phobius Signal Location', "value" => $phobius_signal_location}; push(@$descriptions,$description_phobius_signal_location); my ($begin,$end) =split("-",$phobius_signal_location); my $element_hash = { "title" => "phobius signal locations", "start" => $begin + 1, "end" => $end + 1, "color"=> '1', "zlayer" => '5', "type" => 'box', "description" => $descriptions}; push(@$line_data,$element_hash); } $gd->add_line($line_data, $line_config); } $color = "1"; if($signal_peptide_score){ my $line_data = []; my $descriptions = []; my $line_config = { 'title' => 'Localization Evidence', 'short_title' => 'SignalP', 'basepair_offset' => '1' }; my $description_signal_peptide_score = {"title" => 'signal peptide score', "value" => $signal_peptide_score}; push(@$descriptions,$description_signal_peptide_score); my $description_cleavage_prob = {"title" => 'cleavage site probability', "value" => $cleavage_prob}; push(@$descriptions,$description_cleavage_prob); my $element_hash = { "title" => "SignalP", "start" => $cleavage_loc_begin - 2, "end" => $cleavage_loc_end + 1, "type" => 'bigbox', "color"=> $color, "zlayer" => '10', "description" => $descriptions}; push(@$line_data,$element_hash); $gd->add_line($line_data, $line_config); } return ($gd); } sub cleavage_loc { my ($self) = @_; return $self->{cleavage_loc}; } sub cleavage_prob { my ($self) = @_; return $self->{cleavage_prob}; } sub signal_peptide_score { my ($self) = @_; return $self->{signal_peptide_score}; } sub tmpred_score { my ($self) = @_; return $self->{tmpred_score}; } sub tmpred_locations { my ($self) = @_; return $self->{tmpred_locations}; } sub cello_location { my ($self) = @_; return $self->{cello_location}; } sub cello_score { my ($self) = @_; return $self->{cello_score}; } sub phobius_signal_location { my ($self) = @_; return $self->{phobius_signal_location}; } sub phobius_tm_locations { my ($self) = @_; return $self->{phobius_tm_locations}; } ######################################### ######################################### package Observation::Sims; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{identity} = $dataset->{'identity'}; $self->{acc} = $dataset->{'acc'}; $self->{evalue} = $dataset->{'evalue'}; $self->{qstart} = $dataset->{'qstart'}; $self->{qstop} = $dataset->{'qstop'}; $self->{hstart} = $dataset->{'hstart'}; $self->{hstop} = $dataset->{'hstop'}; $self->{database} = $dataset->{'database'}; $self->{organism} = $dataset->{'organism'}; $self->{function} = $dataset->{'function'}; $self->{qlength} = $dataset->{'qlength'}; $self->{hlength} = $dataset->{'hlength'}; bless($self,$class); return $self; } =head3 display() If available use the function specified here to display a graphical observation. This code will display a graphical view of the similarities using the genome drawer object =cut sub display { my ($self,$gd) = @_; my $fig = new FIG; my $peg = $self->acc; my $organism = $self->organism; my $genome = $fig->genome_of($peg); my ($org_tax) = ($genome) =~ /(.*)\./; my $function = $self->function; my $abbrev_name = $fig->abbrev($organism); my $align_start = $self->qstart; my $align_stop = $self->qstop; my $hit_start = $self->hstart; my $hit_stop = $self->hstop; my $tax_link = "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=" . $org_tax; my $line_config = { 'title' => "$organism [$org_tax]", 'short_title' => "$abbrev_name", 'title_link' => '$tax_link', 'basepair_offset' => '0' }; my $line_data = []; my $element_hash; my $links_list = []; my $descriptions = []; # get subsystem information my $url_link = "http://seed-viewer.theseed.org/index.cgi?action=ShowAnnotation&prot=".$peg; my $link; $link = {"link_title" => $peg, "link" => $url_link}; push(@$links_list,$link); my @subsystems = $fig->peg_to_subsystems($peg); foreach my $subsystem (@subsystems){ my $link; $link = {"link" => "http://seed-viewer.theseed.org/index.cgi?action=ShowSubsystem&subsystem_name=$subsystem", "link_title" => $subsystem}; push(@$links_list,$link); } my $description_function; $description_function = {"title" => "function", "value" => $function}; push(@$descriptions,$description_function); my ($description_ss, $ss_string); $ss_string = join (",", @subsystems); $description_ss = {"title" => "subsystems", "value" => $ss_string}; push(@$descriptions,$description_ss); my $description_loc; $description_loc = {"title" => "location start", "value" => $hit_start}; push(@$descriptions, $description_loc); $description_loc = {"title" => "location stop", "value" => $hit_stop}; push(@$descriptions, $description_loc); my $evalue = $self->evalue; while ($evalue =~ /-0/) { my ($chunk1, $chunk2) = split(/-/, $evalue); $chunk2 = substr($chunk2,1); $evalue = $chunk1 . "-" . $chunk2; } my $color = &color($evalue); my $description_eval = {"title" => "E-Value", "value" => $evalue}; push(@$descriptions, $description_eval); my $identity = $self->identity; my $description_identity = {"title" => "Identity", "value" => $identity}; push(@$descriptions, $description_identity); $element_hash = { "title" => $peg, "start" => $align_start, "end" => $align_stop, "type"=> 'box', "color"=> $color, "zlayer" => "2", "links_list" => $links_list, "description" => $descriptions }; push(@$line_data,$element_hash); $gd->add_line($line_data, $line_config); return ($gd); } =head3 display_table() If available use the function specified here to display the "raw" observation. This code will display a table for the similarities protein B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evidence. =cut sub display_table { my ($self,$dataset, $columns, $query_fid) = @_; my $data = []; my $count = 0; my $content; my $fig = new FIG; my $cgi = new CGI; my @ids; foreach my $thing (@$dataset) { next if ($thing->class ne "SIM"); push (@ids, $thing->acc); } my (%box_column, %subsystems_column, %evidence_column, %e_identical); foreach my $col (@$columns){ # get the column for the subsystems if ($col eq "subsystem"){ %subsystems_column = &get_subsystems_column(\@ids); } # get the column for the evidence codes elsif ($col eq "evidence"){ %evidence_column = &get_evidence_column(\@ids); } } my %e_identical = &get_essentially_identical($query_fid); foreach my $thing (@$dataset) { next if ($thing->class ne "SIM"); my $single_domain = []; $count++; my $id = $thing->acc; my $iden = $thing->identity; my $ln1 = $thing->qlength; my $ln2 = $thing->hlength; my $b1 = $thing->qstart; my $e1 = $thing->qstop; my $b2 = $thing->hstart; my $e2 = $thing->hstop; my $d1 = abs($e1 - $b1) + 1; my $d2 = abs($e2 - $b2) + 1; my $reg1 = "$b1-$e1 (<b>$d1/$ln1</b>)"; my $reg2 = "$b2-$e2 (<b>$d2/$ln2</b>)"; # checkbox column my $field_name = "tables_" . $id; my $pair_name = "visual_" . $id; my $box_col = qq(<input type=checkbox name=seq value="$id" id="$field_name" onClick="VisualCheckPair('$field_name', '$pair_name');">); # get the linked fig id my $fig_col; if (defined ($e_identical{$id})){ $fig_col = &HTML::set_prot_links($cgi,$id) . "*"; } else{ $fig_col = &HTML::set_prot_links($cgi,$id); } push(@$single_domain,$box_col); # permanent column push(@$single_domain,$fig_col); # permanent column push(@$single_domain,$thing->evalue); # permanent column push(@$single_domain,"$iden\%"); # permanent column push(@$single_domain,$reg1); # permanent column push(@$single_domain,$reg2); # permanent column push(@$single_domain,$thing->organism); # permanent column push(@$single_domain,$thing->function); # permanent column foreach my $col (@$columns){ (push(@$single_domain,$subsystems_column{$id}) && (next)) if ($col eq "subsystem"); (push(@$single_domain,$evidence_column{$id}) && (next)) if ($col eq "evidence"); (push(@$single_domain,&get_prefer($thing->acc, 'NCBI')) && (next)) if ($col eq "ncbi_id"); (push(@$single_domain,&get_prefer($thing->acc, 'RefSeq')) && (next)) if ($col eq "refseq_id"); (push(@$single_domain,&get_prefer($thing->acc, 'SwissProt')) && (next)) if ($col eq "swissprot_id"); (push(@$single_domain,&get_prefer($thing->acc, 'UniProt')) && (next)) if ($col eq "uniprot_id"); (push(@$single_domain,&get_prefer($thing->acc, 'TIGR')) && (next)) if ($col eq "tigr_id"); (push(@$single_domain,&get_prefer($thing->acc, 'PIR')) && (next)) if ($col eq "pir_id"); (push(@$single_domain,&get_prefer($thing->acc, 'KEGG')) && (next)) if ($col eq "kegg_id"); (push(@$single_domain,&get_prefer($thing->acc, 'TrEMBL')) && (next)) if ($col eq "trembl_id"); (push(@$single_domain,&get_prefer($thing->acc, 'ASAP')) && (next)) if ($col eq "asap_id"); (push(@$single_domain,&get_prefer($thing->acc, 'JGI')) && (next)) if ($col eq "jgi_id"); } push(@$data,$single_domain); } if ($count >0 ){ $content = $data; } else{ $content = "<p>This PEG does not have any similarities</p>"; } return ($content); } sub get_box_column{ my ($ids) = @_; my %column; foreach my $id (@$ids){ my $field_name = "tables_" . $id; my $pair_name = "visual_" . $id; $column{$id} = qq(<input type=checkbox name=seq value="$id" id="$field_name" onClick="VisualCheckPair('$field_name', '$pair_name');">); } return (%column); } sub get_subsystems_column{ my ($ids) = @_; my $fig = new FIG; my $cgi = new CGI; my %in_subs = $fig->subsystems_for_pegs($ids); my %column; foreach my $id (@$ids){ my @in_sub = @{$in_subs{$id}} if (defined $in_subs{$id}); my @subsystems; if (@in_sub > 0) { my $count = 1; foreach my $array(@in_sub){ push (@subsystems, $count . ". " . $$array[0]); $count++; } my $in_sub_line = join ("<br>", @subsystems); $column{$id} = $in_sub_line; } else { $column{$id} = " "; } } return (%column); } sub get_essentially_identical{ my ($fid) = @_; my $fig = new FIG; my %id_list; my @maps_to = grep { $_ ne $fid and $_ !~ /^xxx/ } map { $_->[0] } $fig->mapped_prot_ids($fid); foreach my $id (@maps_to) { if (($id ne $fid) && ($fig->function_of($id))) { $id_list{$id} = 1; } } return(%id_list); } sub get_evidence_column{ my ($ids) = @_; my $fig = new FIG; my $cgi = new CGI; my (%column, %code_attributes); my @codes = grep { $_->[1] =~ /^evidence_code/i } $fig->get_attributes($ids); foreach my $key (@codes){ push (@{$code_attributes{$$key[0]}}, $key); } foreach my $id (@$ids){ # add evidence code with tool tip my $ev_codes=" "; my @ev_codes = ""; if ($id =~ /^fig\|\d+\.\d+\.peg\.\d+$/) { my @codes; @codes = @{$code_attributes{$id}} if (defined @{$code_attributes{$id}}); @ev_codes = (); foreach my $code (@codes) { my $pretty_code = $code->[2]; if ($pretty_code =~ /;/) { my ($cd, $ss) = split(";", $code->[2]); $ss =~ s/_/ /g; $pretty_code = $cd;# . " in " . $ss; } push(@ev_codes, $pretty_code); } } if (scalar(@ev_codes) && $ev_codes[0]) { my $ev_code_help=join("<br />", map {&HTML::evidence_codes_explain($_)} @ev_codes); $ev_codes = $cgi->a( { id=>"evidence_codes", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Evidence Codes', '$ev_code_help', ''); this.tooltip.addHandler(); return false;"}, join("<br />", @ev_codes)); } $column{$id}=$ev_codes; } return (%column); } sub html_enc { $_ = $_[0]; s/\&/&/g; s/\>/>/g; s/\</</g; $_ } sub get_prefer { my ($fid, $db) = @_; my $fig = new FIG; my $cgi = new CGI; my @aliases = $fig->feature_aliases($fid); foreach my $alias (@aliases){ my $id_db = &Observation::get_database($alias); if ($id_db eq $db){ my $acc_col .= &HTML::set_prot_links($cgi,$alias); return ($acc_col); } } return (" "); } sub color { my ($evalue) = @_; my $color; if ($evalue <= 1e-170){ $color = 51; } elsif (($evalue <= 1e-120) && ($evalue > 1e-170)){ $color = 52; } elsif (($evalue <= 1e-90) && ($evalue > 1e-120)){ $color = 53; } elsif (($evalue <= 1e-70) && ($evalue > 1e-90)){ $color = 54; } elsif (($evalue <= 1e-40) && ($evalue > 1e-70)){ $color = 55; } elsif (($evalue <= 1e-20) && ($evalue > 1e-40)){ $color = 56; } elsif (($evalue <= 1e-5) && ($evalue > 1e-20)){ $color = 57; } elsif (($evalue <= 1) && ($evalue > 1e-5)){ $color = 58; } elsif (($evalue <= 10) && ($evalue > 1)){ $color = 59; } else{ $color = 60; } return ($color); } ############################ package Observation::Cluster; use base qw(Observation); sub new { my ($class,$dataset) = @_; my $self = $class->SUPER::new($dataset); $self->{context} = $dataset->{'context'}; bless($self,$class); return $self; } sub display { my ($self,$gd) = @_; my $fid = $self->fig_id; my $compare_or_coupling = $self->context; my $gd_window_size = $gd->window_size; my $fig = new FIG; my $all_regions = []; #get the organism genome my $target_genome = $fig->genome_of($fid); # get location of the gene my $data = $fig->feature_location($fid); my ($contig, $beg, $end); my %reverse_flag; if ($data =~ /(.*)_(\d+)_(\d+)$/){ $contig = $1; $beg = $2; $end = $3; } my $offset; my ($region_start, $region_end); if ($beg < $end) { $region_start = $beg - 4000; $region_end = $end+4000; $offset = ($2+(($3-$2)/2))-($gd_window_size/2); } else { $region_start = $end-4000; $region_end = $beg+4000; $offset = ($3+(($2-$3)/2))-($gd_window_size/2); $reverse_flag{$target_genome} = $fid; } # call genes in region my ($target_gene_features, $reg_beg, $reg_end) = $fig->genes_in_region($target_genome, $contig, $region_start, $region_end); push(@$all_regions,$target_gene_features); my (@start_array_region); push (@start_array_region, $offset); my %all_genes; my %all_genomes; foreach my $feature (@$target_gene_features){ $all_genes{$feature} = $fid;} if ($compare_or_coupling eq "diverse") { my @coup = grep { $_->[1]} $fig->coupling_and_evidence($fid,5000,1e-10,4,1); my $coup_count = 0; foreach my $pair (@{$coup[0]->[2]}) { # last if ($coup_count > 10); my ($peg1,$peg2) = @$pair; my ($pair_contig,$pair_beg,$pair_end,$pair_region_start,$pair_region_stop,$pair_genome); $pair_genome = $fig->genome_of($peg1); my $location = $fig->feature_location($peg1); if($location =~/(.*)_(\d+)_(\d+)$/){ $pair_contig = $1; $pair_beg = $2; $pair_end = $3; if ($pair_beg < $pair_end) { $pair_region_start = $pair_beg - 4000; $pair_region_stop = $pair_end+4000; $offset = ($2+(($3-$2)/2))-($gd_window_size/2); } else { $pair_region_start = $pair_end-4000; $pair_region_stop = $pair_beg+4000; $offset = ($3+(($2-$3)/2))-($gd_window_size/2); $reverse_flag{$pair_genome} = $peg1; } push (@start_array_region, $offset); $all_genomes{$pair_genome} = 1; my ($pair_features) = $fig->genes_in_region($pair_genome, $pair_contig, $pair_region_start, $pair_region_stop); push(@$all_regions,$pair_features); foreach my $pair_feature (@$pair_features){ $all_genes{$pair_feature} = $peg1;} } $coup_count++; } } elsif ($compare_or_coupling eq "close") { # make a hash of genomes that are phylogenetically close #my $close_threshold = ".26"; #my @genomes = $fig->genomes('complete'); #my %close_genomes = (); #foreach my $compared_genome (@genomes) #{ # my $dist = $fig->crude_estimate_of_distance($target_genome,$compared_genome); # #$close_genomes{$compared_genome} = $dist; # if ($dist <= $close_threshold) # { # $all_genomes{$compared_genome} = 1; # } #} $all_genomes{"216592.1"} = 1; $all_genomes{"79967.1"} = 1; $all_genomes{"199310.1"} = 1; $all_genomes{"216593.1"} = 1; $all_genomes{"155864.1"} = 1; $all_genomes{"83334.1"} = 1; $all_genomes{"316407.3"} = 1; foreach my $comp_genome (keys %all_genomes){ my $return = $fig->bbh_list($comp_genome,[$fid]); my $feature_list = $return->{$fid}; foreach my $peg1 (@$feature_list){ my $location = $fig->feature_location($peg1); my ($pair_contig,$pair_beg,$pair_end,$pair_region_start,$pair_region_stop,$pair_genome); $pair_genome = $fig->genome_of($peg1); if($location =~/(.*)_(\d+)_(\d+)$/){ $pair_contig = $1; $pair_beg = $2; $pair_end = $3; if ($pair_beg < $pair_end) { $pair_region_start = $pair_beg - 4000; $pair_region_stop = $pair_end + 4000; $offset = ($2+(($3-$2)/2))-($gd_window_size/2); } else { $pair_region_start = $pair_end-4000; $pair_region_stop = $pair_beg+4000; $offset = ($3+(($2-$3)/2))-($gd_window_size/2); $reverse_flag{$pair_genome} = $peg1; } push (@start_array_region, $offset); $all_genomes{$pair_genome} = 1; my ($pair_features) = $fig->genes_in_region($pair_genome, $pair_contig, $pair_region_start, $pair_region_stop); push(@$all_regions,$pair_features); foreach my $pair_feature (@$pair_features){ $all_genes{$pair_feature} = $peg1;} } } } } # get the PCH to each of the genes my $pch_sets = []; my %pch_already; foreach my $gene_peg (keys %all_genes) { if ($pch_already{$gene_peg}){(next);}; my $gene_set = [$gene_peg]; foreach my $pch_peg ($fig->in_pch_pin_with($gene_peg)) { $pch_peg =~ s/,.*$//; my $pch_genome = $fig->genome_of($pch_peg); if ( ($gene_peg ne $pch_peg) && ($all_genomes{$pch_genome})) { push(@$gene_set,$pch_peg); $pch_already{$pch_peg}=1; } $pch_already{$gene_peg}=1; } push(@$pch_sets,$gene_set); } #create a rank of the pch's my %pch_set_rank; my $order = 0; foreach my $set (@$pch_sets){ my $count = scalar(@$set); $pch_set_rank{$order} = $count; $order++; } my %peg_rank; my $counter = 1; foreach my $pch_order (sort {$pch_set_rank{$b} <=> $pch_set_rank{$a}} keys %pch_set_rank){ my $good_set = @$pch_sets[$pch_order]; my $flag_set = 0; if (scalar (@$good_set) > 1) { foreach my $peg (@$good_set){ if ((!$peg_rank{$peg})){ $peg_rank{$peg} = $counter; $flag_set = 1; } } $counter++ if ($flag_set == 1); } else { foreach my $peg (@$good_set){ $peg_rank{$peg} = "20"; } } } # my $bbh_sets = []; # my %already; # foreach my $gene_key (keys(%all_genes)){ # if($already{$gene_key}){(next);} # my $gene_set = [$gene_key]; # # my $gene_key_genome = $fig->genome_of($gene_key); # # foreach my $genome_key (keys(%all_genomes)){ # #(next) if ($gene_key_genome eq $genome_key); # my $return = $fig->bbh_list($genome_key,[$gene_key]); # # my $feature_list = $return->{$gene_key}; # foreach my $fl (@$feature_list){ # push(@$gene_set,$fl); # } # } # $already{$gene_key} = 1; # push(@$bbh_sets,$gene_set); # } # # my %bbh_set_rank; # my $order = 0; # foreach my $set (@$bbh_sets){ # my $count = scalar(@$set); # $bbh_set_rank{$order} = $count; # $order++; # } # # my %peg_rank; # my $counter = 1; # foreach my $bbh_order (sort {$bbh_set_rank{$b} <=> $bbh_set_rank{$a}} keys %bbh_set_rank){ # my $good_set = @$bbh_sets[$bbh_order]; # my $flag_set = 0; # if (scalar (@$good_set) > 1) # { # foreach my $peg (@$good_set){ # if ((!$peg_rank{$peg})){ # $peg_rank{$peg} = $counter; # $flag_set = 1; # } # } # $counter++ if ($flag_set == 1); # } # else # { # foreach my $peg (@$good_set){ # $peg_rank{$peg} = "20"; # } # } # } foreach my $region (@$all_regions){ my $sample_peg = @$region[0]; my $region_genome = $fig->genome_of($sample_peg); my $region_gs = $fig->genus_species($region_genome); my $abbrev_name = $fig->abbrev($region_gs); my $line_config = { 'title' => $region_gs, 'short_title' => $abbrev_name, 'basepair_offset' => '0' }; my $offsetting = shift @start_array_region; my $second_line_config = { 'title' => "$region_gs", 'short_title' => "", 'basepair_offset' => '0' }; my $line_data = []; my $second_line_data = []; # initialize variables to check for overlap in genes my ($prev_start, $prev_stop, $prev_fig, $second_line_flag); my $major_line_flag = 0; my $prev_second_flag = 0; foreach my $fid1 (@$region){ $second_line_flag = 0; my $element_hash; my $links_list = []; my $descriptions = []; my $color = $peg_rank{$fid1}; # get subsystem information my $function = $fig->function_of($fid1); my $url_link = "http://seed-viewer.theseed.org/index.cgi?action=ShowAnnotation&prot=".$fid1; my $link; $link = {"link_title" => $fid1, "link" => $url_link}; push(@$links_list,$link); my @subsystems = $fig->peg_to_subsystems($fid1); foreach my $subsystem (@subsystems){ my $link; $link = {"link" => "http://seed-viewer.theseed.org/index.cgi?action=ShowSubsystem&subsystem_name=$subsystem", "link_title" => $subsystem}; push(@$links_list,$link); } my $description_function; $description_function = {"title" => "function", "value" => $function}; push(@$descriptions,$description_function); my $description_ss; my $ss_string = join (",", @subsystems); $description_ss = {"title" => "subsystems", "value" => $ss_string}; push(@$descriptions,$description_ss); my $fid_location = $fig->feature_location($fid1); if($fid_location =~/(.*)_(\d+)_(\d+)$/){ my($start,$stop); $start = $2 - $offsetting; $stop = $3 - $offsetting; if ( (($prev_start) && ($prev_stop) ) && ( ($start < $prev_start) || ($start < $prev_stop) || ($stop < $prev_start) || ($stop < $prev_stop) )){ if (($second_line_flag == 0) && ($prev_second_flag == 0)) { $second_line_flag = 1; $major_line_flag = 1; } } $prev_start = $start; $prev_stop = $stop; $prev_fig = $fid1; if ((defined($reverse_flag{$region_genome})) && ($reverse_flag{$region_genome} eq $all_genes{$fid1})){ $start = $gd_window_size - $start; $stop = $gd_window_size - $stop; } $element_hash = { "title" => $fid1, "start" => $start, "end" => $stop, "type"=> 'arrow', "color"=> $color, "zlayer" => "2", "links_list" => $links_list, "description" => $descriptions }; # if there is an overlap, put into second line if ($second_line_flag == 1){ push(@$second_line_data,$element_hash); $prev_second_flag = 1;} else{ push(@$line_data,$element_hash); $prev_second_flag = 0;} } } $gd->add_line($line_data, $line_config); $gd->add_line($second_line_data, $second_line_config) if ($major_line_flag == 1); } return $gd; }
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |