[Bio] / FigKernelPackages / Observation.pm Repository:
ViewVC logotype

View of /FigKernelPackages/Observation.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.32 - (download) (as text) (annotate)
Tue Aug 21 22:45:54 2007 UTC (12 years, 3 months ago) by arodri7
Branch: MAIN
Changes since 1.31: +28 -24 lines
added changes to sims display

package Observation;

use lib '/vol/ontologies';
use DBMaster;

require Exporter;
@EXPORT_OK = qw(get_objects); 

use FIG_Config;
#use strict;
#use warnings;
use HTML;

1;

# $Id: Observation.pm,v 1.32 2007/08/21 22:45:54 arodri7 Exp $

=head1 NAME

Observation -- A presentation layer for observations in SEED.

=head1 DESCRIPTION

The SEED environment contains various sources of information for sequence features. The purpose of this library is to provide a 
single interface to this data.

The data can be used to display information for a given sequence feature (protein or other, but primarily information is computed for proteins). 

=cut

=head1 BACKGROUND

=head2 Data incorporated in the Observations 

As the goal of this library is to provide an integrated view, we combine diverse sources of evidence.

=head3 SEED core evidence

The core SEED data structures provided by FIG.pm. These are Similarities, BBHs and PCHs.

=head3 Attribute based Evidence

We use the SEED attribute infrastructure to store information computed by a variety of computational procedures.

These are e.g. InterPro hits via InterProScan (ipr), NCBI Conserved Domain Database Hits via PSSM(cdd), 
PFAM hits via HMM(pfam), SignalP results(signalp), and various others.

=head1 METHODS

The public methods this package provides are listed below:


=head3 context()

Returns close or diverse for purposes of displaying genomic context

=cut

sub context {
  my ($self) = @_;

  return $self->{context};
}

=head3 rows()

each row in a displayed table

=cut

sub rows {
  my ($self) = @_;

  return $self->{rows};
}

=head3 acc()

A valid accession or remote ID (in the style of a db_xref) or a valid local ID (FID) in case this is supported.

=cut

sub acc {
  my ($self) = @_;
  return $self->{acc};
}

=head3 class()

The class of evidence (required). This is usually simply the name of the tool or the name of the SEED data structure.
B<Please note> the connection of class and display_method and URL.
    
Current valid classes are:

=over 9

=item IDENTICAL (seq)

=item SIM (seq)

=item BBH (seq)

=item PCH (fc)

=item FIGFAM (seq)

=item IPR (dom)

=item CDD (dom)

=item PFAM (dom)

=item SIGNALP_CELLO_TMPRED (loc)

=item PDB (seq)

=item TMHMM (loc)

=item HMMTOP (loc)

=back

=cut

sub class {
  my ($self) = @_;

  return $self->{class};
}

=head3 type()

The type of evidence (required).

Where type is one of the following:

=over 8

=item seq=Sequence similarity

=item dom=domain based match

=item loc=Localization of the feature

=item fc=Functional coupling.

=back

=cut

sub type {
  my ($self) = @_;

  return $self->{type};
}

=head3 start()

Start of hit in query sequence.

=cut

sub start {
  my ($self) = @_;

  return $self->{start};
}

=head3 end()

End of the hit in query sequence.

=cut

sub stop {
  my ($self) = @_;

  return $self->{stop};
}

=head3 start()

Start of hit in query sequence.

=cut

sub qstart {
    my ($self) = @_;

    return $self->{qstart};
}

=head3 qstop()

End of the hit in query sequence.

=cut

sub qstop {
    my ($self) = @_;

    return $self->{qstop};
}

=head3 hstart()

Start of hit in hit sequence.

=cut

sub hstart {
    my ($self) = @_;

    return $self->{hstart};
}

=head3 end()

End of the hit in hit sequence.

=cut

sub hstop {
    my ($self) = @_;

    return $self->{hstop};
}

=head3 qlength()

length of the query sequence in similarities

=cut

sub qlength {
    my ($self) = @_;

    return $self->{qlength};
}

=head3 hlength()

length of the hit sequence in similarities

=cut

sub hlength {
    my ($self) = @_;

    return $self->{hlength};
}

=head3 evalue()

E-value or P-Value if present.

=cut

sub evalue {
  my ($self) = @_;

  return $self->{evalue};
}

=head3 score()

Score if present. 

=cut

sub score {
  my ($self) = @_;
  return $self->{score};
}

=head3 display()

will be different for each type

=cut 

sub display {
  
  die "Abstract Method Called\n";

}

=head3 display_table()

will be different for each type

=cut 

sub display_table {
  
  die "Abstract Table Method Called\n";

}

=head3 get_objects()

This is the B<REAL WORKHORSE> method of this Package.

=cut

sub get_objects {
    my ($self,$fid,$scope) = @_;
    
    my $objects = [];
    my @matched_datasets=();
    my $fig = new FIG;

    # call function that fetches attribute based observations
    # returns an array of arrays of hashes
 
    if($scope){
	get_cluster_observations($fid,\@matched_datasets,$scope);
    }
    else{
	my %domain_classes;
	my @attributes = $fig->get_attributes($fid);
	$domain_classes{'CDD'} = 1;
	#get_identical_proteins($fid,\@matched_datasets);
	get_attribute_based_domain_observations($fid,\%domain_classes,\@matched_datasets,\@attributes);
	get_sims_observations($fid,\@matched_datasets);
	get_functional_coupling($fid,\@matched_datasets);
	get_attribute_based_location_observations($fid,\@matched_datasets,\@attributes);
	get_pdb_observations($fid,\@matched_datasets,\@attributes);
    }
 
    foreach my $dataset (@matched_datasets) {
	my $object;
	if($dataset->{'type'} eq "dom"){
	    $object = Observation::Domain->new($dataset);
	}
	if($dataset->{'class'} eq "PCH"){
            $object = Observation::FC->new($dataset);
        }
	if ($dataset->{'class'} eq "IDENTICAL"){
	    $object = Observation::Identical->new($dataset);
	}
	if ($dataset->{'class'} eq "SIGNALP_CELLO_TMPRED"){
	    $object = Observation::Location->new($dataset);
	}
	if ($dataset->{'class'} eq "SIM"){
            $object = Observation::Sims->new($dataset);
        }
	if ($dataset->{'class'} eq "CLUSTER"){
            $object = Observation::Cluster->new($dataset);
        }
	if ($dataset->{'class'} eq "PDB"){
            $object = Observation::PDB->new($dataset);
        }
	
	push (@$objects, $object);
    }
    
    return $objects;

}

=head3 display_housekeeping
This method returns the housekeeping data for a given peg in a table format

=cut
sub display_housekeeping {
    my ($self,$fid) = @_;
    my $fig = new FIG;
    my $content;

    my $org_name = $fig->org_of($fid);
    my $org_id   = $fig->orgid_of_orgname($org_name);
    my $loc      = $fig->feature_location($fid);
    my($contig, $beg, $end) = $fig->boundaries_of($loc);
    my $strand   = ($beg <= $end)? '+' : '-';
    my @subsystems = $fig->subsystems_for_peg($fid);
    my $function = $fig->function_of($fid);
    my @aliases  = $fig->feature_aliases($fid);
    my $taxonomy = $fig->taxonomy_of($org_id);
    my @ecs = ($function =~ /\(EC\s(\d+\.[-\d+]+\.[-\d+]+\.[-\d+]+)\)/g);

    $content .= qq(<b>General Protein Data</b><br><br><br><table border="0">);
    $content .= qq(<tr width=15%><td >FIG ID</td><td>$fid</td></tr>\n);
    $content .= qq(<tr width=15%><td >Organism Name</td><td>$org_name,&nbsp;&nbsp;$org_id</td></tr>\n);
    $content .= qq(<tr><td width=15%>Taxonomy</td><td>$taxonomy</td></tr>\n);
    $content .= qq(<tr width=15%><td>FIG Organism ID</td><td>$org_id</td></tr>\n);
    $content .= qq(<tr width=15%><td>Gene Location</td><td>Contig $contig [$beg,$end], Strand $strand</td></tr>\n);;
    $content .= qq(<tr width=15%><td>Function</td><td>$function</td></tr>\n);
    if ( @ecs ) {
	$content .= qq(<tr><td>EC:</td><td>);
	foreach my $ec ( @ecs ) {
	    my $ec_name = $fig->ec_name($ec);
	    $content .= join(" -- ", $ec, $ec_name) . "<br>\n";
	}
	$content .= qq(</td></tr>\n);
    }

    if ( @subsystems ) {
	$content .= qq(<tr><td>Subsystems</td><td>);
	foreach my $subsystem ( @subsystems ) {
	    $content .= join(" -- ", @$subsystem) . "<br>\n";
	}
    }

    my %groups;
    if ( @aliases ) {
	# get the db for each alias
	foreach my $alias (@aliases){
	    $groups{$alias} = &get_database($alias);
	}
	
	# group ids by aliases
	my %db_aliases;
	foreach my $key (sort {$groups{$a} cmp $groups{$b}} keys %groups){
	    push (@{$db_aliases{$groups{$key}}}, $key);
	}
	
	
	$content .= qq(<tr><td>Aliases</td><td><table border="0">);
	foreach my $key (sort keys %db_aliases){
	    $content .= qq(<tr><td>$key:</td><td>) . join(", ", @{$db_aliases{$key}}) . qq(</td></tr\n);
	}
	$content .= qq(</td></tr></table>\n);
    }

    $content .= qq(</table><p>\n);

    return ($content);
}

=head3 get_sims_summary
This method uses as input the similarities of a peg and creates a tree view of their taxonomy

=cut

sub get_sims_summary {
    my ($observation, $fid) = @_;
    my $fig = new FIG;
    my %families;
    my @sims= $fig->nsims($fid,20000,10,"all");

    foreach my $sim (@sims){
        next if ($sim->[1] !~ /fig\|/);
        my $genome = $fig->genome_of($sim->[1]);
        my $taxonomy = $fig->taxonomy_of($fig->genome_of($sim->[1]));
        my $parent_tax = "Root";
        foreach my $tax (split(/\; /, $taxonomy)){
            push (@{$families{children}{$parent_tax}}, $tax);
            $families{parent}{$tax} = $parent_tax;
            $parent_tax = $tax;
        }
    }

    foreach my $key (keys %{$families{children}}){
        $families{count}{$key} = @{$families{children}{$key}};

        my %saw;
        my @out = grep(!$saw{$_}++, @{$families{children}{$key}});
        $families{children}{$key} = \@out;
    }
    return (\%families);
}

=head1 Internal Methods 

These methods are not meant to be used outside of this package. 

B<Please do not use them outside of this package!>

=cut

sub get_attribute_based_domain_observations{

    # we read a FIG ID and a reference to an array (of arrays of hashes, see above)
    my ($fid,$domain_classes,$datasets_ref,$attributes_ref) = (@_);

    my $fig = new FIG;
 
    foreach my $attr_ref (@$attributes_ref) {
#    foreach my $attr_ref ($fig->get_attributes($fid)) {
	my $key = @$attr_ref[1];
	my @parts = split("::",$key);
	my $class = $parts[0];
	
	if($domain_classes->{$parts[0]}){
	    my $val = @$attr_ref[2];
	    if($val =~/^(\d+\.\d+|0\.0);(\d+)-(\d+)/){
		my $raw_evalue = $1;
		my $from = $2;
		my $to = $3;
		my $evalue;
		if($raw_evalue =~/(\d+)\.(\d+)/){
		    my $part2 = 1000 - $1;
		    my $part1 = $2/100;
		    $evalue = $part1."e-".$part2;
		}
		else{
		    $evalue = "0.0";
		}

		my $dataset = {'class' => $class,
			       'acc' => $key,
			       'type' => "dom" ,
			       'evalue' => $evalue,
			       'start' => $from,
			       'stop' => $to,
			       'fig_id' => $fid,
			       'score' => $raw_evalue
			       };
		
		push (@{$datasets_ref} ,$dataset);
	    }
	}
    }
}

sub get_attribute_based_location_observations{

    my ($fid,$datasets_ref, $attributes_ref) = (@_);
    my $fig = new FIG;
    
    my $location_attributes = ['SignalP','CELLO','TMPRED','Phobius'];
    
    my $dataset = {'type' => "loc", 
		   'class' => 'SIGNALP_CELLO_TMPRED',
		   'fig_id' => $fid
		   };

    foreach my $attr_ref (@$attributes_ref){
#    foreach my $attr_ref ($fig->get_attributes($fid,$location_attributes)) {
	my $key = @$attr_ref[1];
	next if (($key !~ /SignalP/) && ($key !~ /CELLO/) && ($key !~ /TMPRED/)  && ($key !~/Phobius/) );
	my @parts = split("::",$key);
	my $sub_class = $parts[0];
	my $sub_key = $parts[1];
	my $value = @$attr_ref[2];
	if($sub_class eq "SignalP"){
	    if($sub_key eq "cleavage_site"){
		my @value_parts = split(";",$value);
		$dataset->{'cleavage_prob'} = $value_parts[0];
		$dataset->{'cleavage_loc'} = $value_parts[1];
#		print STDERR "LOC: $value_parts[1]";
	    }
	    elsif($sub_key eq "signal_peptide"){
		$dataset->{'signal_peptide_score'} = $value;
	    }
	}
	
	elsif($sub_class eq "CELLO"){
	    $dataset->{'cello_location'} = $sub_key;
	    $dataset->{'cello_score'} = $value;
	}
	
	elsif($sub_class eq "Phobius"){
	    if($sub_key eq "transmembrane"){
		$dataset->{'phobius_tm_locations'} = $value;
	    }
	    elsif($sub_key eq "signal"){
		$dataset->{'phobius_signal_location'} = $value;
	    }
	}
	
	elsif($sub_class eq "TMPRED"){
	    my @value_parts = split(/\;/,$value);
	    $dataset->{'tmpred_score'} = $value_parts[0];
	    $dataset->{'tmpred_locations'} = $value_parts[1];
	}
    }
    
    push (@{$datasets_ref} ,$dataset);
    
}

=head3 get_pdb_observations() (internal)

This methods sets the type and class for pdb observations

=cut

sub get_pdb_observations{
    my ($fid,$datasets_ref, $attributes_ref) = (@_);
    
    my $fig = new FIG;
    
    foreach my $attr_ref (@$attributes_ref){
    #foreach my $attr_ref ($fig->get_attributes($fid,'PDB')) {
	
	my $key = @$attr_ref[1];
	next if ( ($key !~ /PDB/));
	my($key1,$key2) =split("::",$key);
	my $value = @$attr_ref[2];
	my ($evalue,$location) = split(";",$value);
	
	if($evalue =~/(\d+)\.(\d+)/){
	    my $part2 = 1000 - $1;
	    my $part1 = $2/100;
	    $evalue = $part1."e-".$part2;
	} 

	my($start,$stop) =split("-",$location);

	my $url = @$attr_ref[3];
	my $dataset = {'class' => 'PDB',
		       'type' => 'seq' ,
		       'acc' => $key2,
		       'evalue' => $evalue,
                       'start' => $start,
                       'stop' => $stop,
		       'fig_id' => $fid
		       };

	push (@{$datasets_ref} ,$dataset);
    }
}

=head3 get_cluster_observations() (internal)

This methods sets the type and class for cluster observations

=cut

sub get_cluster_observations{
    my ($fid,$datasets_ref,$scope) = (@_);

    my $dataset = {'class' => 'CLUSTER',
		   'type' => 'fc',
		   'context' => $scope,
		   'fig_id' => $fid
		   };
    push (@{$datasets_ref} ,$dataset);
}


=head3 get_sims_observations() (internal)

This methods retrieves sims fills the internal data structures.

=cut

sub get_sims_observations{

    my ($fid,$datasets_ref) = (@_);
    my $fig = new FIG;
    my @sims= $fig->nsims($fid,500,1e-20,"all");
    my ($dataset);

    my %id_list;
    foreach my $sim (@sims){
	my $hit = $sim->[1];

	next if ($hit !~ /^fig\|/);
	my @aliases = $fig->feature_aliases($hit);
	foreach my $alias (@aliases){
	    $id_list{$alias} = 1;
	}
    }

    my %already;
    my (@new_sims, @uniprot);
    foreach my $sim (@sims){
	my $hit = $sim->[1];
	my ($id) = ($hit) =~ /\|(.*)/;
	next if (defined($already{$id}));
        next if (defined($id_list{$hit}));
	push (@new_sims, $sim);
	$already{$id} = 1;
    }
    
    foreach my $sim (@new_sims){
	my $hit = $sim->[1];
	my $percent = $sim->[2];
	my $evalue = $sim->[10];
	my $qfrom = $sim->[6];
	my $qto = $sim->[7];
	my $hfrom = $sim->[8];
	my $hto = $sim->[9];
	my $qlength = $sim->[12];
	my $hlength = $sim->[13];
	my $db = get_database($hit);
	my $func = $fig->function_of($hit);
	my $organism = $fig->org_of($hit);

	$dataset = {'class' => 'SIM',
		    'acc' => $hit,
		    'identity' => $percent,
		    'type' => 'seq',
		    'evalue' => $evalue,
		    'qstart' => $qfrom,
		    'qstop' => $qto,
		    'hstart' => $hfrom,
                    'hstop' => $hto,
		    'database' => $db,
		    'organism' => $organism,
		    'function' => $func,
		    'qlength' => $qlength,
		    'hlength' => $hlength,
		    'fig_id' => $fid
		    };

	push (@{$datasets_ref} ,$dataset);
    }
}

=head3 get_database (internal)
This method gets the database association from the sequence id

=cut

sub get_database{
    my ($id) = (@_);
    
    my ($db);
    if ($id =~ /^fig\|/)              { $db = "FIG" }
    elsif ($id =~ /^gi\|/)            { $db = "NCBI" }
    elsif ($id =~ /^^[NXYZA]P_/)      { $db = "RefSeq" }
    elsif ($id =~ /^sp\|/)            { $db = "SwissProt" }
    elsif ($id =~ /^uni\|/)           { $db = "UniProt" }
    elsif ($id =~ /^tigr\|/)          { $db = "TIGR" }
    elsif ($id =~ /^pir\|/)           { $db = "PIR" }
    elsif (($id =~ /^kegg\|/) || ($id =~ /Spy/))    { $db = "KEGG" }
    elsif ($id =~ /^tr\|/)                          { $db = "TrEMBL" }
    elsif ($id =~ /^eric\|/)          { $db = "ASAP" }
    elsif ($id =~ /^img\|/)           { $db = "JGI" }

    return ($db);

}


=head3 get_identical_proteins() (internal)

This methods retrieves sims fills the internal data structures.

=cut

sub get_identical_proteins{

    my ($fid,$datasets_ref) = (@_);
    my $fig = new FIG;
    my $funcs_ref;

    my %id_list;
    my @maps_to = grep { $_ ne $fid and $_ !~ /^xxx/ } map { $_->[0] } $fig->mapped_prot_ids($fid);
    my @aliases = $fig->feature_aliases($fid);
    foreach my $alias (@aliases){
	$id_list{$alias} = 1;
    }

    foreach my $id (@maps_to) {
        my ($tmp, $who);
        if (($id ne $fid) && ($tmp = $fig->function_of($id)) && (! defined ($id_list{$id}))) {
	    $who = &get_database($id);
            push(@$funcs_ref, [$id,$who,$tmp]);
        }
    }

    my ($dataset);
    my $dataset = {'class' => 'IDENTICAL',
		   'type' => 'seq',
		   'fig_id' => $fid,
		   'rows' => $funcs_ref   
		   };
    
    push (@{$datasets_ref} ,$dataset);
    

}

=head3 get_functional_coupling() (internal)

This methods retrieves the functional coupling of a protein given a peg ID

=cut

sub get_functional_coupling{

    my ($fid,$datasets_ref) = (@_);
    my $fig = new FIG;
    my @funcs = ();

    # initialize some variables
    my($sc,$neigh);

    # set default parameters for coupling and evidence
    my ($bound,$sim_cutoff,$coupling_cutoff) = (5000, 1.0e-10, 4);

    # get the fc data
    my @fc_data = $fig->coupling_and_evidence($fid,$bound,$sim_cutoff,$coupling_cutoff,1);

    # retrieve data
    my @rows = map { ($sc,$neigh) = @$_;
		     [$sc,$neigh,scalar $fig->function_of($neigh)]
		  } @fc_data;
		     
    my ($dataset);
    my $dataset = {'class' => 'PCH',
		   'type' => 'fc',
		   'fig_id' => $fid,
		   'rows' => \@rows
		   };
    
    push (@{$datasets_ref} ,$dataset);

}

=head3 new (internal)

Instantiate a new object.

=cut

sub new {
  my ($class,$dataset) = @_;
  
  my $self = { class => $dataset->{'class'},
	       type => $dataset->{'type'},
	       fig_id => $dataset->{'fig_id'},
	       score => $dataset->{'score'},
	   };
  
  bless($self,$class);
  
  return $self;
}

=head3 identity (internal)

Returns the % identity of the similar sequence

=cut

sub identity {
    my ($self) = @_;

    return $self->{identity};
}

=head3 fig_id (internal)

=cut

sub fig_id {
  my ($self) = @_;
  return $self->{fig_id};
}

=head3 feature_id (internal)


=cut

sub feature_id {
  my ($self) = @_;

  return $self->{feature_id};
}

=head3 id (internal)

Returns the ID  of the identical sequence

=cut

sub id {
    my ($self) = @_;

    return $self->{id};
}

=head3 organism (internal)

Returns the organism  of the identical sequence

=cut

sub organism {
    my ($self) = @_;

    return $self->{organism};
}

=head3 function (internal)

Returns the function of the identical sequence

=cut

sub function {
    my ($self) = @_;

    return $self->{function};
}

=head3 database (internal)

Returns the database of the identical sequence

=cut

sub database {
    my ($self) = @_;

    return $self->{database};
}

sub score {
  my ($self) = @_;

  return $self->{score};
}

############################################################
############################################################
package Observation::PDB;

use base qw(Observation);

sub new {
    
    my ($class,$dataset) = @_; 
    my $self = $class->SUPER::new($dataset);
    $self->{acc} = $dataset->{'acc'};
    $self->{evalue} = $dataset->{'evalue'};
    $self->{start} = $dataset->{'start'};
    $self->{stop} = $dataset->{'stop'};
    bless($self,$class);
    return $self;
}

=head3 display()

displays data stored in best_PDB attribute and in Ontology server for given PDB id

=cut

sub display{
    my ($self,$gd) = @_;

    my $fid = $self->fig_id;
    my $dbmaster = DBMaster->new(-database =>'Ontology');
    
    my $acc = $self->acc;
  
    my ($pdb_description,$pdb_source,$pdb_ligand);
    my $pdb_objs = $dbmaster->pdb->get_objects( { 'id' => $acc } );
    if(!scalar(@$pdb_objs)){
	$pdb_description = "not available";
	$pdb_source = "not available";
	$pdb_ligand = "not available";
    }
    else{
	my $pdb_obj = $pdb_objs->[0];
	$pdb_description = $pdb_obj->description;
	$pdb_source = $pdb_obj->source;
	$pdb_ligand = $pdb_obj->ligand;
    }

    my $lines = [];
    my $line_data = [];
    my $line_config = { 'title' => "PDB hit for $fid",
			'short_title' => "best PDB",
			'basepair_offset' => '1' };

    my $fig = new FIG;
    my $seq = $fig->get_translation($fid);
    my $fid_stop = length($seq);

    my $fid_element_hash = {
	"title" => $fid,
	"start" => '1',
	"end" =>  $fid_stop,
	"color"=> '1',
	"zlayer" => '1'
	};
    
    push(@$line_data,$fid_element_hash);
    
    my $links_list = [];
    my $descriptions = [];

    my $name;
    $name = {"title" => 'id',
	     "value" => $acc};
    push(@$descriptions,$name);

    my $description;
    $description = {"title" => 'pdb description',
		    "value" => $pdb_description};
    push(@$descriptions,$description);
    
    my $score;
    $score = {"title" => "score",
	      "value" => $self->evalue};
    push(@$descriptions,$score);

    my $start_stop;
    my $start_stop_value = $self->start."_".$self->stop; 
    $start_stop = {"title" => "start-stop",
		   "value" => $start_stop_value};
    push(@$descriptions,$start_stop);
        
    my $source;
    $source = {"title" => "source",
	      "value" => $pdb_source};
    push(@$descriptions,$source);

    my $ligand;
    $ligand = {"title" => "pdb ligand",
	       "value" => $pdb_ligand};
    push(@$descriptions,$ligand);
 
    my $link;
    my $link_url ="http://www.rcsb.org/pdb/explore/explore.do?structureId=".$acc;
    
    $link = {"link_title" => $acc,
	     "link" => $link_url};
    push(@$links_list,$link);
    
    my $pdb_element_hash = {
	"title" => "PDB homology",
	"start" => $self->start,
	"end" =>  $self->stop,
	"color"=> '6',
	"zlayer" => '3',
	"links_list" => $links_list,
	"description" => $descriptions};
    
    push(@$line_data,$pdb_element_hash);
    $gd->add_line($line_data, $line_config);

    return $gd;
}

1;

############################################################
############################################################
package Observation::Identical;

use base qw(Observation);

sub new {
    
    my ($class,$dataset) = @_; 
    my $self = $class->SUPER::new($dataset);
    $self->{rows} = $dataset->{'rows'};
    
    bless($self,$class);
    return $self;
}

=head3 display_table()

If available use the function specified here to display the "raw" observation.
This code will display a table for the identical protein


B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evi
dence.

=cut


sub display_table{
    my ($self) = @_;
    
    my $fig = new FIG;
    my $fid = $self->fig_id;
    my $rows = $self->rows;
    my $cgi = new CGI;
    my $all_domains = [];
    my $count_identical = 0;
    my $content;
    foreach my $row (@$rows) {
	my $id = $row->[0];
	my $who = $row->[1];
	my $assignment = $row->[2];
	my $organism = $fig->org_of($id);
        my $single_domain = [];
        push(@$single_domain,$who);
	push(@$single_domain,&HTML::set_prot_links($cgi,$id));
        push(@$single_domain,$organism);
	push(@$single_domain,$assignment);
        push(@$all_domains,$single_domain);
	$count_identical++;
    }

    if ($count_identical >0){
        $content = $all_domains;
    }
    else{
        $content = "<p>This PEG does not have any essentially identical proteins</p>";
    }
    return ($content);
}

1;

#########################################
#########################################
package Observation::FC;
1;

use base qw(Observation);

sub new {
    
    my ($class,$dataset) = @_; 
    my $self = $class->SUPER::new($dataset);
    $self->{rows} = $dataset->{'rows'};
    
    bless($self,$class);
    return $self;
}

=head3 display_table()

If available use the function specified here to display the "raw" observation.
This code will display a table for the identical protein


B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evi
dence.

=cut

sub display_table {

    my ($self,$dataset) = @_;
    my $fid = $self->fig_id;
    my $rows = $self->rows;
    my $cgi = new CGI;
    my $functional_data = [];
    my $count = 0;
    my $content;

    foreach my $row (@$rows) {
	my $single_domain = [];
	$count++;

	# construct the score link
	my $score = $row->[0];
	my $toid = $row->[1];
	my $link = $cgi->url(-relative => 1) . "?user=master&request=show_coupling_evidence&prot=$fid&to=$toid&SPROUT=";
	my $sc_link = "<a href=$link>$score</a>";

	push(@$single_domain,$sc_link);
	push(@$single_domain,$row->[1]);
	push(@$single_domain,$row->[2]);
	push(@$functional_data,$single_domain);
    }

    if ($count >0){
	$content = $functional_data;
    }
    else
    {
	$content = "<p>This PEG does not have any functional coupling</p>";
    }
    return ($content);
}


#########################################
#########################################
package Observation::Domain;

use base qw(Observation);

sub new {
    
    my ($class,$dataset) = @_; 
    my $self = $class->SUPER::new($dataset);
    $self->{evalue} = $dataset->{'evalue'};
    $self->{acc} = $dataset->{'acc'};
    $self->{start} = $dataset->{'start'};
    $self->{stop} = $dataset->{'stop'};
    
    bless($self,$class);
    return $self;
}

sub display {
    my ($thing,$gd) = @_;
    my $lines = [];
#    my $line_config = { 'title' => $thing->acc,
#			'short_title' => $thing->type,
#			'basepair_offset' => '1' };
    my $color = "4";
    
    my $line_data = [];
    my $links_list = [];
    my $descriptions = [];

    my $db_and_id = $thing->acc;
    my ($db,$id) = split("::",$db_and_id);
    
    my $dbmaster = DBMaster->new(-database =>'Ontology');
    
    my ($name_title,$name_value,$description_title,$description_value);
    if($db eq "CDD"){
	my $cdd_objs = $dbmaster->cdd->get_objects( { 'id' => $id } );
	if(!scalar(@$cdd_objs)){
	    $name_title = "name";
	    $name_value = "not available";
	    $description_title = "description";
	    $description_value = "not available";
	}
	else{
	    my $cdd_obj = $cdd_objs->[0];
	    $name_title = "name";
	    $name_value = $cdd_obj->term;
	    $description_title = "description";
	    $description_value = $cdd_obj->description;
	}
    }

    my $line_config = { 'title' => $thing->acc,
                        'short_title' => $name_value,
                        'basepair_offset' => '1' };
    
    my $name;
    $name = {"title" => $name_title,
	     "value" => $name_value};
    push(@$descriptions,$name);

    my $description;
    $description = {"title" => $description_title,
			     "value" => $description_value};
    push(@$descriptions,$description);
    
    my $score;
    $score = {"title" => "score",
	      "value" => $thing->evalue};
    push(@$descriptions,$score);
    
    my $link_id;
    if ($thing->acc =~/\w+::(\d+)/){
	$link_id = $1;
    }
    
    my $link;
    my $link_url;
    if ($thing->class eq "CDD"){$link_url = "http://0-www.ncbi.nlm.nih.gov.library.vu.edu.au:80/Structure/cdd/cddsrv.cgi?uid=$link_id"}
    elsif($thing->class eq "PFAM"){$link_url = "http://www.sanger.ac.uk/cgi-bin/Pfam/getacc?$link_id"}
    else{$link_url = "NO_URL"}
    
    $link = {"link_title" => $thing->acc,
	     "link" => $link_url};
    push(@$links_list,$link);
    
    my $element_hash = {
	"title" => $thing->type,
	"start" => $thing->start,
	"end" =>  $thing->stop,
	"color"=> $color,
	"zlayer" => '2',
	"links_list" => $links_list,
	"description" => $descriptions};
    
    push(@$line_data,$element_hash);
    $gd->add_line($line_data, $line_config);
    
    return $gd;

}

sub display_table {
    my ($self,$dataset) = @_;
    my $cgi = new CGI;
    my $data = [];
    my $count = 0;
    my $content;

    foreach my $thing (@$dataset) {
	next if ($thing->type !~ /dom/);
        my $single_domain = [];
	$count++;

	my $db_and_id = $thing->acc;
	my ($db,$id) = split("::",$db_and_id);

	my $dbmaster = DBMaster->new(-database =>'Ontology');

	my ($name_title,$name_value,$description_title,$description_value);
	if($db eq "CDD"){
	    my $cdd_objs = $dbmaster->cdd->get_objects( { 'id' => $id } );
	    if(!scalar(@$cdd_objs)){
		$name_title = "name";
		$name_value = "not available";
		$description_title = "description";
		$description_value = "not available";
	    }
	    else{
		my $cdd_obj = $cdd_objs->[0];
		$name_title = "name";
		$name_value = $cdd_obj->term;
		$description_title = "description";
		$description_value = $cdd_obj->description;
	    }
	}

	my $location =  $thing->start . " - " . $thing->stop;
	
        push(@$single_domain,$db);
	push(@$single_domain,$thing->acc);
	push(@$single_domain,$name_value);
	push(@$single_domain,$location);
	push(@$single_domain,$thing->evalue);
        push(@$single_domain,$description_value);
        push(@$data,$single_domain);
    }

    if ($count >0){
        $content = $data;
    }
    else
    {
        $content = "<p>This PEG does not have any similarities to domains</p>";
    }
}

 
#########################################
#########################################
package Observation::Location;

use base qw(Observation);

sub new {

    my ($class,$dataset) = @_; 
    my $self = $class->SUPER::new($dataset);
    $self->{cleavage_prob} = $dataset->{'cleavage_prob'};
    $self->{cleavage_loc} = $dataset->{'cleavage_loc'};
    $self->{signal_peptide_score} = $dataset->{'signal_peptide_score'};
    $self->{cello_location} = $dataset->{'cello_location'};
    $self->{cello_score} = $dataset->{'cello_score'};	
    $self->{tmpred_score} = $dataset->{'tmpred_score'};
    $self->{tmpred_locations} = $dataset->{'tmpred_locations'};	
    $self->{phobius_signal_location} = $dataset->{'phobius_signal_location'};	
    $self->{phobius_tm_locations} = $dataset->{'phobius_tm_locations'};	
    
    bless($self,$class);
    return $self;
}

sub display {
    my ($thing,$gd) = @_;
    
    my $fid = $thing->fig_id;
    my $fig= new FIG;
    my $length = length($fig->get_translation($fid));
    
    my $cleavage_prob;
    if($thing->cleavage_prob){$cleavage_prob = $thing->cleavage_prob;}
    my ($cleavage_loc_begin,$cleavage_loc_end) = split("-",$thing->cleavage_loc);
    my $signal_peptide_score = $thing->signal_peptide_score;
    my $cello_location = $thing->cello_location;
    my $cello_score = $thing->cello_score;
    my $tmpred_score = $thing->tmpred_score;
    my @tmpred_locations = split(",",$thing->tmpred_locations);
    
    my $phobius_signal_location = $thing->phobius_signal_location;
    my @phobius_tm_locations = split(",",$thing->phobius_tm_locations);
 
    my $lines = [];
    
    #color is 
    my $color = "6";
    
    if($cello_location){
	my $cello_descriptions = [];
	my $line_data =[];

	my $line_config = { 'title' => 'Localization Evidence',
			    'short_title' => 'CELLO',
			    'basepair_offset' => '1' };

	my $description_cello_location = {"title" => 'Best Cello Location',
					  "value" => $cello_location};
	
	push(@$cello_descriptions,$description_cello_location);
	
	my $description_cello_score = {"title" => 'Cello Score',
				       "value" => $cello_score};
    
	push(@$cello_descriptions,$description_cello_score);
    
	my $element_hash = {
	    "title" => "CELLO",
	    "start" => "1",
	    "end" =>  $length + 1,
	    "color"=> $color,
	    "type" => 'box',
	    "zlayer" => '1',
	    "description" => $cello_descriptions};
	
	push(@$line_data,$element_hash);
	$gd->add_line($line_data, $line_config);
    }
    
    
    $color = "2";
    if($tmpred_score){
	my $line_data =[];
	my $line_config = { 'title' => 'Localization Evidence',
			    'short_title' => 'Transmembrane',
			    'basepair_offset' => '1' };


	foreach my $tmpred (@tmpred_locations){
	    my $descriptions = [];
	    my ($begin,$end) =split("-",$tmpred);
	    my $description_tmpred_score = {"title" => 'TMPRED score',
			     "value" => $tmpred_score};
	
	    push(@$descriptions,$description_tmpred_score);
	    
	    my $element_hash = {
	    "title" => "transmembrane location",
	    "start" => $begin + 1,
	    "end" =>  $end + 1,
	    "color"=> $color,
	    "zlayer" => '5',
	    "type" => 'smallbox',
	    "description" => $descriptions};
	    
	    push(@$line_data,$element_hash);

	}
	$gd->add_line($line_data, $line_config);
    }

    if((scalar(@phobius_tm_locations) > 0) || $phobius_signal_location){
	my $line_data =[];
	my $line_config = { 'title' => 'Localization Evidence',
			    'short_title' => 'Phobius',
			    'basepair_offset' => '1' };

	foreach my $tm_loc (@phobius_tm_locations){
	    my $descriptions = [];
	    my $description_phobius_tm_locations = {"title" => 'Phobius TM Location',
			     "value" => $tm_loc};
	    push(@$descriptions,$description_phobius_tm_locations);
	    
	    my ($begin,$end) =split("-",$tm_loc);
	    
	    my $element_hash = {
	    "title" => "phobius transmembrane location",
	    "start" => $begin + 1,
	    "end" =>  $end + 1,
	    "color"=> '6',
	    "zlayer" => '4',
	    "type" => 'bigbox',
	    "description" => $descriptions};
	    
	    push(@$line_data,$element_hash);

	}
	
	if($phobius_signal_location){
	    my $descriptions = [];
	    my $description_phobius_signal_location = {"title" => 'Phobius Signal Location',
			     "value" => $phobius_signal_location};
	    push(@$descriptions,$description_phobius_signal_location);
	    

	    my ($begin,$end) =split("-",$phobius_signal_location);
	    my $element_hash = {
	    "title" => "phobius signal locations",
	    "start" => $begin + 1,
	    "end" =>  $end + 1,
	    "color"=> '1',
	    "zlayer" => '5',
	    "type" => 'box',
	    "description" => $descriptions};
	    push(@$line_data,$element_hash);
	}
	
	$gd->add_line($line_data, $line_config);
    }


    $color = "1";
    if($signal_peptide_score){
	my $line_data = [];
	my $descriptions = [];

	my $line_config = { 'title' => 'Localization Evidence',
			    'short_title' => 'SignalP',
			    'basepair_offset' => '1' };

	my $description_signal_peptide_score = {"title" => 'signal peptide score',
						"value" => $signal_peptide_score};
	
	push(@$descriptions,$description_signal_peptide_score);

	my $description_cleavage_prob = {"title" => 'cleavage site probability',
					 "value" => $cleavage_prob};
	
	push(@$descriptions,$description_cleavage_prob);
	    
	my $element_hash = {
	    "title" => "SignalP",
	    "start" => $cleavage_loc_begin - 2,
	    "end" =>  $cleavage_loc_end + 1,
	    "type" => 'bigbox',
	    "color"=> $color,
	    "zlayer" => '10',
	    "description" => $descriptions};
	
	push(@$line_data,$element_hash);
	$gd->add_line($line_data, $line_config);
    }
    
    return ($gd);

}

sub cleavage_loc {
  my ($self) = @_;

  return $self->{cleavage_loc};
}

sub cleavage_prob {
  my ($self) = @_;

  return $self->{cleavage_prob};
}

sub signal_peptide_score {
  my ($self) = @_;

  return $self->{signal_peptide_score};
}

sub tmpred_score {
  my ($self) = @_;

  return $self->{tmpred_score};
}

sub tmpred_locations {
  my ($self) = @_;

  return $self->{tmpred_locations};
}

sub cello_location {
  my ($self) = @_;

  return $self->{cello_location};
}

sub cello_score {
  my ($self) = @_;

  return $self->{cello_score};
}

sub phobius_signal_location {
  my ($self) = @_;
  return $self->{phobius_signal_location};
}

sub phobius_tm_locations {
  my ($self) = @_;
  return $self->{phobius_tm_locations};
}



#########################################
#########################################
package Observation::Sims;

use base qw(Observation);

sub new {

    my ($class,$dataset) = @_;
    my $self = $class->SUPER::new($dataset);
    $self->{identity} = $dataset->{'identity'};
    $self->{acc} = $dataset->{'acc'};
    $self->{evalue} = $dataset->{'evalue'};
    $self->{qstart} = $dataset->{'qstart'};
    $self->{qstop} = $dataset->{'qstop'};
    $self->{hstart} = $dataset->{'hstart'};
    $self->{hstop} = $dataset->{'hstop'};
    $self->{database} = $dataset->{'database'};
    $self->{organism} = $dataset->{'organism'};
    $self->{function} = $dataset->{'function'};
    $self->{qlength} = $dataset->{'qlength'};
    $self->{hlength} = $dataset->{'hlength'};

    bless($self,$class);
    return $self;
}

=head3 display()

If available use the function specified here to display a graphical observation.
This code will display a graphical view of the similarities using the genome drawer object

=cut

sub display {
    my ($self,$gd) = @_;
    
    my $fig = new FIG;
    my $peg = $self->acc;

    my $organism = $self->organism;
    my $genome = $fig->genome_of($peg);
    my ($org_tax) = ($genome) =~ /(.*)\./;
    my $function = $self->function;
    my $abbrev_name = $fig->abbrev($organism);
    my $align_start = $self->qstart;
    my $align_stop = $self->qstop;
    my $hit_start = $self->hstart;
    my $hit_stop = $self->hstop;

    my $tax_link = "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=" . $org_tax;

    my $line_config = { 'title' => "$organism [$org_tax]",
			'short_title' => "$abbrev_name",
			'title_link' => '$tax_link',
			'basepair_offset' => '0'
			};
    
    my $line_data = [];
    
    my $element_hash;
    my $links_list = [];
    my $descriptions = [];
    
    # get subsystem information
    my $url_link = "http://seed-viewer.theseed.org/index.cgi?action=ShowAnnotation&prot=".$peg;
    
    my $link;
    $link = {"link_title" => $peg,
	     "link" => $url_link};
    push(@$links_list,$link);
    
    my @subsystems = $fig->peg_to_subsystems($peg);
    foreach my $subsystem (@subsystems){
	my $link;
	$link = {"link" => "http://seed-viewer.theseed.org/index.cgi?action=ShowSubsystem&subsystem_name=$subsystem",
		 "link_title" => $subsystem};
	push(@$links_list,$link);
    }
    
    my $description_function;
    $description_function = {"title" => "function",
			     "value" => $function};
    push(@$descriptions,$description_function);
    
    my ($description_ss, $ss_string);
    $ss_string = join (",", @subsystems);
    $description_ss = {"title" => "subsystems",
		       "value" => $ss_string};
    push(@$descriptions,$description_ss);
    
    my $description_loc;
    $description_loc = {"title" => "location start",
			"value" => $hit_start};
    push(@$descriptions, $description_loc);
    
    $description_loc = {"title" => "location stop",
			"value" => $hit_stop};
    push(@$descriptions, $description_loc);
    
    my $evalue = $self->evalue;
    while ($evalue =~ /-0/)
    {
	my ($chunk1, $chunk2) = split(/-/, $evalue);
	$chunk2 = substr($chunk2,1);
	$evalue = $chunk1 . "-" . $chunk2;
    }
    
    my $color = &color($evalue);
    
    my $description_eval = {"title" => "E-Value",
			    "value" => $evalue};
    push(@$descriptions, $description_eval);
    
    my $identity = $self->identity;
    my $description_identity = {"title" => "Identity",
				"value" => $identity};
    push(@$descriptions, $description_identity);
    
    $element_hash = {
	"title" => $peg,
	"start" => $align_start,
	"end" =>  $align_stop,
	"type"=> 'box',
	"color"=> $color,
	"zlayer" => "2",
	"links_list" => $links_list,
	"description" => $descriptions
	};
    push(@$line_data,$element_hash);
    $gd->add_line($line_data, $line_config);
    
    return ($gd);
    
}

=head3 display_table()

If available use the function specified here to display the "raw" observation.
This code will display a table for the similarities protein

B<Please note> that URL linked to in display_method() is an external component and needs to added to the code for every class of evidence.

=cut

sub display_table {
    my ($self,$dataset, $columns, $query_fid) = @_;
    
    my $data = [];
    my $count = 0;
    my $content;
    my $fig = new FIG;
    my $cgi = new CGI;
    my @ids;
    foreach my $thing (@$dataset) {
	next if ($thing->class ne "SIM");
	push (@ids, $thing->acc);
    }

    my (%box_column, %subsystems_column, %evidence_column, %e_identical);
    foreach my $col (@$columns){
	# get the column for the subsystems
	if ($col eq "subsystem"){
	    %subsystems_column = &get_subsystems_column(\@ids);
	}
	# get the column for the evidence codes
	elsif ($col eq "evidence"){
	    %evidence_column = &get_evidence_column(\@ids);
	}
    }

    my %e_identical = &get_essentially_identical($query_fid);

    foreach my $thing (@$dataset) {
        next if ($thing->class ne "SIM");
        my $single_domain = [];
        $count++;

	my $id = $thing->acc;

        my $iden    = $thing->identity;
        my $ln1     = $thing->qlength;
        my $ln2     = $thing->hlength;
        my $b1      = $thing->qstart;
        my $e1      = $thing->qstop;
        my $b2      = $thing->hstart;
        my $e2      = $thing->hstop;
        my $d1      = abs($e1 - $b1) + 1;
        my $d2      = abs($e2 - $b2) + 1;
        my $reg1    = "$b1-$e1 (<b>$d1/$ln1</b>)";
        my $reg2    = "$b2-$e2 (<b>$d2/$ln2</b>)";

	# checkbox column
	my $field_name = "tables_" . $id;
        my $pair_name = "visual_" . $id;
	my $box_col = qq(<input type=checkbox name=seq value="$id" id="$field_name" onClick="VisualCheckPair('$field_name', '$pair_name');">);
	
	# get the linked fig id
	my $fig_col;
	if (defined ($e_identical{$id})){
	    $fig_col = &HTML::set_prot_links($cgi,$id) . "*";
	}
	else{	    
	    $fig_col = &HTML::set_prot_links($cgi,$id);
	}

	push(@$single_domain,$box_col);                        # permanent column
        push(@$single_domain,$fig_col);                        # permanent column
	push(@$single_domain,$thing->evalue);                  # permanent column
	push(@$single_domain,"$iden\%");                       # permanent column
        push(@$single_domain,$reg1);                           # permanent column
        push(@$single_domain,$reg2);                           # permanent column
	push(@$single_domain,$thing->organism);                # permanent column
	push(@$single_domain,$thing->function);                # permanent column
	foreach my $col (@$columns){
	    (push(@$single_domain,$subsystems_column{$id}) && (next)) if ($col eq "subsystem");
	    (push(@$single_domain,$evidence_column{$id}) && (next)) if ($col eq "evidence");
	    (push(@$single_domain,&get_prefer($thing->acc, 'NCBI')) && (next)) if ($col eq "ncbi_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'RefSeq')) && (next)) if ($col eq "refseq_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'SwissProt')) && (next)) if ($col eq "swissprot_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'UniProt')) && (next)) if ($col eq "uniprot_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'TIGR')) && (next)) if ($col eq "tigr_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'PIR')) && (next)) if ($col eq "pir_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'KEGG')) && (next)) if ($col eq "kegg_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'TrEMBL')) && (next)) if ($col eq "trembl_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'ASAP')) && (next)) if ($col eq "asap_id");
	    (push(@$single_domain,&get_prefer($thing->acc, 'JGI')) && (next)) if ($col eq "jgi_id");
	}
        push(@$data,$single_domain);
    }

    if ($count >0 ){
	$content = $data;
    }
    else{
        $content = "<p>This PEG does not have any similarities</p>";
    }
    return ($content);
}

sub get_box_column{
    my ($ids) = @_;
    my %column;
    foreach my $id (@$ids){
        my $field_name = "tables_" . $id;
        my $pair_name = "visual_" . $id;
	$column{$id} = qq(<input type=checkbox name=seq value="$id" id="$field_name" onClick="VisualCheckPair('$field_name', '$pair_name');">);
    }
    return (%column);
}

sub get_subsystems_column{
    my ($ids) = @_;

    my $fig = new FIG;
    my $cgi = new CGI;
    my %in_subs  = $fig->subsystems_for_pegs($ids);
    my %column;
    foreach my $id (@$ids){
	my @in_sub = @{$in_subs{$id}} if (defined $in_subs{$id});
	my @subsystems;
	
        if (@in_sub > 0) {
	    my $count = 1;
	    foreach my $array(@in_sub){
		push (@subsystems, $count . ". " . $$array[0]);
		$count++;
	    }
            my $in_sub_line = join ("<br>", @subsystems);
	    $column{$id} = $in_sub_line;
        } else {
            $column{$id} = "&nbsp;";
        }
    }
    return (%column);
}

sub get_essentially_identical{
    my ($fid) = @_;
    my $fig = new FIG;

    my %id_list;
    my @maps_to = grep { $_ ne $fid and $_ !~ /^xxx/ } map { $_->[0] } $fig->mapped_prot_ids($fid);

    foreach my $id (@maps_to) {
        if (($id ne $fid) && ($fig->function_of($id))) {
	    $id_list{$id} = 1;
        }
    }
    return(%id_list);
}


sub get_evidence_column{
    my ($ids) = @_;
    my $fig = new FIG;
    my $cgi = new CGI;
    my (%column, %code_attributes);

    my @codes = grep { $_->[1] =~ /^evidence_code/i } $fig->get_attributes($ids);
    foreach my $key (@codes){
        push (@{$code_attributes{$$key[0]}}, $key);
    }

    foreach my $id (@$ids){
	# add evidence code with tool tip
        my $ev_codes=" &nbsp; ";
        my @ev_codes = "";
	
        if ($id =~ /^fig\|\d+\.\d+\.peg\.\d+$/) {
            my @codes;
            @codes = @{$code_attributes{$id}} if (defined @{$code_attributes{$id}});
            @ev_codes = ();
            foreach my $code (@codes) {
                my $pretty_code = $code->[2];
                if ($pretty_code =~ /;/) {
                    my ($cd, $ss) = split(";", $code->[2]);
                    $ss =~ s/_/ /g;
                    $pretty_code = $cd;# . " in " . $ss;
                }
                push(@ev_codes, $pretty_code);
            }
        }

        if (scalar(@ev_codes) && $ev_codes[0]) {
            my $ev_code_help=join("<br />", map {&HTML::evidence_codes_explain($_)} @ev_codes);
            $ev_codes = $cgi->a(
                                {
                                    id=>"evidence_codes", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Evidence Codes', '$ev_code_help', ''); this.tooltip.addHandler(); return false;"}, join("<br />", @ev_codes));
        }
	$column{$id}=$ev_codes;
    }
    return (%column);
}

sub html_enc { $_ = $_[0]; s/\&/&amp;/g; s/\>/&gt;/g; s/\</&lt;/g; $_ }

sub get_prefer {
    my ($fid, $db) = @_;
    my $fig = new FIG;
    my $cgi = new CGI;

    my @aliases = $fig->feature_aliases($fid);

    foreach my $alias (@aliases){
	my $id_db = &Observation::get_database($alias);
	if ($id_db eq $db){
	    my $acc_col .= &HTML::set_prot_links($cgi,$alias);
	    return ($acc_col);
	}
    }
    return (" ");
}

sub color {
    my ($evalue) = @_;
    
    my $color;
    if ($evalue <= 1e-170){
        $color = 51;
    }
    elsif (($evalue <= 1e-120) && ($evalue > 1e-170)){
        $color = 52;
    }
    elsif (($evalue <= 1e-90) && ($evalue > 1e-120)){
        $color = 53;
    }
    elsif (($evalue <= 1e-70) && ($evalue > 1e-90)){
        $color = 54;
    }
    elsif (($evalue <= 1e-40) && ($evalue > 1e-70)){
        $color = 55;
    }
    elsif (($evalue <= 1e-20) && ($evalue > 1e-40)){
        $color = 56;
    }
    elsif (($evalue <= 1e-5) && ($evalue > 1e-20)){
        $color = 57;
    }
    elsif (($evalue <= 1) && ($evalue > 1e-5)){
        $color = 58;
    }
    elsif (($evalue <= 10) && ($evalue > 1)){
        $color = 59;
    }
    else{
        $color = 60;
    }


    return ($color);
}


############################
package Observation::Cluster;

use base qw(Observation);

sub new {

    my ($class,$dataset) = @_;
    my $self = $class->SUPER::new($dataset);
    $self->{context} = $dataset->{'context'}; 
    bless($self,$class);
    return $self;
}

sub display {
    my ($self,$gd) = @_;
    
    my $fid = $self->fig_id;
    my $compare_or_coupling = $self->context;
    my $gd_window_size = $gd->window_size;
    my $fig = new FIG;
    my $all_regions = [];

    #get the organism genome
    my $target_genome = $fig->genome_of($fid);

    # get location of the gene
    my $data = $fig->feature_location($fid);
    my ($contig, $beg, $end);
    my %reverse_flag;

    if ($data =~ /(.*)_(\d+)_(\d+)$/){
	$contig = $1;
	$beg = $2;
	$end = $3;
    }

    my $offset;
    my ($region_start, $region_end);
    if ($beg < $end)
    {
	$region_start = $beg - 4000;
	$region_end = $end+4000;
	$offset = ($2+(($3-$2)/2))-($gd_window_size/2);
    }
    else
    {
	$region_start = $end-4000;
	$region_end = $beg+4000;
	$offset = ($3+(($2-$3)/2))-($gd_window_size/2);
	$reverse_flag{$target_genome} = $fid;
    }

    # call genes in region
    my ($target_gene_features, $reg_beg, $reg_end) = $fig->genes_in_region($target_genome, $contig, $region_start, $region_end);
    push(@$all_regions,$target_gene_features);
    my (@start_array_region);
    push (@start_array_region, $offset);

    my %all_genes;
    my %all_genomes; 
    foreach my $feature (@$target_gene_features){ $all_genes{$feature} = $fid;}

    if ($compare_or_coupling eq "diverse")
    {	
	my @coup = grep { $_->[1]} $fig->coupling_and_evidence($fid,5000,1e-10,4,1);
	
	my $coup_count = 0;
	
	foreach my $pair (@{$coup[0]->[2]}) {
	    #	last if ($coup_count > 10);
	    my ($peg1,$peg2) = @$pair;

	    my ($pair_contig,$pair_beg,$pair_end,$pair_region_start,$pair_region_stop,$pair_genome);
	    $pair_genome = $fig->genome_of($peg1);
	    
	    my $location = $fig->feature_location($peg1);
	    if($location =~/(.*)_(\d+)_(\d+)$/){
		$pair_contig = $1;
		$pair_beg = $2;
		$pair_end = $3;
		if ($pair_beg < $pair_end)
		{
		    $pair_region_start = $pair_beg - 4000;
		    $pair_region_stop = $pair_end+4000;
		    $offset = ($2+(($3-$2)/2))-($gd_window_size/2);
		}
		else
		{
		    $pair_region_start = $pair_end-4000;
		    $pair_region_stop = $pair_beg+4000;
		    $offset = ($3+(($2-$3)/2))-($gd_window_size/2);
		    $reverse_flag{$pair_genome} = $peg1;
		}
		
		push (@start_array_region, $offset);
		
		$all_genomes{$pair_genome} = 1;
		my ($pair_features) = $fig->genes_in_region($pair_genome, $pair_contig, $pair_region_start, $pair_region_stop);
		push(@$all_regions,$pair_features);
		foreach my $pair_feature (@$pair_features){ $all_genes{$pair_feature} = $peg1;}
	    }
	    $coup_count++;
	}
    }

    elsif ($compare_or_coupling eq "close")
    {
	# make a hash of genomes that are phylogenetically close
	#my $close_threshold = ".26";
	#my @genomes = $fig->genomes('complete');
	#my %close_genomes = ();
	#foreach my $compared_genome (@genomes)
	#{
	#    my $dist = $fig->crude_estimate_of_distance($target_genome,$compared_genome);
	#    #$close_genomes{$compared_genome} = $dist;
	#    if ($dist <= $close_threshold)
	#    {
	#	$all_genomes{$compared_genome} = 1;
	#    }
	#}
	$all_genomes{"216592.1"} = 1;
	$all_genomes{"79967.1"} = 1;
	$all_genomes{"199310.1"} = 1;
	$all_genomes{"216593.1"} = 1;
	$all_genomes{"155864.1"} = 1;
	$all_genomes{"83334.1"} = 1;
	$all_genomes{"316407.3"} = 1;

	foreach my $comp_genome (keys %all_genomes){
	    my $return = $fig->bbh_list($comp_genome,[$fid]);
	    my $feature_list = $return->{$fid};
	    foreach my $peg1 (@$feature_list){
		my $location = $fig->feature_location($peg1);
		my ($pair_contig,$pair_beg,$pair_end,$pair_region_start,$pair_region_stop,$pair_genome);
		$pair_genome = $fig->genome_of($peg1);
		
		if($location =~/(.*)_(\d+)_(\d+)$/){
		    $pair_contig = $1;
		    $pair_beg = $2;
		    $pair_end = $3;
		    if ($pair_beg < $pair_end)
		    {
			$pair_region_start = $pair_beg - 4000;
			$pair_region_stop = $pair_end + 4000;
			$offset = ($2+(($3-$2)/2))-($gd_window_size/2);
		    }
		    else
		    {
			$pair_region_start = $pair_end-4000;
			$pair_region_stop = $pair_beg+4000;
			$offset = ($3+(($2-$3)/2))-($gd_window_size/2);
			$reverse_flag{$pair_genome} = $peg1;
		    }
		    
		    push (@start_array_region, $offset);
		    $all_genomes{$pair_genome} = 1;
		    my ($pair_features) = $fig->genes_in_region($pair_genome, $pair_contig, $pair_region_start, $pair_region_stop);
		    push(@$all_regions,$pair_features);
		    foreach my $pair_feature (@$pair_features){ $all_genes{$pair_feature} = $peg1;}
		}
	    }
	}
    }

    # get the PCH to each of the genes
    my $pch_sets = [];
    my %pch_already;
    foreach my $gene_peg (keys %all_genes)
    {
	if ($pch_already{$gene_peg}){(next);};
	my $gene_set = [$gene_peg];
	foreach my $pch_peg ($fig->in_pch_pin_with($gene_peg)) {
	    $pch_peg =~ s/,.*$//;
	    my $pch_genome = $fig->genome_of($pch_peg);
	    if ( ($gene_peg ne $pch_peg) && ($all_genomes{$pch_genome})) {
		push(@$gene_set,$pch_peg);
		$pch_already{$pch_peg}=1;
	    }
	    $pch_already{$gene_peg}=1;
	}
	push(@$pch_sets,$gene_set);
    }
    
    #create a rank of the pch's
    my %pch_set_rank;
    my $order = 0;
    foreach my $set (@$pch_sets){
	my $count = scalar(@$set);
	$pch_set_rank{$order} = $count;
	$order++;
    }
    
    my %peg_rank;
    my $counter =  1;
    foreach my $pch_order (sort {$pch_set_rank{$b} <=> $pch_set_rank{$a}} keys %pch_set_rank){
	my $good_set = @$pch_sets[$pch_order];
	my $flag_set = 0;
	if (scalar (@$good_set) > 1)
	{
	    foreach my $peg (@$good_set){
		if ((!$peg_rank{$peg})){
		    $peg_rank{$peg} = $counter;
		    $flag_set = 1;
		}
	    }
	    $counter++ if ($flag_set == 1);
	}
	else
	{
	    foreach my $peg (@$good_set){
		$peg_rank{$peg} = "20";
	    }
	}
    }


#    my $bbh_sets = [];
#    my %already;
#    foreach my $gene_key (keys(%all_genes)){
#	if($already{$gene_key}){(next);}
#	my $gene_set = [$gene_key];
#
#	my $gene_key_genome = $fig->genome_of($gene_key);
#
#	foreach my $genome_key (keys(%all_genomes)){
#	    #(next) if ($gene_key_genome eq $genome_key);
#	    my $return = $fig->bbh_list($genome_key,[$gene_key]);
#
#	    my $feature_list = $return->{$gene_key};
#	    foreach my $fl (@$feature_list){
#		push(@$gene_set,$fl);
#	    }
#	}
#	$already{$gene_key} = 1;
#	push(@$bbh_sets,$gene_set);
#    }
#	    
#    my %bbh_set_rank;
#    my $order = 0;
#    foreach my $set (@$bbh_sets){
#	my $count = scalar(@$set);
#	$bbh_set_rank{$order} = $count;
#	$order++;
#    }
#
#    my %peg_rank;
#    my $counter =  1;
#    foreach my $bbh_order (sort {$bbh_set_rank{$b} <=> $bbh_set_rank{$a}} keys %bbh_set_rank){
#	my $good_set = @$bbh_sets[$bbh_order];
#	my $flag_set = 0;
#	if (scalar (@$good_set) > 1)
#	{
#	    foreach my $peg (@$good_set){
#		if ((!$peg_rank{$peg})){
#		    $peg_rank{$peg} = $counter;
#		    $flag_set = 1;
#		}
#	    }
#	    $counter++ if ($flag_set == 1);
#	}
#	else
#	{
#	    foreach my $peg (@$good_set){
#		$peg_rank{$peg} = "20";
#	    }
#	}
#    }
  
    foreach my $region (@$all_regions){
	my $sample_peg = @$region[0];
	my $region_genome = $fig->genome_of($sample_peg);
	my $region_gs = $fig->genus_species($region_genome);
	my $abbrev_name = $fig->abbrev($region_gs);
        my $line_config = { 'title' => $region_gs,
			    'short_title' => $abbrev_name,
			    'basepair_offset' => '0'
			    };
	
	my $offsetting = shift @start_array_region;

	my $second_line_config = { 'title' => "$region_gs",
				   'short_title' => "",
				   'basepair_offset' => '0'
				   };
                
	my $line_data = [];
	my $second_line_data = [];

	# initialize variables to check for overlap in genes
	my ($prev_start, $prev_stop, $prev_fig, $second_line_flag);
	my $major_line_flag = 0;
	my $prev_second_flag = 0;

	foreach my $fid1 (@$region){
	    $second_line_flag = 0;
	    my $element_hash;
	    my $links_list = [];
	    my $descriptions = [];
	    
	    my $color = $peg_rank{$fid1};

	    # get subsystem information
	    my $function = $fig->function_of($fid1);
	    my $url_link = "http://seed-viewer.theseed.org/index.cgi?action=ShowAnnotation&prot=".$fid1;

	    my $link;
            $link = {"link_title" => $fid1,
                     "link" => $url_link};
            push(@$links_list,$link);

	    my @subsystems = $fig->peg_to_subsystems($fid1);
	    foreach my $subsystem (@subsystems){
		my $link;
		$link = {"link" => "http://seed-viewer.theseed.org/index.cgi?action=ShowSubsystem&subsystem_name=$subsystem",
			 "link_title" => $subsystem};
		push(@$links_list,$link);
	    }
	    
            my $description_function;
            $description_function = {"title" => "function",
                                     "value" => $function};
            push(@$descriptions,$description_function);

            my $description_ss;
            my $ss_string = join (",", @subsystems);
            $description_ss = {"title" => "subsystems",
                               "value" => $ss_string};
            push(@$descriptions,$description_ss);


	    my $fid_location = $fig->feature_location($fid1);
	    if($fid_location =~/(.*)_(\d+)_(\d+)$/){
		my($start,$stop);
		$start = $2 - $offsetting;
		$stop = $3 - $offsetting;
		
		if ( (($prev_start) && ($prev_stop) ) &&
		     ( ($start < $prev_start) || ($start < $prev_stop) ||
		       ($stop < $prev_start) || ($stop < $prev_stop) )){
		    if (($second_line_flag == 0) && ($prev_second_flag == 0)) { 
			$second_line_flag = 1; 
			$major_line_flag = 1;
		    }
		}
		$prev_start = $start;
		$prev_stop = $stop;
		$prev_fig = $fid1;
		
		if ((defined($reverse_flag{$region_genome})) && ($reverse_flag{$region_genome} eq $all_genes{$fid1})){
		    $start = $gd_window_size - $start;
		    $stop = $gd_window_size - $stop;
		}
		
		$element_hash = {
		    "title" => $fid1,
		    "start" => $start,
		    "end" =>  $stop,
		    "type"=> 'arrow',
		    "color"=> $color,
		    "zlayer" => "2",
		    "links_list" => $links_list,
		    "description" => $descriptions
		};

		# if there is an overlap, put into second line
		if ($second_line_flag == 1){ push(@$second_line_data,$element_hash); $prev_second_flag = 1;}
		else{ push(@$line_data,$element_hash); $prev_second_flag = 0;}

	    }
	}
	$gd->add_line($line_data, $line_config);
	$gd->add_line($second_line_data, $second_line_config) if ($major_line_flag == 1);
    }
    return $gd;
}
    


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3