# This is a SAS component # # Copyright (c) 2003-2010 University of Chicago and Fellowship # for Interpretations of Genomes. All Rights Reserved. # # This file is part of the SEED Toolkit. # # The SEED Toolkit is free software. You can redistribute # it and/or modify it under the terms of the SEED Toolkit # Public License. # # You should have received a copy of the SEED Toolkit Public License # along with this program; if not write to the University of Chicago # at info@ci.uchicago.edu or the Fellowship for Interpretation of # Genomes at veronika@thefig.info or download a copy from # http://www.theseed.org/LICENSE.TXT. # package gjoalign2html; use strict; # Use FIGjs.pm if available: my $have_FIGjs = eval { require FIGjs; 1 }; eval { use Data::Dumper }; # Not in all installations #------------------------------------------------------------------------------- # Prepend and/or append unaligned sequence data to a trimmed alignment: # # \@align = add_alignment_context( \@align, \@seqs, \%options ) # # ( \@align, $pre_len, $ali_len, $suf_len ) # = add_alignment_context( \@align, \@seqs, \%options ) # # Options: # # max_prefix => $limit # limit of residues to be added at beginning # max_suffix => $limit # limit of residues to be added at end # pad_char => $char # character to pad beginning and end (D = ' ') # #------------------------------------------------------------------------------- # Change the pad character at the ends of an alignment: # # \@align = repad_alignment( \@align, \%options ) # \@align = repad_alignment( \%options ) # @align = repad_alignment( \@align, \%options ) # @align = repad_alignment( \%options ) # # Options: # # pad_char => $char # character to pad beginning and end (D = ' ') # old_pad => $regexp # characters to replace at end (D = [^A-Za-z.*]) # #------------------------------------------------------------------------------- # Color an alignment by residue type # # \@align = color_alignment_by_residue( \@align, \%options ) # \@align = color_alignment_by_residue( \%options ) # ( \@align, \@legend ) = color_alignment_by_residue( \@align, \%options ) # ( \@align, \@legend ) = color_alignment_by_residue( \%options ) # # Options: # # align => \@alignment # alignment if not supplied as parameter # alignment => \@alignment # alignment if not supplied as parameter # colors => \%colors # character colors (html spec.) # pallet => $pallet # ale | gde | default # protein => $bool # indicates a protein alignment # #------------------------------------------------------------------------------- # Color an alignment by consensus # # \@align = color_alignment_by_consensus( \@align, \%options ) # \@align = color_alignment_by_consensus( \%options ) # ( \@align, \%legend ) = color_alignment_by_consensus( \@align, \%options ) # ( \@align, \%legend ) = color_alignment_by_consensus( \%options ) # # Options: # # align => \@alignment # Alignment if not supplied as parameter # alignment => \@alignment # Alignment if not supplied as parameter # colors => \%colors # HTML colors for consensus categories # matrix => \%scr_matrix # Hash of hashes of character align scores # max_f_diff => $max_f_diff # Maximum fraction exceptions to consensus # max_n_diff => $max_n_diff # Maximum number of exceptions to consensus # min_score => $score # Score for conservative change (D=1) # protein => $is_protein # Indicates a protein alignment # #------------------------------------------------------------------------------- # Make an html table with an alignment: # # $html = alignment_2_html_table( \@alignment, \%options ) # $html = alignment_2_html_table( \%options ) # ( $html, $javascript ) = alignment_2_html_table( \@alignment, \%options ) # ( $html, $javascript ) = alignment_2_html_table( \%options ) # # In scalar context, the routine returns a single block of html that includes # the JavaScript followed by the table. In list context, they are returned # separately. # # Options: # # align => \@alignment # Alignment, when not a parameter # alignment => \@alignment # Alignment, when not a parameter # key => \@legend # Append the supplied legend # legend => \@legend # Append the supplied legend # nojavascript => $boolean # Omit the JavaScript for pop-ups # tooltip => $boolean # Add pop-up tooltip to sequences # tooltip => \%id2tip # (specify the tip for each id) # # Each sequence can be a string, or an array of [ text, color ] pairs. # @legend is an array of lines of strings and/or [ text, color ] pairs. # # Default tooltip is the id and description, but user can supply a # hash with arrays of alternative mouseover parameters: # # mouseover( @{ $tooltip->{ $id } } ) # mouseover( $ttl, $text, $menu, $parent, $ttl_color, $text_color ) #------------------------------------------------------------------------------- # Make an html page with an alignment: # # $html = alignment_2_html_page( \@alignment, \%options ) # $html = alignment_2_html_page( \%options ) # # Options: # # align => \@alignment # alignment => \@alignment # key => \@legend # legend => \@legend # title => $page_title # # Each sequence can be a string, or an array of character-color pairs. #------------------------------------------------------------------------------- # # 2009/08/25 -- Fix problem with wrap in the alignment. # Change all #abc to #aabbcc format due to a browser issue. # #------------------------------------------------------------------------------- # Some global defaults: my $max_n_diff = 1; # Maximum number of exceptions to consensus my $max_f_diff = 0.10; # Maximum fraction exceptions to consensus my $minblos = 1; # Minimum score to be called a conservative change #------------------------------------------------------------------------------- # Prepend and/or append unaligned sequence data to a trimmed alignment: # # \@align = add_alignment_context( \@align, \@seqs, \%options ) # ( \@align, $pre_len, $ali_len, $suf_len ) = add_alignment_context( \@align, \@seqs, \%options ) # # Options: # # max_prefix => $limit # limit of residues to be added at beginning # max_suffix => $limit # limit of residues to be added at end # pad_char => $char # character to pad beginning and end (D = ' ') # #------------------------------------------------------------------------------- sub add_alignment_context { my ( $align, $seqs, $options ) = @_; $align && ( ref( $align ) eq 'ARRAY' ) && ( @$align > 0 ) or print STDERR "add_alignment_context called without valid alignment\n" and return undef; $seqs && ( ref( $seqs ) eq 'ARRAY' ) && ( @$seqs > 0 ) or print STDERR "add_alignment_context called without valid sequences\n" and return undef; my %index = map { $_->[0], $_ } @$seqs; my %options = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_; foreach ( keys %options ) { $options{ canonical_key( $_ ) } = $options{ $_ } } my $max_prefix = defined( $options{ maxprefix } ) ? $options{ maxprefix } : 1e100; my $max_suffix = defined( $options{ maxsuffix } ) ? $options{ maxsuffix } : 1e100; my $pad_char = $options{ padchar } ? substr( $options{ padchar }, 0, 1 ) : ' '; my $pre_len = 0; my $ali_len = length( $align->[0]->[2] ); my $suf_len = 0; my %fix_data = (); my ( $id, $def, $aln_seq ); my ( $pre0, $npre, $suf0, $nsuf ); my ( $aligned, $full, $pos ); foreach ( @$align ) { ( $id, $def, $aln_seq ) = @$_; if ( $index{$id} ) { $aligned = lc $aln_seq; $aligned =~ tr/a-z//cd; $full = lc $index{$id}->[2]; $pos = index( $full, $aligned ); if ( $pos > -1 ) { $npre = ( $pos <= $max_prefix ) ? $pos : $max_prefix; $pre0 = $pos - $npre; if ( $npre > $pre_len ) { $pre_len = $npre } $suf0 = $pos + length( $aligned ); $nsuf = length( $full ) - $suf0; $nsuf = $max_suffix if $nsuf > $max_suffix; if ( $nsuf > $suf_len ) { $suf_len = $nsuf } } else { $npre = 0; $nsuf = 0; } } $fix_data{ $id } = [ $pre0, $npre, $suf0, $nsuf, $index{$id} ]; } my @align2; my ( @parts, $seq_entry ); foreach ( @$align ) { ( $id, $def, $aln_seq ) = @$_; ( $pre0, $npre, $suf0, $nsuf, $seq_entry ) = @{ $fix_data{ $id } }; @parts = (); push @parts, $pad_char x ( $pre_len - $npre ) if ( $npre < $pre_len ); push @parts, lc substr( $seq_entry->[2], $pre0, $npre ) if $npre; $aln_seq =~ s/^([^A-Za-z.]+)/$pad_char x length($1)/e if ( $pre_len && ! $npre ); $aln_seq =~ s/([^A-Za-z.]+)$/$pad_char x length($1)/e if ( $suf_len && ! $nsuf ); push @parts, uc $aln_seq; push @parts, lc substr( $seq_entry->[2], $suf0, $nsuf ) if $nsuf; push @parts, $pad_char x ( $suf_len - $nsuf ) if ( $nsuf < $suf_len ); push @align2, [ $id, $def, join( '', @parts ) ]; } wantarray ? ( \@align2, $pre_len, $ali_len, $suf_len ) : \@align2; } #------------------------------------------------------------------------------- # Change the pad character at the ends of an alignment: # # \@align = repad_alignment( \@align, \%options ) # \@align = repad_alignment( \%options ) # @align = repad_alignment( \@align, \%options ) # @align = repad_alignment( \%options ) # # Options: # # pad_char => $char # character to pad beginning and end (D = ' ') # old_pad => $regexp # characters to replace at end (D = [^A-Za-z.*]) # #------------------------------------------------------------------------------- sub repad_alignment { my $align; $align = shift if ( ref($_[0]) eq 'ARRAY' ); my %data = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_; foreach ( keys %data ) { $data{ canonical_key( $_ ) } = $data{ $_ } } $align ||= $data{ align } || $data{ alignment }; if ( ! $align || ( ref( $align ) ne 'ARRAY' ) ) { print STDERR "repad_alignment called without alignment\n"; return (); } $data{ padchar } ||= $data{ pad }; # Make this a fallback synonym; my $pad_char = $data{ padchar } ? substr( $data{ padchar }, 0, 1 ) : ' '; $data{ oldpad } ||= $data{ old }; # Make this a fallback synonym; my $old_pad = $data{ oldpad } ? $data{ padchar } : '[^A-Za-z.*]'; my $reg1 = qr/^($old_pad+)/; my $reg2 = qr/($old_pad+)$/; my ( $id, $def, $seq ); my @align2 = (); foreach ( @$align ) { ( $id, $def, $seq ) = @$_; $seq =~ s/$reg1/$pad_char x length($1)/e; $seq =~ s/$reg2/$pad_char x length($1)/e; push @align2, [ $id, $def, $seq ]; } wantarray ? @align2 : \@align2; } #------------------------------------------------------------------------------- # Color an alignment by residue type # # \@align = color_alignment_by_residue( \@align, \%options ) # \@align = color_alignment_by_residue( \%options ) # ( \@align, \@legend ) = color_alignment_by_residue( \@align, \%options ) # ( \@align, \@legend ) = color_alignment_by_residue( \%options ) # # Options: # # align => \@alignment # alignment if not supplied as parameter # alignment => \@alignment # alignment if not supplied as parameter # colors => \%colors # character colors (html spec.) # pallet => $pallet # ale | gde | default # protein => $bool # indicates a protein alignment # #------------------------------------------------------------------------------- sub color_alignment_by_residue { my $align; $align = shift if ( ref($_[0]) eq 'ARRAY' ); my %data = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_; foreach ( keys %data ) { $data{ canonical_key( $_ ) } = $data{ $_ } } $align ||= $data{ align } || $data{ alignment }; if ( ! $align || ( ref( $align ) ne 'ARRAY' ) ) { print STDERR "color_alignment_by_residue called without alignment\n"; return (); } my $colors = $data{ color }; if ( $colors && ( ref( $colors ) eq 'HASH' ) ) { print STDERR "color_alignment_by_residue called without invalid colors hash\n"; return (); } if ( ! $colors ) { my $is_prot = defined( $data{ protein } ) ? $data{ protein } : &guess_prot( $align ); my $pallet = $data{ pallet }; $colors = $is_prot ? aa_colors( $pallet ) : nt_colors( $pallet ); } my ( $id, $def, $seq ); my $pad_char = $data{ padchar } || $data{ pad } || ' '; my $reg1 = qr/^([^A-Za-z.*]+)/; my $reg2 = qr/([^A-Za-z.*]+)$/; my @colored_align = (); foreach ( @$align ) { ( $id, $def, $seq ) = @$_; $seq =~ s/$reg1/$pad_char x length($1)/e; $seq =~ s/$reg2/$pad_char x length($1)/e; push @colored_align, [ $id, $def, scalar color_sequence( $seq, $colors ) ]; } my @legend = (); # Need to create this still if ( wantarray ) { my ( $i, $chr ); my @row = (); foreach ( $i = 32; $i < 127; $i++ ) { $chr = chr( $i ); push @row, [ $chr, $colors->{$chr} || '#ffffff' ]; if ( $i % 32 == 31 ) { push @legend, [ @row ]; @row = () } } push @legend, [ @row ]; } wantarray ? ( \@colored_align, \@legend ) : \@colored_align; } #------------------------------------------------------------------------------- # Convert sequence to list of character-color pairs: # # \@colored_sequence = color_sequence( $sequence, \%colors ) # @colored_sequence = color_sequence( $sequence, \%colors ) #------------------------------------------------------------------------------- sub color_sequence { my ( $seq, $colors ) = @_; my %colors = ref($colors) eq 'HASH' ? %$colors : (); my @colored_seq = map { [ $_, $colors{ $_ } || '#ffffff' ] } split //, $seq; wantarray ? @colored_seq : \@colored_seq; } #------------------------------------------------------------------------------- # Color an alignment by consensus # # \@align = color_alignment_by_consensus( \@align, \%options ) # \@align = color_alignment_by_consensus( \%options ) # ( \@align, \%legend ) = color_alignment_by_consensus( \@align, \%options ) # ( \@align, \%legend ) = color_alignment_by_consensus( \%options ) # # Options: # # align => \@alignment # Alignment if not supplied as parameter # alignment => \@alignment # Alignment if not supplied as parameter # colors => \%colors # HTML colors for consensus categories # matrix => \%scr_matrix # Hash of hashes of character align scores # max_f_diff => $max_f_diff # Maximum fraction exceptions to consensus # max_n_diff => $max_n_diff # Maximum number of exceptions to consensus # min_score => $score # Score for conservative change (D=1) # protein => $is_protein # Indicates a protein alignment # #------------------------------------------------------------------------------- sub color_alignment_by_consensus { my $align; $align = shift if ( ref($_[0]) eq 'ARRAY' ); # Options, with canonical form of keys my %data = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_; foreach ( keys %data ) { $data{ canonical_key( $_ ) } = $data{ $_ } } $align ||= $data{ align } || $data{ alignment }; if ( ! $align || ( ref( $align ) ne 'ARRAY' ) ) { print STDERR "color_alignment_by_consensus called without alignment\n"; return (); } my ( $pallet, $legend ) = consensus_pallet( $data{ color } ); my $conserve_list = conservative_change_list( \%data ); my $conserve_hash = conservative_change_hash( \%data ); my $chars = qr/^[-*A-Za-z]$/; my $s; my $pad_char = $data{ padchar } || $data{ pad } || ' '; my $reg1 = qr/^([^A-Za-z.*]+)/; my $reg2 = qr/([^A-Za-z.*]+)$/; my @seq = map { $s = uc $_->[2]; $s =~ s/$reg1/$pad_char x length($1)/e; $s =~ s/$reg2/$pad_char x length($1)/e; $s } @$align; # Define the consensus type(s) for each site. There are a 3 options: # 1. There is a single consensus nucleotide. # 2. Two residue types are sufficient to describe the position. # 3. A residue and conservative changes are sufficient. my $len = length( $seq[0] ); $max_n_diff = $data{ maxndiff } if defined( $data{ maxndiff } ); $max_f_diff = $data{ maxfdiff } if defined( $data{ maxfdiff } ); my @col_clr; # An array of hashes, one for each column my $cons1 = ' ' x $len; # Primary consensus characters my $cons2 = ' ' x $len; # Secondary consensus characters my ( $i, %cnt, $chr, @c, $n_signif, $min_consen, $c1, $c2, $clr ); for ( $i = 0; $i < $len; $i++) { # Count the number of each residue type in the column %cnt = (); foreach ( @seq ) { $chr = substr($_,$i,1); $cnt{$chr}++ if $chr =~ /$chars/ } $n_signif = sum( map { $cnt{$_} } keys %cnt ); $min_consen = $n_signif - max( $max_n_diff, int( $max_f_diff * $n_signif ) ); ( $c1, $c2, @c ) = consensus_residues( \%cnt, $min_consen, $conserve_hash ); substr( $cons1, $i, 1 ) = $c1 if $c1; substr( $cons2, $i, 1 ) = $c2 if $c2; push @col_clr, consensus_colors( $pallet, $conserve_list, $c1, $c2, @c ); } my @color_align = (); my ( $id, $def, $seq ); foreach ( @$align, [ 'Consen1', 'Primary consensus', $cons1 ], [ 'Consen2', 'Secondary consensus', $cons2 ] ) { ( $id, $def, $seq ) = @$_; $seq =~ s/^([^A-Za-z.]+)/$pad_char x length($1)/e; $seq =~ s/([^A-Za-z.]+)$/$pad_char x length($1)/e; $i = 0; my @clr_seq = map { [ $_, $col_clr[$i++]->{$_} || '#ffffff' ] } split //, $seq; push @color_align, [ $id, $def, \@clr_seq ]; } wantarray ? ( \@color_align, $legend ) : \@color_align; } #------------------------------------------------------------------------------- # Work out the consensus residues at a site: # # ( $consen1, $consen2, @chars ) = consensus_residues( $counts, $min_match, # $conserve_hash ) #------------------------------------------------------------------------------- sub consensus_residues { my ( $cnt_hash, $min_match, $conserve_hash ) = @_; # Sort the residues from most to least frequent, and note first 2: my %cnt = %$cnt_hash; my ( $c1, $c2, @c ); ( $c1, $c2 ) = @c = sort { $cnt{$b} <=> $cnt{$a} } keys %cnt; ( $cnt{$c1} >= 2 ) or return ( '', '' ); # Are there at least $min_match of the most abundant? if ( $cnt{$c1} >= $min_match ) { $c2 = ''; } # Are there at least $min_match of the two most abundant? elsif ( ( $cnt{$c2} >= 2 ) && ( ( $cnt{$c1} + $cnt{$c2} ) >= $min_match ) ) { $c1 = lc $c1; $c2 = lc $c2; } # Can we make a consensus of conservative changes? else { $c2 = ''; my ( $is_conservative, @pos, $total ); my $found = 0; foreach $c1 ( grep { /^[AC-IK-NP-TVWY]$/ } @c ) { ( $is_conservative = $conserve_hash->{ $c1 } ) or next; @pos = grep { $is_conservative->{ $_ } } @c; $total = sum( map { $cnt{ $_ } } @pos ); if ( $total >= $min_match ) { $found = 1; last } } $c1 = $found ? lc $c1 : ''; } return ( $c1, $c2, @c ); } #------------------------------------------------------------------------------- # Work out the residue colors for the consensus at a site: # # \%color = consensus_colors( $pallet, $consevative, $cons1, $cons2, @chars ) #------------------------------------------------------------------------------- sub consensus_colors { my ( $pallet, $conservative, $c1, $c2, @c ) = @_; # print STDERR Dumper( $c1, $c2, \@c ); exit; return {} if ! $c1; my %pallet = ( ref($pallet) eq 'HASH' ) ? %$pallet : @{ scalar consensus_pallet() }; $conservative = {} if ref( $conservative ) ne 'HASH'; # Mark everything but ' ' and . as mismatch, then overwrite exceptions: my %color = map { $_ => $pallet{ mismatch } } grep { ! /^[ .]$/ } @c; if ( $c1 ne '-' ) { $c1 = uc $c1; foreach ( @{ $conservative->{$c1} || [] } ) { $color{ $_ } = $pallet{ positive } } $color{ $c1 } = $pallet{ consen1 }; if ( $c2 ) { $color{ uc $c2 } = ( $c2 ne '-' ) ? $pallet{ consen2 } : $pallet{ consen2g }; } } else { $color{ $c1 } = $pallet{ consen1g }; if ( $c2 ) { $color{ uc $c2 } = $pallet{ consen2 } } } # Copy colors to lowercase letters: foreach ( grep { /^[A-Z]$/ } keys %color ) { $color{ lc $_ } = $color{ $_ } } return \%color; } #------------------------------------------------------------------------------- # Numerical maximum: # # $max = max( $a, $b ) #------------------------------------------------------------------------------- sub max { $_[0] > $_[1] ? $_[0] : $_[1] } #------------------------------------------------------------------------------- # Define the colors used to color by consensus: # # \%color_pallet = consensus_pallet() # \%color_pallet = consensus_pallet( \%user_pallet ) # ( \%color_pallet, \@legend ) = consensus_pallet() # ( \%color_pallet, \@legend ) = consensus_pallet( \%user_pallet ) # # \%color_pallet is key/color pairs, where key is a residue category # \@legend is lines of text/color pairs #------------------------------------------------------------------------------- sub consensus_pallet { # Initialize with a standard set, ensuring that all keys are covered: my %pallet = ( '' => '#ffffff', other => '#ffffff', consen1 => '#bbddff', consen1g => '#ddeeff', positive => '#66ee99', consen2 => '#eeee44', consen2g => '#eeeeaa', mismatch => '#ff99ff' ); # Overwrite defaults with user-supplied colors if ( ref($_[0]) eq 'HASH' ) { my %user_pallet = %{ $_[0] }; foreach ( keys %user_pallet ) { $pallet{ $_ } = $user_pallet{ $_ } } } my @legend; if ( wantarray ) { @legend = ( [ [ 'Consensus 1' => $pallet{ consen1 } ], [ ' (when a gap)' => $pallet{ consen1g } ] ], [ [ 'Conservative difference' => $pallet{ positive } ] ], [ [ 'Consensus 2' => $pallet{ consen2 } ], [ ' (when a gap)' => $pallet{ consen2g } ] ], [ [ 'Nonconservative diff.' => $pallet{ mismatch } ] ], [ [ 'Other character' => $pallet{ '' } ] ], ); } wantarray ? ( \%pallet, \@legend ) : \%pallet; } #------------------------------------------------------------------------------- # Define the list of conserved amino acid replacements for each amino acid: # # \%conserve_change_lists = conservative_change_list( \%options ) # \%conserve_change_lists = conservative_change_list( %options ) # # \@conserve_changes = $conserve_change_lists->{ $aa }; # # Options: # # min_score => $score # Minimum score for conservative designation # matrix => \%score_hash # Score matrix as hash of hashes #------------------------------------------------------------------------------- sub conservative_change_list { my %options = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_; foreach ( keys %options ) { $options{ canonical_key( $_ ) } = $options{ $_ } } my $min_score = defined( $options{ minscore } ) ? $options{ minscore } : 1; my $matrix = ( ref( $options{ matrix } ) eq 'HASH' ) ? $options{ matrix } : blosum62_hash_hash(); my %hash; foreach ( keys %$matrix ) { my $score = $matrix->{ $_ }; $hash{ $_ } = [ grep { $score->{ $_ } >= $min_score } keys %$score ]; } return \%hash; } #------------------------------------------------------------------------------- # Define a hash of conserved amino acid replacements for each amino acid: # # \%conserve_change_hashes = conservative_change_hash( \%options ) # \%conserve_change_hashes = conservative_change_hash( %options ) # # \%conserve_changes = $conserve_change_hashes->{ $aa }; # # Options: # # min_score => $score # Minimum score for conservative designation # matrix => \%score_hash # Score matrix as hash of hashes #------------------------------------------------------------------------------- sub conservative_change_hash { my %options = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_; foreach ( keys %options ) { $options{ canonical_key( $_ ) } = $options{ $_ } } my $min_score = defined( $options{ minscore } ) ? $options{ minscore } : 1; my $matrix = ( ref( $options{ matrix } ) eq 'HASH' ) ? $options{ matrix } : blosum62_hash_hash(); my %hash; foreach ( keys %$matrix ) { my $score = $matrix->{ $_ }; $hash{ $_ } = { map { $_ => 1 } grep { $score->{ $_ } >= $min_score } keys %$score }; } return \%hash; } #------------------------------------------------------------------------------- # Define a hash of hashes with the blosum62 scores for each amino acid: # # \%blosum62 = blosum62_hash_hash() # $score = $blosum62->{$aa1}->{$aa2}; # #------------------------------------------------------------------------------- sub blosum62_hash_hash { my ( $aa_list, $raw_scores ) = raw_blosum62(); my %hash; my @scores = @$raw_scores; foreach ( @$aa_list ) { my @scr = @{ shift @scores }; $hash{ $_ } = { map { $_ => shift @scr } @$aa_list }; } return \%hash; } #------------------------------------------------------------------------------- # Define an ordered list of aminoacids and lists of each of their blosum scores # # ( \@aa_list, \@scores ) = raw_blosum62() # #------------------------------------------------------------------------------- sub raw_blosum62 { return ( [ qw( A R N D C Q E G H I L K M F P S T W Y V B Z X * ) ], [ map { shift @$_; $_ } ( # A R N D C Q E G H I L K M F P S T W Y V B Z X * # [ qw( A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 ) ], [ qw( R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 ) ], [ qw( N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 ) ], [ qw( D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 ) ], [ qw( C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 ) ], [ qw( Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 ) ], [ qw( E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 ) ], [ qw( G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 ) ], [ qw( H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 ) ], [ qw( I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 ) ], [ qw( L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 ) ], [ qw( K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 ) ], [ qw( M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 ) ], [ qw( F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 ) ], [ qw( P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 ) ], [ qw( S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 ) ], [ qw( T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 ) ], [ qw( W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 ) ], [ qw( Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 ) ], [ qw( V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 ) ], [ qw( B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 ) ], [ qw( Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 ) ], [ qw( X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 ) ], [ qw( * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 ) ] ) ] ) } #------------------------------------------------------------------------------- # Make an html table with an alignment: # # $html = alignment_2_html_table( \@alignment, \%options ) # $html = alignment_2_html_table( \%options ) # ( $html, $javascript ) = alignment_2_html_table( \@alignment, \%options ) # ( $html, $javascript ) = alignment_2_html_table( \%options ) # # In scalar context, the routine returns a single block of html that includes # the JavaScript followed by the table. In list context, they are returned # separately. # # Options: # # align => \@alignment # Alignment, when not a parameter # alignment => \@alignment # Alignment, when not a parameter # key => \@legend # Append the supplied legend # legend => \@legend # Append the supplied legend # nojavascript => $boolean # Omit the JavaScript for pop-ups # tooltip => $boolean # Add pop-up tooltip to sequences # tooltip => \%id2tip # (specify the tip for each id) # # Each sequence can be a string, or an array of [ text, color ] pairs. # @legend is an array of lines of strings and/or [ text, color ] pairs. # # Default tooltip is the id and description, but user can supply a # hash with arrays of alternative mouseover parameters: # # mouseover( @{ $tooltip->{ $id } } ) # mouseover( $ttl, $text, $menu, $parent, $ttl_color, $text_color ) #------------------------------------------------------------------------------- sub alignment_2_html_table { my $align; $align = shift if ( ref($_[0]) eq 'ARRAY' ); # Options, with canonical form of keys my %options = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_; foreach ( keys %options ) { $options{ canonical_key( $_ ) } = $options{ $_ } } $align ||= $options{ align } || $options{ alignment }; if ( ! $align || ( ref( $align ) ne 'ARRAY' ) ) { print STDERR "alignment_2_html_table called without alignment\n"; return ''; } my $tooltip = $options{ tooltip } || $options{ popup } || 0; my $tiplink = ''; my $nojavascript = $options{ nojavascript } || ( $tooltip ? 0 : 1 ); my @html; push @html, "\n"; foreach ( @$align ) { if ( $tooltip ) { # Default tooltip is the id and description, but user can supply a # hash with alternative mouseover parameters: # # mouseover( $ttl, $text, $menu, $parent, $ttl_color, $text_color ) # my @args; if ( ( ref( $tooltip ) eq 'HASH' ) && ( ref( $tooltip->{ $_->[0] } ) eq 'ARRAY' ) ) { @args = @{ $tooltip->{ $_->[0] } } } else { @args = ( $_->[0], ( $_->[1] || ' ' ) ); } $tiplink = ''; } push @html, " \n", " \n", " \n", # Getting rid of padding, so ... " \n", " \n", # Getting rid of padding, so ... " \n", " \n", " \n"; } push @html, "
$_->[0] $_->[1]  ", # Switch from
 to  requires nowrap -- 2009/08/25
                             ( $tooltip ? $tiplink : () ),
                             sequence_2_html( $_->[2] ),
                             ( $tooltip ? '' : () ),
                             "
\n"; my $legend = $options{ key } || $options{ legend }; if ( ref( $legend ) eq 'ARRAY' ) { push @html, "
\n", "\n"; foreach ( @$legend ) { push @html, " \n"; } push @html, "
", sequence_2_html( $_ ), "
\n"; } my $javascript = $nojavascript ? '' : &mouseover_JavaScript(); wantarray && $javascript ? ( join( '', @html ), $javascript ) # ( $html, $script ) : join( '', $javascript, @html ); # $html } #------------------------------------------------------------------------------- # Make html to display a possibly colored sequence: # # $html = sequence_2_html( $string ) # $html = sequence_2_html( \@character_color_pairs ) # # Each sequence can be a string, or an array of character-color pairs. #------------------------------------------------------------------------------- sub sequence_2_html { return $_[0] if ref( $_[0] ) ne 'ARRAY'; my $string = shift; my @html = (); my ( $txt, $clr ); foreach ( @{ merge_common_color( $string ) } ) { $txt = html_esc( $_->[0] ); $txt or next; $txt =~ s/ / /g; # 2009-03-02 -- Change from
 to  wrapper
        $clr = $_->[1];
        push @html, ( $clr ? qq($txt)
                           : $txt
                    )
    }
    join '', @html;
}


#-------------------------------------------------------------------------------
#  Merge adjacent strings with same color to cut amount of html:
#
#     \@character_color_pairs = merge_common_color( \@character_color_pairs )
#
#-------------------------------------------------------------------------------
sub merge_common_color
{
    return $_[0] if ref( $_[0] ) ne 'ARRAY';

    my @string = ();
    my $color  = '';
    my @common_color = ();
    foreach ( @{ $_[0] }, [ '', 0 ] )  # One bogus empty string to flush it
    {
        if ( $_->[1] ne $color )
        {
            push @string, [ join( '', @common_color ), $color ],
            @common_color = ();
            $color = $_->[1]
        }
        push @common_color, $_->[0];
    }
    return \@string;
}


#-------------------------------------------------------------------------------
#  Make an html page with an alignment:
#
#     $html = alignment_2_html_page( \@alignment, \%options )
#     $html = alignment_2_html_page(              \%options )
#
#  Options:
#
#     align     => \@alignment
#     alignment => \@alignment
#     key       => \@legend
#     legend    => \@legend
#     title     =>  $page_title
#
#  Each sequence can be a string, or an array of character-color pairs.
#-------------------------------------------------------------------------------
sub alignment_2_html_page
{
    my $options = ref( $_[0] ) eq 'HASH' ? $_[0] :
                  ref( $_[1] ) eq 'HASH' ? $_[1] : {};

    join '', html_prefix( $options ),
             ( alignment_2_html_table( @_ ) )[1,0],
             html_suffix( $options );
}


#-------------------------------------------------------------------------------
#  $html_page_start = html_prefix()
#-------------------------------------------------------------------------------
sub html_prefix
{
    my $options = ref( $_[0] ) eq 'HASH' ? $_[0] : {};

    my $title = $options->{ title } || 'Alignment';

    return <<"End_of_Prefix";


$title


End_of_Prefix
}


#-------------------------------------------------------------------------------
#  $html_page_end = html_suffix()
#-------------------------------------------------------------------------------
sub html_suffix
{
    return <<"End_of_Suffix";


End_of_Suffix
}


#-------------------------------------------------------------------------------
#  $html_text = html_esc{ $text )
#-------------------------------------------------------------------------------
sub html_esc
{
    my $txt = shift;
    $txt =~ s/\&/&/g;
    $txt =~ s/\/>/g;
    return $txt;
}


sub sum
{
    my $cnt = 0;
    while ( defined( $_[0] ) ) { $cnt += shift }
    $cnt
}


#-------------------------------------------------------------------------------
#  A canonical key is lower case, has no underscores, and no terminal s
#
#     $key = canonical_key( $key )
#-------------------------------------------------------------------------------
sub canonical_key { my $key = lc shift; $key =~ s/_//g; $key =~ s/s$//; $key }


#-------------------------------------------------------------------------------
#  $is_protein_alignment = guess_prot( \@alignment )
#-------------------------------------------------------------------------------
sub guess_prot
{
    my $align = shift;
    my $seq = uc $align->[0]->[-1];               # First sequence
    my $nt  = $seq =~ tr/ACGTU//;                 # Nucleotides
    my $res = $seq =~ tr/ACDEFGHIKLMNPQRSTUVWY//; # Total residues
    return ( $nt > 0.7 * $res ) ? 0 : 1;          # >70% of total?
}



#-------------------------------------------------------------------------------
#  \%character_color_pallet = aa_colors()             #  Default
#  \%character_color_pallet = aa_colors( $set_name )  #  ale
#-------------------------------------------------------------------------------
sub aa_colors
{
    my $pallet = shift || '';
    my %colors;

    if ( $pallet =~ /ale/i )
    {
        %colors = (
            ' ' => '#bebebe',  # Grey
            '~' => '#bebebe',  # Grey
            '-' => '#696969',  # DimGray
            '.' => '#828282',  # Grey51
            '*' => '#ff0000',  # Red

             G  => '#ffffff',  # White

             A  => '#d3d3d3',  # LightGray
             V  => '#d3d3d3',  # LightGray
             L  => '#d3d3d3',  # LightGray
             I  => '#d3d3d3',  # LightGray
             M  => '#d3d3d3',  # LightGray
             C  => '#d3d3d3',  # LightGray
             U  => '#d3d3d3',  # LightGray

             W  => '#ffd700',  # Gold
             F  => '#ffd700',  # Gold
             Y  => '#ffd700',  # Gold

             K  => '#00bfff',  # DeepSkyBlue
             R  => '#00bfff',  # DeepSkyBlue
             H  => '#40e0d0',  # Turquoise

             N  => '#98fb98',  # PaleGreen
             Q  => '#98fb98',  # PaleGreen
             S  => '#98fb98',  # PaleGreen
             T  => '#98fb98',  # PaleGreen
             P  => '#98fb98',  # PaleGreen

             D  => '#fa8072',  # Salmon
             E  => '#fa8072',  # Salmon
             );
    }
    else
    {
        %colors = (
            ' ' => '#ffffff',  # White
            '~' => '#ffffff',  # White
            '.' => '#ffffff',  # White
            '-' => '#888888',  # Gray
            '*' => '#ff0000',  # Red

             G  => '#dd88dd',  # DullMagenta

             A  => '#dddddd',  # LightGray
             V  => '#dddddd',  # LightGray
             L  => '#dddddd',  # LightGray
             I  => '#dddddd',  # LightGray
             M  => '#dddddd',  # LightGray

             C  => '#ffff00',  # Yellow
             U  => '#ffff00',  # Yellow

             W  => '#ddaa22',  # Goldenrod
             F  => '#ddaa22',  # Goldenrod
             Y  => '#ddaa22',  # Goldenrod

             K  => '#00bbff',  # DeepSkyBlue
             R  => '#00bbff',  # DeepSkyBlue
             H  => '#44eedd',  # Turquoise

             N  => '#99ff99',  # PaleGreen
             Q  => '#99ff99',  # PaleGreen
             S  => '#99ff99',  # PaleGreen
             T  => '#99ff99',  # PaleGreen

             P  => '#aaddaa',  # DullGreen

             D  => '#ff8877',  # Salmon
             E  => '#ff8877',  # Salmon
             );
    }

    foreach ( keys %colors ) { $colors{ lc $_ } = $colors{ $_ } }

    return \%colors;
}


#-------------------------------------------------------------------------------
#  \%character_color_pallet = nt_colors()             #  Default
#  \%character_color_pallet = nt_colors( $set_name )  #  ale | gde
#-------------------------------------------------------------------------------
sub nt_colors
{
    my $pallet = shift || '';
    my %colors;

    if ( $pallet =~ /ale/i )
    {
        %colors = (
            ' ' => '#666666',  # DimGray
            '~' => '#666666',  # DimGray
            '-' => '#bbbbbb',  # Gray
            '.' => '#888888',  # Gray51

             A  => '#ffdd00',  # Gold
             C  => '#00ffff',  # Cyan
             G  => '#ffff00',  # Yellow
             T  => '#99ff99',  # PaleGreen
             U  => '#99ff99',  # PaleGreen
             );
    }
    elsif ( $pallet =~ /gde/i )
    {
        %colors = (
            ' ' => '#666666',  # DimGray
            '~' => '#666666',  # DimGray
            '-' => '#bbbbbb',  # Gray
            '.' => '#888888',  # Gray51

             A  => '#ff0000',  # Red
             C  => '#0000ff',  # Blue
             G  => '#ffff88',  # PaleYellow
             T  => '#00ff00',  # Green
             U  => '#00ff00',  # Green
             );
    }
    else
    {
        %colors = (
            ' ' => '#777777',
            '~' => '#777777',
            '-' => '#bbbbbb',
            '.' => '#888888',

             A  => '#ff6666',
             G  => '#ffff00',
             C  => '#00ff00',
             T  => '#8888ff',
             U  => '#8888ff',

             R  => '#ffaa44',
             Y  => '#44dd88',
             K  => '#bbbb99',
             M  => '#eeee66',
             S  => '#aaff55',
             W  => '#cc88cc',

             B  => '#bbdddd',
             H  => '#bbbbdd',
             D  => '#ddbbdd',
             V  => '#ddddaa',

             N  => '#dddddd',
             );
    }

    foreach ( keys %colors ) { $colors{ lc $_ } = $colors{ $_ } }

    return \%colors;
}


#-------------------------------------------------------------------------------
#  Return a string for adding an onMouseover tooltip handler:
#
#     mouseover( $title, $text, $menu, $parent, $titlecolor, $bodycolor)
#
#  The code here is virtually identical to that in FIGjs.pm, but makes this
#  SEED independent.
#-------------------------------------------------------------------------------
sub mouseover
{
    if ( $have_FIGjs ) { return &FIGjs::mouseover( @_ ) }

    my ( $title, $text, $menu, $parent, $titlecolor, $bodycolor ) = @_;

    defined( $title ) or $title = '';
    $title =~ s/'/\\'/g;    # escape '
    $title =~ s/"/"/g; # escape "

    defined( $text ) or $text = '';
    $text =~ s/'/\\'/g;    # escape '
    $text =~ s/"/"/g; # escape "

    defined( $menu ) or $menu = '';
    $menu =~ s/'/\\'/g;    # escape '
    $menu =~ s/"/"/g; # escape "

    $parent     = '' if ! defined $parent;
    $titlecolor = '' if ! defined $titlecolor;
    $bodycolor  = '' if ! defined $bodycolor;

    qq( onMouseover="javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this,'$title','$text','$menu','$parent','$titlecolor','$bodycolor');this.tooltip.addHandler(); return false;" );
}


#-------------------------------------------------------------------------------
#  Return a text string with the necessary JavaScript for the mouseover
#  tooltips.
#
#     $html = mouseover_JavaScript()
#
#  The code here is virtually identical to that in FIGjs.pm, but makes this
#  SEED independent.
#-------------------------------------------------------------------------------
sub mouseover_JavaScript
{
    if ( $have_FIGjs ) { return &FIGjs::toolTipScript( ) }

    return <<'End_of_JavaScript';

End_of_JavaScript
}


1;