[Bio] / FigKernelPackages / gjoseqlib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjoseqlib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.20, Sun Aug 29 22:13:28 2010 UTC revision 1.21, Fri Sep 17 20:04:05 2010 UTC
# Line 173  Line 173 
173  #  ( $npos, $nid, $ndif, $ngap, $nopen, $tgap, $topen ) = interpret_nt_align( $seq1, $seq2 )  #  ( $npos, $nid, $ndif, $ngap, $nopen, $tgap, $topen ) = interpret_nt_align( $seq1, $seq2 )
174  #  ( $npos, $nid, $ndif, $ngap, $nopen, $tgap, $topen ) = interpret_aa_align( $seq1, $seq2 )  #  ( $npos, $nid, $ndif, $ngap, $nopen, $tgap, $topen ) = interpret_aa_align( $seq1, $seq2 )
175  #  #
176    #  @sims = oligomer_similarity( $seq1, $seq2, \%opts )
177    #
178    #===============================================================================
179    
180  use strict;  use strict;
181  use Carp;  use Carp;
# Line 255  Line 258 
258          fraction_nt_diff          fraction_nt_diff
259          interpret_nt_align          interpret_nt_align
260          interpret_aa_align          interpret_aa_align
261            oligomer_similarity
262          );          );
263    
264  our @EXPORT_OK = qw(  our @EXPORT_OK = qw(
# Line 2446  Line 2450 
2450  }  }
2451    
2452    
2453    #-------------------------------------------------------------------------------
2454    #  Return the fraction identity for oligomers over a range of lengths
2455    #
2456    #     @sims = oligomer_similarity( $seq1, $seq2, \%opts )
2457    #
2458    #-------------------------------------------------------------------------------
2459    sub oligomer_similarity
2460    {
2461        my ( $seq1, $seq2, $opts ) = @_;
2462        $seq1 && $seq2 or return ();
2463        $seq1 = $seq1->[2] if ref( $seq1 ) eq 'ARRAY';
2464        $seq2 = $seq2->[2] if ref( $seq2 ) eq 'ARRAY';
2465        $opts && ref( $opts ) eq 'HASH' or ( $opts = {} );
2466    
2467        my $min = $opts->{ min } || $opts->{ max } || 2;
2468        my $max = $opts->{ max } || $min;
2469        return () if $max < $min;
2470    
2471        #  Remove shared gaps
2472    
2473        my $mask1 = gap_mask( $seq1 );
2474        my $mask2 = gap_mask->( $seq2 );
2475        my $mask  = $mask1 | $mask2;
2476        $seq1    &=  $mask;          # Apply mask to sequence
2477        $seq1     =~ tr/\000//d;     # Delete null characters
2478        $seq2    &=  $mask;          # Apply mask to sequence
2479        $seq2     =~ tr/\000//d;     # Delete null characters
2480    
2481        #  Remove terminal gaps
2482    
2483        $mask  = $mask1 & $mask2;
2484        my $n1 = $mask =~ /^(\000+)/ ? length( $1 ) : 0;
2485        my $n2 = $mask =~ /(\000+)$/ ? length( $1 ) : 0;
2486        if ( $n1 || $n2 )
2487        {
2488            my $len = length( $seq1 ) - ( $n1 + $n2 );
2489            $seq1 = substr( $seq1, $n1, $len );
2490            $seq2 = substr( $seq2, $n1, $len );
2491        }
2492    
2493        #  Find the runs of identity
2494    
2495        my $xor = $seq1 ^ $seq2;           # xor of identical residues is null byte
2496    
2497        # Runs with one or more identities
2498    
2499        my %cnt;
2500        foreach ( $xor =~ m/(\000+)/ ) { $cnt{ length }++ }
2501    
2502        map { my $n = $_;
2503              my $ttl = 0;
2504              for ( grep { $_ >= $n } keys %cnt ) { $ttl += $cnt{$_} * ( $_ - ($n-1) ) }
2505              my $nmax = length( $xor ) - ($n-1);
2506              $nmax > 0 ? $ttl / $nmax : undef;
2507            } ( $min .. $max );
2508    }
2509    
2510    
2511  1;  1;

Legend:
Removed from v.1.20  
changed lines
  Added in v.1.21

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3