[Bio] / FigKernelPackages / gjoseqlib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjoseqlib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.16, Tue Nov 24 21:22:46 2009 UTC revision 1.17, Mon Jan 18 20:01:14 2010 UTC
# Line 140  Line 140 
140  #  @seq_entries = read_qual(  $filename )  #  @seq_entries = read_qual(  $filename )
141  # \@seq_entries = read_qual(  $filename )  # \@seq_entries = read_qual(  $filename )
142  #  #
143  #  Evaluate nucleotide alignments:  #  Evaluate alignments:
144  #  #
145  #  $fraction_diff = fraction_nt_diff( $seq1, $seq2, \%options )  #  $fraction_diff = fraction_nt_diff( $seq1, $seq2, \%options )
146  #  $fraction_diff = fraction_nt_diff( $seq1, $seq2 )  #  $fraction_diff = fraction_nt_diff( $seq1, $seq2 )
# Line 148  Line 148 
148  #  $fraction_diff = fraction_nt_diff( $seq1, $seq2, $gap_open, $gap_extend )  #  $fraction_diff = fraction_nt_diff( $seq1, $seq2, $gap_open, $gap_extend )
149  #  #
150  #  ( $npos, $nid, $ndif, $ngap, $nopen, $tgap, $topen ) = interpret_nt_align( $seq1, $seq2 )  #  ( $npos, $nid, $ndif, $ngap, $nopen, $tgap, $topen ) = interpret_nt_align( $seq1, $seq2 )
151    #  ( $npos, $nid, $ndif, $ngap, $nopen, $tgap, $topen ) = interpret_aa_align( $seq1, $seq2 )
152  #  #
153    
154  use strict;  use strict;
# Line 225  Line 226 
226    
227          fraction_nt_diff          fraction_nt_diff
228          interpret_nt_align          interpret_nt_align
229            interpret_aa_align
230          );          );
231    
232  our @EXPORT_OK = qw(  our @EXPORT_OK = qw(
# Line 785  Line 787 
787  sub print_seq_as_fasta  sub print_seq_as_fasta
788  {  {
789      my $fh = ( ref $_[0] eq 'GLOB' ) ? shift : \*STDOUT;      my $fh = ( ref $_[0] eq 'GLOB' ) ? shift : \*STDOUT;
790        return if ( @_ < 2 ) || ( @_ > 3 ) || ! ( defined $_[0] && defined $_[-1] );
791      #  Print header line      #  Print header line
792      print $fh  ( @_ == 3 && $_[1] =~ /\S/ ) ? ">$_[0] $_[1]\n" : ">$_[0]\n";      print $fh  ( @_ == 3 && defined $_[1] && $_[1] =~ /\S/ ) ? ">$_[0] $_[1]\n" : ">$_[0]\n";
793      #  Print sequence, 60 chars per line      #  Print sequence, 60 chars per line
794      print $fh  join( "\n", $_[-1] =~ m/.{1,60}/g ), "\n";      print $fh  join( "\n", $_[-1] =~ m/.{1,60}/g ), "\n";
795  }  }
# Line 2156  Line 2159 
2159  }  }
2160    
2161    
2162    #-------------------------------------------------------------------------------
2163    #  Interpret an alignment of two protein sequences in terms of: useful
2164    #  aligned positions (unambiguous, and not a common gap), number of identical
2165    #  residues, number of differing residues, number of gaps, and number of gap
2166    #  opennings.
2167    #
2168    #     ( $npos, $nid, $ndif, $ngap, $nopen, $tgap, $topen ) = interpret_aa_align( $seq1, $seq2 )
2169    #
2170    #  $npos  = total aligned positons (= $nid + $ndif + $ngap)
2171    #  $nid   = number of positions with identical amino acids (ignoring case)
2172    #  $ndif  = number of positions with differing amino acids
2173    #  $ngap  = number of positions with gap in one sequence but not the other
2174    #  $nopen = number of runs of gaps
2175    #  $tgap  = number of gaps in runs adjacent to a terminus
2176    #  $topen = number of alignment ends with gaps
2177    #
2178    #-------------------------------------------------------------------------------
2179    sub interpret_aa_align
2180    {
2181        #  Remove alignment columns that are not informative:
2182        my ( $s1, $s2 ) = useful_aa_align( @_[0,1] );
2183        my $nmat = length( $s1 );            # Useful alignment length
2184    
2185        my $m1 = $s1;
2186        $m1 =~ tr/ACDEFGHIKLMNPQRSTUVWY/\377/;  # Amino acids to all 1 bits
2187        $m1 =~ tr/\377/\000/c;               # Others (gaps) to null byte
2188        my $m2 = $s2;
2189        $m2 =~ tr/ACDEFGHIKLMNPQRSTUVWY/\377/;  # Amino acids to all 1 bits
2190        $m2 =~ tr/\377/\000/c;               # Others (gaps) to null byte
2191        $m1 &= $m2;                          # Gap in either sequence becomes null
2192        $s1 &= $m1;                          # Apply mask to sequence 1
2193        $s2 &= $m1;                          # Apply mask to sequence 2
2194        my $nopen = @{[ $s1 =~ /\000+/g ]}   # Gap opens in sequence 1
2195                  + @{[ $s2 =~ /\000+/g ]};  # Gap opens in sequence 2
2196        my ( $tgap, $topen ) = ( 0, 0 );
2197        if ( $s1 =~ /^(\000+)/ || $s2 =~ /^(\000+)/ ) { $tgap += length( $1 ); $topen++ }
2198        if ( $s1 =~ /(\000+)$/ || $s2 =~ /(\000+)$/ ) { $tgap += length( $1 ); $topen++ }
2199        $s1 =~ tr/\000//d;                 # Remove nulls (former gaps)
2200        $s2 =~ tr/\000//d;                 # Remove nulls (former gaps)
2201        my $ngap = $nmat - length( $s1 );  # Total gaps
2202    
2203        my $xor = $s1 ^ $s2;               # xor of identical residues is null byte
2204        my $nid = ( $xor =~ tr/\000//d );  # Count the nulls (identical residues)
2205        my $ndif = $nmat - $nid - $ngap;
2206    
2207        ( $nmat, $nid, $ndif, $ngap, $nopen, $tgap, $topen )
2208    }
2209    
2210    
2211    sub useful_aa_align
2212    {
2213        my ( $s1, $s2 ) = map { uc $_ } @_;
2214        my $m1 = $s1;
2215        $m1 =~ tr/ACDEFGHIKLMNPQRSTUVWY-/\377/;  # Allowed symbols to hex FF byte
2216        $m1 =~ tr/\377/\000/c;  # All else to null byte
2217        my $m2 = $s2;
2218        $m2 =~ tr/ACDEFGHIKLMNPQRSTUVWY-/\377/;  # Allowed symbols to hex FF byte
2219        $m2 =~ tr/\377/\000/c;  # All else to null byte
2220        $m1 &= $m2;             # Invalid in either sequence becomes null
2221        $s1 &= $m1;             # Apply mask to sequence 1
2222        $s1 =~ tr/\000//d;      # Delete nulls in sequence 1
2223        $s2 &= $m1;             # Apply mask to sequence 2
2224        $s2 =~ tr/\000//d;      # Delete nulls in sequence 2
2225        ( $s1, $s2 )
2226    }
2227    
2228    
2229  1;  1;

Legend:
Removed from v.1.16  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3