[Bio] / FigKernelPackages / gjoseqlib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjoseqlib.pm

Parent Directory | Revision Log | View Patch

revision 1.37, Thu Aug 14 21:22:31 2014 UTC revision 1.38, Fri Aug 29 21:59:46 2014 UTC
# Line 186  Line 186 
186  #  #
187  #  @sims = oligomer_similarity( $seq1, $seq2, \%opts )  #  @sims = oligomer_similarity( $seq1, $seq2, \%opts )
188  #  #
189    #  Guess type of a sequence:
190    #
191    #  $type = guess_seq_type(  $sequence )
192    #  $bool = is_dna(  $sequence )   # not RNA or prot
193    #  $bool = is_rna(  $sequence )   # not DNA or prot
194    #  $bool = is_na(  $sequence )    # nucleic acid, not prot
195    #  $bool = is_prot(  $sequence )  # not DNA or RNA
196    #
197    #  $sequence can be a sequence string, a reference to a sequence string,
198    #      or a [$id, $def, $seq] triple.
199    #  $type will be keyword 'DNA', 'RNA' or 'protein', or '' (empty string) in case of error.
200    #
201  #  Verify the structure of an [ id, desc, sequence ] triple and  #  Verify the structure of an [ id, desc, sequence ] triple and
202  #  the structure of an array of sequence triples:  #  the structure of an array of sequence triples:
203  #  #
# Line 360  Line 372 
372  #    \@seq_entries = read_fasta( \*FILEHANDLE )  #    \@seq_entries = read_fasta( \*FILEHANDLE )
373  #     @seq_entries = read_fasta(  $filename )  #     @seq_entries = read_fasta(  $filename )
374  #    \@seq_entries = read_fasta(  $filename )  #    \@seq_entries = read_fasta(  $filename )
 #  #  @seq_entries = read_fasta( "command |" )   #  open and read from pipe  
 #  # \@seq_entries = read_fasta( "command |" )   #  open and read from pipe  
375  #     @seq_entries = read_fasta( \$string )      #  reference to file as string  #     @seq_entries = read_fasta( \$string )      #  reference to file as string
376  #    \@seq_entries = read_fasta( \$string )      #  reference to file as string  #    \@seq_entries = read_fasta( \$string )      #  reference to file as string
377  #  #
# Line 394  Line 404 
404          }          }
405      }      }
406    
407      wantarray() ? @seqs : \@seqs;      wantarray ? @seqs : \@seqs;
408  }  }
409    
410  #-----------------------------------------------------------------------------  #-----------------------------------------------------------------------------
# Line 2655  Line 2665 
2665    
2666    
2667  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2668    #  Guess type of a sequence:
2669    #
2670    #     $type = guess_seq_type(  $sequence )
2671    #     $bool = is_dna(  $sequence )   # not RNA or prot
2672    #     $bool = is_rna(  $sequence )   # not DNA or prot
2673    #     $bool = is_na(  $sequence )    # nucleic acid, not prot
2674    #     $bool = is_prot(  $sequence )  # not DNA or RNA
2675    #
2676    #  $sequence can be a string, a reference to a string, or an id_def_seq triple.
2677    #  $type will be a keyword: 'DNA', 'RNA' or 'protein', or '' in case of error.
2678    #-------------------------------------------------------------------------------
2679    sub guess_seq_type
2680    {
2681        local $_ = ( !      $_[0]               ) ?   undef
2682                 : ( ! ref( $_[0] )             ) ?   $_[0]
2683                 : (   ref( $_[0] ) eq 'SCALAR' ) ? ${$_[0]}
2684                 : (   ref( $_[0] ) eq 'ARRAY'  ) ?   $_[0]->[2]
2685                 :                                    undef;
2686    
2687        return '' unless $_;
2688    
2689        my $nt = tr/ACGNTUacgntu//;                                # nucleotides
2690        my $aa = tr/ACDEFGHIKLMNPQRSTVWXYacdefghiklmnpqrstvwxy//;  # amino acids
2691        return '' unless $nt + $aa > 10;
2692    
2693        return  $nt < 0.75 * $aa  ? 'protein'    # amino acids > nucleotides
2694              : tr/EFILPQefilpq// ? ''           # nonnucleotides are very bad
2695              : tr/Uu// > tr/Tt// ? 'RNA'
2696              :                     'DNA';
2697    }
2698    
2699    sub is_dna  { local $_ = guess_seq_type( $_[0] ); /DNA/   }
2700    sub is_rna  { local $_ = guess_seq_type( $_[0] ); /RNA/   }
2701    sub is_na   { local $_ = guess_seq_type( $_[0] ); /^.NA$/ }
2702    sub is_prot { local $_ = guess_seq_type( $_[0] ); /^prot/ }
2703    
2704    
2705    #-------------------------------------------------------------------------------
2706  #  Verify the structure of an [ id, desc, sequence ] triple and  #  Verify the structure of an [ id, desc, sequence ] triple and
2707  #  the structure of an array of sequence triples  #  the structure of an array of sequence triples
2708  #  #
# Line 2664  Line 2712 
2712  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2713  sub is_sequence_triple  sub is_sequence_triple
2714  {  {
2715      local $_ = $_[0];      local $_ = shift;
2716      $_ && ref( $_ ) eq 'ARRAY' && ( @$_ == 3 ) && defined( $_->[0] ) && defined( $_->[2] );      $_ && ( ref($_) eq 'ARRAY' )
2717           && ( @$_ == 3 )
2718           && defined( $_->[0] )
2719           && defined( $_->[2] );
2720  }  }
2721    
2722    
2723  sub is_array_of_sequence_triples  sub is_array_of_sequence_triples
2724  {  {
2725      local $_ = $_[0];      local $_ = $_[0];
2726      $_ && ref( $_ ) eq 'ARRAY' && @$_ == grep { is_sequence_triple( $_ ) } @$_;      $_ && ref( $_ ) eq 'ARRAY' && @$_ == grep { is_sequence_triple( $_ ) } @$_;
2727  }  }
2728    
2729    
2730  1;  1;

Legend:
Removed from v.1.37  
changed lines
  Added in v.1.38

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3