[Bio] / FigKernelPackages / SeedUtils.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/SeedUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Wed Sep 2 20:28:21 2009 UTC revision 1.9, Wed Sep 30 15:35:00 2009 UTC
# Line 21  Line 21 
21    
22      use strict;      use strict;
23      use Tracer;      use Tracer;
24        use Digest::MD5 qw(md5_base64);
25    
26      use base qw(Exporter);      use base qw(Exporter);
27    
28      our @EXPORT = qw(create_fasta_record rev_comp genome_of min max sims verify_dir);      our @EXPORT = qw(parse_fasta_record create_fasta_record rev_comp genome_of min max sims verify_dir);
29    
30  =head1 SEED Utility Methods  =head1 SEED Utility Methods
31    
# Line 482  Line 484 
484      }      }
485  }  }
486    
487    =head3 parse_fasta_record
488    
489        my ($id, $comment, $seq) = parse_fasta_record($string);
490    
491    Extract the ID, comment, and sequence from a single FASTA record. For
492    backward compatibility, instead of a FASTA record the ID and sequence can
493    be specified separated by a comma. In this case, the returned comment
494    will be empty.
495    
496    =over 4
497    
498    =item string
499    
500    A single FASTA record, or an ID and sequence separated by a single comma,
501    an unadorned sequence, a 2-element list consisting of an ID and a sequence,
502    or a 3-element list consisting of an ID, a comment, and a sequence.
503    
504    =item RETURN
505    
506    Returns a three-element list consisting of the incoming ID, the associated
507    comment, and the specified DNA or protein sequence. If the incoming string is
508    invalid, all three list elements will come back undefined. If no ID is
509    specified, an MD5 will be provided.
510    
511    =back
512    
513    =cut
514    
sub parse_fasta_record {
    # Split a single sequence specification into its ID, comment, and
    # sequence parts.
    #
    # Parameter:
    #   $string - one of: a FASTA record (">id comment" header line followed
    #             by sequence lines), an "id,sequence" string, a bare
    #             sequence string, or a reference to a 2-element
    #             [id, sequence] or 3-element [id, comment, sequence] list.
    #
    # Returns the list ($id, $comment, $seq). All three are undefined when
    # $string is undefined. The comment is an empty string whenever the
    # input supplies none. A bare sequence is given an ID of the form
    # "md5|<base64 MD5 of the sequence>".
    my ($string) = @_;
    # Declare the return variables.
    my ($id, $comment, $seq);
    # Check the type of input.
    if (! defined $string) {
        # Nothing was passed in: leave all three results undefined. The
        # explicit check keeps the pattern matches below from running
        # against an undefined value.
    } elsif (ref $string eq 'ARRAY') {
        # The data came in pre-formatted as a list reference. This is tested
        # before the patterns so a reference is never stringified for
        # matching.
        ($id, $comment, $seq) = @$string;
        # A 2-element list is [id, sequence]: the value that landed in the
        # comment slot is really the sequence.
        if (! defined $seq) {
            $seq = $comment;
            $comment = '';
        }
    } elsif ($string =~ /^>(\S+)([\t ]+[^\r\n]*)?[\r\n]+(.+)/s) {
        # Here we have a standard FASTA string.
        ($id, $comment, $seq) = ($1, $2, $3);
        # Remove all white space (including line breaks) from the sequence.
        $seq =~ s/\s+//sg;
        # The comment group is optional, so it is undefined for a header
        # that holds only an ID. Normalize that to an empty string.
        $comment = '' if ! defined $comment;
        # Trim the white space that separated the comment from the ID.
        $comment =~ s/^\s+//;
    } elsif ($string =~ /(.+?)\s*,\s*(.+)/) {
        # Backward-compatible "id,sequence" form: there is no comment.
        ($id, $comment, $seq) = ($1, '', $2);
    } else {
        # Here we have only a sequence. We need to construct the ID.
        $seq = $string;
        $id = "md5|" . md5_base64($seq);
        $comment = "";
    }
    # Return the results.
    return ($id, $comment, $seq);
}
550    
551    
552    
553    
554  1;  1;

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3