[Bio] / FigKernelPackages / SeedUtils.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/SeedUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package SeedUtils;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use base qw(Exporter);
25 :    
26 : parrello 1.2 our @EXPORT = qw(create_fasta_record rev_comp);
27 : parrello 1.1
28 :     =head1 SEED Utility Methods
29 :    
30 :     =head2 Introduction
31 :    
32 :     This is a simple utility package that performs functions useful for
33 :     bioinformatics, but that do not require access to the databases.
34 :    
35 :     =head2 Public Methods
36 :    
37 :     =head3 create_fasta_record
38 :    
39 :     my $fastaString = create_fasta_record($id, $comment, $sequence);
40 :    
41 :     Create a FASTA record from the specified DNA or protein sequence. The
42 :     sequence will be split into 60-character lines, and the record will
43 :     include an identifier line and the trailing double-slash.
44 :    
45 :     =over 4
46 :    
47 :     =item id
48 :    
49 :     ID for the sequence, to be placed at the beginning of the identifier
50 :     line.
51 :    
52 :     =item comment (optional)
53 :    
54 :     Comment text to place after the ID on the identifier line. If this parameter
55 :     is empty, undefined, or 0, no comment will be placed.
56 :    
57 :     =item sequence
58 :    
59 :     Sequence of letters to form into FASTA. For purposes of convenience, whitespace
60 :     characters in the sequence will be removed automatically.
61 :    
62 :     =item RETURN
63 :    
64 :     Returns the desired sequence in FASTA format.
65 :    
66 :     =back
67 :    
68 :     =cut
69 :    
# Build a FASTA record from an ID, an optional comment, and a sequence.
#
# The record consists of a header line (">" followed by the ID and, if
# present, a space and the comment), the sequence broken into lines of at
# most 60 characters, and a trailing "//" line.
#
# Parameters:
#   $id        ID placed immediately after the ">" on the header line.
#   $comment   Optional text appended to the header; skipped when it is
#              empty, undefined, or 0.
#   $sequence  Sequence letters. All whitespace is removed before the
#              sequence is split into lines. An undefined sequence is
#              treated as empty.
#
# Returns the FASTA record as a single newline-terminated string.
sub create_fasta_record {
    # Get the parameters.
    my ($id, $comment, $sequence) = @_;
    # Treat a missing sequence as empty so the clean-up below does not
    # operate on an undefined value.
    $sequence = '' unless defined $sequence;
    # Start with the ID.
    my $header = ">$id";
    # Add a comment, if any.
    if ($comment) {
        $header .= " $comment";
    }
    # Clean up the sequence.
    $sequence =~ s/\s+//g;
    # Format the sequence into 60-character chunks. A global match in list
    # context returns every captured chunk in order. Unlike filtering the
    # output of split by truthiness, this keeps a chunk that consists solely
    # of the character "0".
    my @chunks = ($sequence =~ /(.{1,60})/g);
    Trace(scalar(@chunks) . " chunks found in sequence of length " .
          length($sequence) . ".") if T(3);
    # Assemble the header, the chunks, and the trailer, and return the result.
    return join("\n", $header, @chunks, "//\n");
}
94 :    
95 : parrello 1.2 =head3 rev_comp
96 :    
97 :     my $revcmp = rev_comp($dna);
98 :    
99 :     or
100 :    
101 :     rev_comp(\$dna);
102 :    
103 :     Return the reverse complement of a DNA string.
104 :    
105 :     =over 4
106 :    
107 :     =item dna
108 :    
109 :     Either a DNA string, or a reference to a DNA string.
110 :    
111 :     =item RETURN
112 :    
113 :     If the input is a DNA string, returns its reverse complement in lower
114 :     case. If the input is a reference to a DNA string, the referenced string
115 :     is replaced by its lower-case reverse complement and nothing is returned.
116 :    
117 :     =back
118 :    
119 :     =cut
120 :    
# Reverse-complement a DNA string.
#
# Parameters:
#   $dna   Either a DNA string or a reference to a DNA string.
#
# Returns the lower-case reverse complement when a plain string is passed.
# When a scalar reference is passed, the referenced string is replaced by
# its lower-case reverse complement and nothing is returned.
#
# The translation covers a, c, g, t, u and the ambiguity codes
# m, r, w, s, y, k, b, d, h, v; any other character (for example "n")
# is left unchanged.
sub rev_comp {
    # Get the parameters.
    my ($dna) = @_;
    # Determine how we were called: a scalar reference means "modify in
    # place", anything else is treated as the DNA string itself.
    my $refMode = (ref $dna eq 'SCALAR') ? 1 : 0;
    # Reverse the string (scalar context reverses characters, not list
    # elements) and fold it to lower case so one translation table suffices.
    my $retVal = lc scalar reverse($refMode ? $$dna : $dna);
    # Complement each base; "u" is complemented to "a" like "t".
    $retVal =~ tr/acgtumrwsykbdhv/tgcaakywsrmvhdb/;
    # Return the result in the manner corresponding to the way it came in.
    if ($refMode) {
        $$dna = $retVal;
        return;
    }
    return $retVal;
}
145 :    
146 : parrello 1.1
147 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3