[Bio] / Sprout / ERDBTypeText.pm Repository:
ViewVC logotype

Annotation of /Sprout/ERDBTypeText.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package ERDBTypeText;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use base qw(ERDBType);
26 :    
27 :     =head1 ERDB Text Type Definition
28 :    
29 :     =head2 Introduction
30 :    
31 : parrello 1.4 This object represents the primitive data type for long strings (0 to 16M
32 : parrello 1.1 characters). These are stored with tabs, newlines, and backslashes escaped, and
33 :     unlike normal strings they are large enough that it is impractical to index the
34 :     entire length.
35 :    
36 :     =head3 new
37 :    
38 :     my $et = ERDBTypeText->new();
39 :    
40 :     Construct a new ERDBTypeText descriptor.
41 :    
42 :     =cut
43 :    
sub new {
    my ($class) = @_;
    # The descriptor carries no state of its own, so an empty hash blessed
    # into the requested class is all that is needed.
    return bless {}, $class;
}
53 :    
54 :     =head2 Virtual Methods
55 :    
56 :     =head3 averageLength
57 :    
58 :     my $value = $et->averageLength();
59 :    
60 :     Return the average length of a data item of this field type when it is stored in the
61 :     database. This value is used to compute the expected size of a database table.
62 :    
63 :     =cut
64 :    
sub averageLength {
    # Estimated stored size of one text value, used for table-size estimates.
    my $estimate = 1000;
    return $estimate;
}
68 :    
69 :     =head3 prettySortValue
70 :    
71 :     my $value = $et->prettySortValue();
72 :    
73 :     Number indicating where fields of this type should go in relation to other
74 :     fields. The value should be somewhere between C<1> and C<5>. A value outside
75 :     that range will make terrible things happen.
76 :    
77 :     =cut
78 :    
sub prettySortValue {
    # This is invoked as a method, and method calls ignore prototypes, so
    # the sub is declared without one.
    # Returns the sort position (4) for fields of this type.
    return 4;
}
82 :    
83 :     =head3 validate
84 :    
85 :     my $okFlag = $et->validate($value);
86 :    
87 :     Return an error message if the specified value is invalid for this field type.
88 :    
89 :     The parameters are as follows.
90 :    
91 :     =over 4
92 :    
93 :     =item value
94 :    
95 :     Value of this type, for validation.
96 :    
97 :     =item RETURN
98 :    
99 :     Returns an empty string if the specified field is valid, and an error message
100 :     otherwise.
101 :    
102 :     =back
103 :    
104 :     =cut
105 :    
sub validate {
    # Get the parameters.
    my ($self, $value) = @_;
    # Largest length we accept. A MySQL MEDIUMTEXT column (see sqlType) holds
    # at most 2**24 - 1 = 16777215, so a string of exactly 16777216 must be
    # rejected as well.
    my $maxLength = 16777215;
    # Assume it's valid until we prove otherwise.
    my $retVal = "";
    # Escape the text; the length check is applied to the escaped form.
    my $text = Tracer::Escape($value);
    # Verify the length.
    if (length($text) > $maxLength) {
        $retVal = "Text string too long.";
    }
    # Return the determination: empty string if valid, else an error message.
    return $retVal;
}
120 :    
121 :     =head3 encode
122 :    
123 :     my $string = $et->encode($value, $mode);
124 :    
125 :     Encode a value of this field type for storage in the database (or in a database load
126 :     file.)
127 :    
128 :     The parameters are as follows.
129 :    
130 :     =over 4
131 :    
132 :     =item value
133 :    
134 :     Value of this type, for encoding.
135 :    
136 :     =item mode
137 :    
138 :     TRUE if the value is being encoded for placement in a load file, FALSE if it
139 :     is being encoded for use as an SQL statement parameter. In most cases, the
140 :     encoding is the same for both modes.
141 :    
142 :     =back
143 :    
144 :     =cut
145 :    
# Encoding table: maps each character that needs escaping to its stored form.
# Characters 0x80 through 0xFF become a literal backslash, "x", and two
# upper-case hex digits; newline, backslash and tab get backslash escapes;
# carriage returns map to the empty string and are therefore removed.
use constant ENHASH => {
    ( map { ( chr($_) => sprintf('\\x%02X', $_) ) } 0x80 .. 0xFF ),
    "\n" => "\\n",
    "\\" => "\\\\",
    "\t" => "\\t",
    "\r" => "",
};

sub encode {
    # Get the parameters. The mode is accepted but not consulted: the same
    # encoding is produced in both cases.
    my ($self, $value, $mode) = @_;
    # Fetch the translation table once.
    my $table = ENHASH;
    # Work on a copy so the caller's value is left untouched.
    my $encoded = $value;
    # Replace every character that has an entry in the table.
    $encoded =~ s/([\t\n\r\\\x80-\xFF])/$table->{$1}/g;
    return $encoded;
}
166 :    
167 :     =head3 decode
168 :    
169 :     my $value = $et->decode($string);
170 :    
171 :     Decode a string from the database into a value of this field type.
172 :    
173 :     The parameters are as follows.
174 :    
175 :     =over 4
176 :    
177 :     =item string
178 :    
179 :     String from the database to be decoded.
180 :    
181 :     =item RETURN
182 :    
183 :     Returns a value of the desired type.
184 :    
185 :     =back
186 :    
187 :     =cut
188 :    
# Decoding table: maps the text following a backslash in the stored form back
# to the original character. "xNN" (two upper-case hex digits, 80 through FF)
# yields the character with that code; "n", "\" and "t" yield newline,
# backslash and tab.
use constant DEHASH => {
    ( map { ( sprintf('x%02X', $_) => chr($_) ) } 0x80 .. 0xFF ),
    "n"  => "\n",
    "\\" => "\\",
    "t"  => "\t",
};

sub decode {
    # Get the parameters.
    my ($self, $string) = @_;
    # Fetch the translation table once.
    my $table = DEHASH;
    # Work on a copy so the caller's value is left untouched.
    my $retVal = $string;
    # Perform the decoding substitutions. The three-character "x.." form is
    # tried before the single-character form. An escape with no table entry
    # decodes to the empty string, as it always has, but the lookup is now
    # explicit so it no longer triggers "uninitialized value" warnings.
    $retVal =~ s/\\(x..|.)/exists $table->{$1} ? $table->{$1} : ''/ge;
    # Return the result.
    return $retVal;
}
209 :    
210 :     =head3 sqlType
211 :    
212 : parrello 1.5 my $typeString = $et->sqlType($dbh);
213 : parrello 1.1
214 :     Return the SQL data type for this field type.
215 :    
216 : parrello 1.5 =over 4
217 :    
218 :     =item dbh
219 :    
220 :     Open L<DBKernel> handle for the database in question. This is used when the
221 :     datatype may be different depending on the DBMS used.
222 :    
223 :     =item RETURN
224 :    
225 :     Returns the datatype string to be used when creating a field of this type in
226 :     an SQL table.
227 :    
228 :     =back
229 :    
230 : parrello 1.1 =cut
231 :    
sub sqlType {
    # Get the parameters.
    my ($self, $dbh) = @_;
    # MySQL gets MEDIUMTEXT; every other DBMS gets plain TEXT.
    return ($dbh->dbms() eq 'mysql' ? "MEDIUMTEXT" : "TEXT");
}
240 :    
241 :     =head3 indexMod
242 :    
243 :     my $length = $et->indexMod();
244 :    
245 :     Return the index modifier for this field type. The index modifier is the number of
246 :     characters to be indexed. If it is undefined, the field cannot be indexed. If it
247 :     is an empty string, the entire field is indexed. The default is an empty string.
248 :    
249 :     =cut
250 :    
sub indexMod {
    # Number of leading characters of a text field that take part in an index.
    my $indexedChars = 250;
    return $indexedChars;
}
254 :    
255 :     =head3 sortType
256 :    
257 :     my $letter = $et->sortType();
258 :    
259 :     Return the sorting type for this field type. The sorting type is C<n> for integers,
260 :     C<g> for floating-point numbers, and the empty string for character fields.
261 :     The default is the empty string.
262 :    
263 :     =cut
264 :    
sub sortType {
    # Text is a character field, so it uses the empty-string sort type.
    my $sortLetter = '';
    return $sortLetter;
}
268 :    
269 :     =head3 documentation
270 :    
271 :     my $docText = $et->documentation();
272 :    
273 :     Return the documentation text for this field type. This should be in TWiki markup
274 :     format, though HTML will also work.
275 :    
276 :     =cut
277 :    
sub documentation {
    # This is invoked as a method, and method calls ignore prototypes, so
    # the sub is declared without one.
    # Returns the human-readable description of this field type.
    return 'Long character string, from 0 to approximately 16 million characters, not generally indexable.';
}
281 :    
282 :     =head3 name
283 :    
284 :     my $name = $et->name();
285 :    
286 :     Return the name of this type, as it will appear in the XML database definition.
287 :    
288 :     =cut
289 :    
sub name {
    # This is invoked as a method, and method calls ignore prototypes, so
    # the sub is declared without one.
    # Returns the type name.
    return "text";
}
293 :    
294 :     =head3 default
295 :    
296 :     my $defaultValue = $et->default();
297 :    
298 :     Default value to be used for fields of this type if no default value is
299 : parrello 1.3 specified in the database definition or in an L<ERDBLoadGroup/Put>
300 : parrello 1.1 call during a loader operation. For this type the default is an empty
301 :     string.
302 :    
303 :     =cut
304 :    
sub default {
    # Text fields default to the empty string.
    my $fallback = '';
    return $fallback;
}
308 :    
309 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3