1 #ifndef SQL_STRING_INCLUDED
2 #define SQL_STRING_INCLUDED
3
/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
18
/* This file is originally from the MySQL distribution. Coded by Monty. */
20
21 #ifdef USE_PRAGMA_INTERFACE
22 #pragma interface /* gcc class implementation */
23 #endif
24
25 #include "m_ctype.h" /* my_charset_bin */
26 #include "my_sys.h" /* alloc_root, my_free, my_realloc */
27 #include "m_string.h" /* TRASH */
28
29 class String;
30 typedef struct charset_info_st CHARSET_INFO;
31 typedef struct st_io_cache IO_CACHE;
32 typedef struct st_mem_root MEM_ROOT;
33
34 int sortcmp(const String *a,const String *b, CHARSET_INFO *cs);
35 String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
36 uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
37 const char *from, uint32 from_length,
38 CHARSET_INFO *from_cs, uint *errors);
39 uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
40 char *to, uint to_length,
41 CHARSET_INFO *from_cs,
42 const char *from, uint from_length,
43 uint nchars,
44 const char **well_formed_error_pos,
45 const char **cannot_convert_error_pos,
46 const char **from_end_pos);
47 size_t my_copy_with_hex_escaping(CHARSET_INFO *cs,
48 char *dst, size_t dstlen,
49 const char *src, size_t srclen);
50 uint convert_to_printable(char *to, size_t to_len,
51 const char *from, size_t from_len,
52 CHARSET_INFO *from_cs, size_t nbytes= 0);
53
54 class String
55 {
56 char *Ptr;
57 uint32 str_length,Alloced_length;
58 bool alloced;
59 CHARSET_INFO *str_charset;
60 public:
String()61 String()
62 {
63 Ptr=0; str_length=Alloced_length=0; alloced=0;
64 str_charset= &my_charset_bin;
65 }
String(uint32 length_arg)66 String(uint32 length_arg)
67 {
68 alloced=0; Alloced_length=0; (void) real_alloc(length_arg);
69 str_charset= &my_charset_bin;
70 }
String(const char * str,CHARSET_INFO * cs)71 String(const char *str, CHARSET_INFO *cs)
72 {
73 Ptr=(char*) str; str_length=(uint) strlen(str); Alloced_length=0; alloced=0;
74 str_charset=cs;
75 }
String(const char * str,uint32 len,CHARSET_INFO * cs)76 String(const char *str,uint32 len, CHARSET_INFO *cs)
77 {
78 Ptr=(char*) str; str_length=len; Alloced_length=0; alloced=0;
79 str_charset=cs;
80 }
String(char * str,uint32 len,CHARSET_INFO * cs)81 String(char *str,uint32 len, CHARSET_INFO *cs)
82 {
83 Ptr=(char*) str; Alloced_length=str_length=len; alloced=0;
84 str_charset=cs;
85 }
String(const String & str)86 String(const String &str)
87 {
88 Ptr=str.Ptr ; str_length=str.str_length ;
89 Alloced_length=str.Alloced_length; alloced=0;
90 str_charset=str.str_charset;
91 }
new(size_t size,MEM_ROOT * mem_root)92 static void *operator new(size_t size, MEM_ROOT *mem_root) throw ()
93 { return (void*) alloc_root(mem_root, (uint) size); }
delete(void * ptr_arg,size_t size)94 static void operator delete(void *ptr_arg, size_t size)
95 {
96 (void) ptr_arg;
97 (void) size;
98 TRASH(ptr_arg, size);
99 }
delete(void *,MEM_ROOT *)100 static void operator delete(void *, MEM_ROOT *)
101 { /* never called */ }
~String()102 ~String() { free(); }
103
set_charset(CHARSET_INFO * charset_arg)104 inline void set_charset(CHARSET_INFO *charset_arg)
105 { str_charset= charset_arg; }
charset()106 inline CHARSET_INFO *charset() const { return str_charset; }
length()107 inline uint32 length() const { return str_length;}
alloced_length()108 inline uint32 alloced_length() const { return Alloced_length;}
109 inline char& operator [] (uint32 i) const { return Ptr[i]; }
length(uint32 len)110 inline void length(uint32 len) { str_length=len ; }
is_empty()111 inline bool is_empty() const { return (str_length == 0); }
mark_as_const()112 inline void mark_as_const() { Alloced_length= 0;}
ptr()113 inline const char *ptr() const { return Ptr; }
c_ptr()114 inline char *c_ptr()
115 {
116 DBUG_ASSERT(!alloced || !Ptr || !Alloced_length ||
117 (Alloced_length >= (str_length + 1)));
118
119 if (!Ptr || Ptr[str_length]) /* Should be safe */
120 (void) realloc(str_length);
121 return Ptr;
122 }
c_ptr_quick()123 inline char *c_ptr_quick()
124 {
125 if (Ptr && str_length < Alloced_length)
126 Ptr[str_length]=0;
127 return Ptr;
128 }
c_ptr_safe()129 inline char *c_ptr_safe()
130 {
131 if (Ptr && str_length < Alloced_length)
132 Ptr[str_length]=0;
133 else
134 (void) realloc(str_length);
135 return Ptr;
136 }
lex_string()137 LEX_STRING lex_string() const
138 {
139 LEX_STRING lex_string = { (char*) ptr(), length() };
140 return lex_string;
141 }
142
set(String & str,uint32 offset,uint32 arg_length)143 void set(String &str,uint32 offset,uint32 arg_length)
144 {
145 DBUG_ASSERT(&str != this);
146 free();
147 Ptr=(char*) str.ptr()+offset; str_length=arg_length; alloced=0;
148 if (str.Alloced_length)
149 Alloced_length=str.Alloced_length-offset;
150 else
151 Alloced_length=0;
152 str_charset=str.str_charset;
153 }
154
155
156 /**
157 Points the internal buffer to the supplied one. The old buffer is freed.
158 @param str Pointer to the new buffer.
159 @param arg_length Length of the new buffer in characters, excluding any
160 null character.
161 @param cs Character set to use for interpreting string data.
162 @note The new buffer will not be null terminated.
163 */
set(char * str,uint32 arg_length,CHARSET_INFO * cs)164 inline void set(char *str,uint32 arg_length, CHARSET_INFO *cs)
165 {
166 free();
167 Ptr=(char*) str; str_length=Alloced_length=arg_length ; alloced=0;
168 str_charset=cs;
169 }
set(const char * str,uint32 arg_length,CHARSET_INFO * cs)170 inline void set(const char *str,uint32 arg_length, CHARSET_INFO *cs)
171 {
172 free();
173 Ptr=(char*) str; str_length=arg_length; Alloced_length=0 ; alloced=0;
174 str_charset=cs;
175 }
176 bool set_ascii(const char *str, uint32 arg_length);
set_quick(char * str,uint32 arg_length,CHARSET_INFO * cs)177 inline void set_quick(char *str,uint32 arg_length, CHARSET_INFO *cs)
178 {
179 if (!alloced)
180 {
181 Ptr=(char*) str; str_length=Alloced_length=arg_length;
182 }
183 str_charset=cs;
184 }
185 bool set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs);
set(longlong num,CHARSET_INFO * cs)186 bool set(longlong num, CHARSET_INFO *cs)
187 { return set_int(num, false, cs); }
set(ulonglong num,CHARSET_INFO * cs)188 bool set(ulonglong num, CHARSET_INFO *cs)
189 { return set_int((longlong)num, true, cs); }
190 bool set_real(double num,uint decimals, CHARSET_INFO *cs);
191
192 /*
193 PMG 2004.11.12
194 This is a method that works the same as perl's "chop". It simply
195 drops the last character of a string. This is useful in the case
196 of the federated storage handler where I'm building a unknown
197 number, list of values and fields to be used in a sql insert
198 statement to be run on the remote server, and have a comma after each.
199 When the list is complete, I "chop" off the trailing comma
200
201 ex.
202 String stringobj;
203 stringobj.append("VALUES ('foo', 'fi', 'fo',");
204 stringobj.chop();
205 stringobj.append(")");
206
207 In this case, the value of string was:
208
209 VALUES ('foo', 'fi', 'fo',
210 VALUES ('foo', 'fi', 'fo'
211 VALUES ('foo', 'fi', 'fo')
212
213 */
chop()214 inline void chop()
215 {
216 Ptr[str_length--]= '\0';
217 }
218
free()219 inline void free()
220 {
221 if (alloced)
222 {
223 alloced=0;
224 Alloced_length=0;
225 my_free(Ptr);
226 Ptr=0;
227 str_length=0; /* Safety */
228 }
229 }
alloc(uint32 arg_length)230 inline bool alloc(uint32 arg_length)
231 {
232 if (arg_length < Alloced_length)
233 return 0;
234 return real_alloc(arg_length);
235 }
236 bool real_alloc(uint32 arg_length); // Empties old string
237 bool realloc(uint32 arg_length);
238
239 // Shrink the buffer, but only if it is allocated on the heap.
shrink(uint32 arg_length)240 inline void shrink(uint32 arg_length)
241 {
242 if (!is_alloced())
243 return;
244 if (arg_length < Alloced_length)
245 {
246 char *new_ptr;
247 if (!(new_ptr=(char*) my_realloc(Ptr,arg_length,MYF(0))))
248 {
249 Alloced_length = 0;
250 real_alloc(arg_length);
251 }
252 else
253 {
254 Ptr=new_ptr;
255 Alloced_length=arg_length;
256 }
257 }
258 }
is_alloced()259 bool is_alloced() const { return alloced; }
260 inline String& operator = (const String &s)
261 {
262 if (&s != this)
263 {
264 /*
265 It is forbidden to do assignments like
266 some_string = substring_of_that_string
267 */
268 DBUG_ASSERT(!s.uses_buffer_owned_by(this));
269 free();
270 Ptr=s.Ptr ; str_length=s.str_length ; Alloced_length=s.Alloced_length;
271 str_charset=s.str_charset;
272 alloced=0;
273 }
274 return *this;
275 }
276
277 bool copy(); // Alloc string if not alloced
278 bool copy(const String &s); // Allocate new string
279 bool copy(const char *s,uint32 arg_length, CHARSET_INFO *cs); // Allocate new string
280 static bool needs_conversion(uint32 arg_length,
281 CHARSET_INFO *cs_from, CHARSET_INFO *cs_to,
282 uint32 *offset);
283 static bool needs_conversion_on_storage(uint32 arg_length,
284 CHARSET_INFO *cs_from,
285 CHARSET_INFO *cs_to);
286 bool copy_aligned(const char *s, uint32 arg_length, uint32 offset,
287 CHARSET_INFO *cs);
288 bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
289 bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom,
290 CHARSET_INFO *csto, uint *errors);
291 bool append(const String &s);
292 bool append(const char *s);
append(LEX_STRING * ls)293 bool append(LEX_STRING *ls)
294 {
295 return append(ls->str, ls->length);
296 }
297 bool append(const char *s, uint32 arg_length);
298 bool append(const char *s, uint32 arg_length, CHARSET_INFO *cs);
299 bool append_ulonglong(ulonglong val);
300 bool append(IO_CACHE* file, uint32 arg_length);
301 bool append_with_prefill(const char *s, uint32 arg_length,
302 uint32 full_length, char fill_char);
303 int strstr(const String &search,uint32 offset=0); // Returns offset to substring or -1
304 int strrstr(const String &search,uint32 offset=0); // Returns offset to substring or -1
305 bool replace(uint32 offset,uint32 arg_length,const char *to,uint32 length);
306 bool replace(uint32 offset,uint32 arg_length,const String &to);
append(char chr)307 inline bool append(char chr)
308 {
309 if (str_length < Alloced_length)
310 {
311 Ptr[str_length++]=chr;
312 }
313 else
314 {
315 if (realloc(str_length+1))
316 return 1;
317 Ptr[str_length++]=chr;
318 }
319 return 0;
320 }
321 bool fill(uint32 max_length,char fill);
322 void strip_sp();
323 friend int sortcmp(const String *a,const String *b, CHARSET_INFO *cs);
324 friend int stringcmp(const String *a,const String *b);
325 friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
326 uint32 numchars();
327 int charpos(int i,uint32 offset=0);
328
reserve(uint32 space_needed)329 int reserve(uint32 space_needed)
330 {
331 return realloc(str_length + space_needed);
332 }
333 int reserve(uint32 space_needed, uint32 grow_by);
334
335 /*
336 The following append operations do NOT check alloced memory
337 q_*** methods writes values of parameters itself
338 qs_*** methods writes string representation of value
339 */
q_append(const char c)340 void q_append(const char c)
341 {
342 Ptr[str_length++] = c;
343 }
q_append(const uint32 n)344 void q_append(const uint32 n)
345 {
346 int4store(Ptr + str_length, n);
347 str_length += 4;
348 }
q_append(double d)349 void q_append(double d)
350 {
351 float8store(Ptr + str_length, d);
352 str_length += 8;
353 }
q_append(double * d)354 void q_append(double *d)
355 {
356 float8store(Ptr + str_length, *d);
357 str_length += 8;
358 }
q_append(const char * data,uint32 data_len)359 void q_append(const char *data, uint32 data_len)
360 {
361 memcpy(Ptr + str_length, data, data_len);
362 str_length += data_len;
363 }
364
write_at_position(int position,uint32 value)365 void write_at_position(int position, uint32 value)
366 {
367 int4store(Ptr + position,value);
368 }
369
370 void qs_append(const char *str, uint32 len);
371 void qs_append(double d);
372 void qs_append(double *d);
qs_append(const char c)373 inline void qs_append(const char c)
374 {
375 Ptr[str_length]= c;
376 str_length++;
377 }
378 void qs_append(int i);
379 void qs_append(uint i);
380
381 /* Inline (general) functions used by the protocol functions */
382
prep_append(uint32 arg_length,uint32 step_alloc)383 inline char *prep_append(uint32 arg_length, uint32 step_alloc)
384 {
385 uint32 new_length= arg_length + str_length;
386 if (new_length > Alloced_length)
387 {
388 if (realloc(new_length + step_alloc))
389 return 0;
390 }
391 uint32 old_length= str_length;
392 str_length+= arg_length;
393 return Ptr+ old_length; /* Area to use */
394 }
395
append(const char * s,uint32 arg_length,uint32 step_alloc)396 inline bool append(const char *s, uint32 arg_length, uint32 step_alloc)
397 {
398 uint32 new_length= arg_length + str_length;
399 if (new_length > Alloced_length && realloc(new_length + step_alloc))
400 return TRUE;
401 memcpy(Ptr+str_length, s, arg_length);
402 str_length+= arg_length;
403 return FALSE;
404 }
405 void print(String *print);
406
407 /* Swap two string objects. Efficient way to exchange data without memcpy. */
408 void swap(String &s);
409
uses_buffer_owned_by(const String * s)410 inline bool uses_buffer_owned_by(const String *s) const
411 {
412 return (s->alloced && Ptr >= s->Ptr && Ptr < s->Ptr + s->str_length);
413 }
is_ascii()414 bool is_ascii() const
415 {
416 if (length() == 0)
417 return TRUE;
418 if (charset()->mbminlen > 1)
419 return FALSE;
420 for (const char *c= ptr(), *end= c + length(); c < end; c++)
421 {
422 if (!my_isascii(*c))
423 return FALSE;
424 }
425 return TRUE;
426 }
427 };
428
check_if_only_end_space(CHARSET_INFO * cs,char * str,char * end)429 static inline bool check_if_only_end_space(CHARSET_INFO *cs, char *str,
430 char *end)
431 {
432 return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
433 }
434
435 bool
436 validate_string(CHARSET_INFO *cs, const char *str, uint32 length,
437 size_t *valid_length, bool *length_error);
438 #endif /* SQL_STRING_INCLUDED */
439