1 #ifndef SQL_STRING_INCLUDED
2 #define SQL_STRING_INCLUDED
3
4 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26 /* This file is originally from the mysql distribution. Coded by monty */
27
28 #include "m_ctype.h" /* my_charset_bin */
29 #include "my_sys.h" /* alloc_root, my_free, my_realloc */
30 #include "m_string.h" /* TRASH */
31
32 #ifdef MYSQL_SERVER
33 extern PSI_memory_key key_memory_String_value;
34 #define STRING_PSI_MEMORY_KEY key_memory_String_value
35 #else
36 #define STRING_PSI_MEMORY_KEY PSI_NOT_INSTRUMENTED
37 #endif
38
/**
  A wrapper class for null-terminated constant strings.
  Constructors make sure that the position of the '\0' terminating byte
  in m_str is always in sync with m_length.

  This class must stay as small as possible as we often
  pass it and its descendants (such as Name_string) into functions
  using call-by-value evaluation.

  Don't add new members or virtual methods into this class!
*/
50 class Simple_cstring
51 {
52 private:
53 const char *m_str;
54 size_t m_length;
55 protected:
56 /**
57 Initialize from a C string whose length is already known.
58 */
set(const char * str_arg,size_t length_arg)59 void set(const char *str_arg, size_t length_arg)
60 {
61 // NULL is allowed only with length==0
62 assert(str_arg || length_arg == 0);
63 // For non-NULL, make sure length_arg is in sync with '\0' terminator.
64 assert(!str_arg || str_arg[length_arg] == '\0');
65 m_str= str_arg;
66 m_length= length_arg;
67 }
68 public:
Simple_cstring()69 Simple_cstring()
70 {
71 set(NULL, 0);
72 }
Simple_cstring(const char * str_arg,size_t length_arg)73 Simple_cstring(const char *str_arg, size_t length_arg)
74 {
75 set(str_arg, length_arg);
76 }
Simple_cstring(const LEX_STRING arg)77 Simple_cstring(const LEX_STRING arg)
78 {
79 set(arg.str, arg.length);
80 }
reset()81 void reset()
82 {
83 set(NULL, 0);
84 }
85 /**
86 Set to a null-terminated string.
87 */
set(const char * str)88 void set(const char *str)
89 {
90 set(str, str ? strlen(str) : 0);
91 }
92 /**
93 Return string buffer.
94 */
ptr()95 const char *ptr() const { return m_str; }
96 /**
97 Check if m_ptr is set.
98 */
is_set()99 bool is_set() const { return m_str != NULL; }
100 /**
101 Return name length.
102 */
length()103 size_t length() const { return m_length; }
104 /**
105 Compare to another Simple_cstring.
106 */
eq_bin(const Simple_cstring other)107 bool eq_bin(const Simple_cstring other) const
108 {
109 return m_length == other.m_length &&
110 memcmp(m_str, other.m_str, m_length) == 0;
111 }
112 /**
113 Copy to the given buffer
114 */
strcpy(char * buff)115 void strcpy(char *buff) const
116 {
117 memcpy(buff, m_str, m_length);
118 buff[m_length]= '\0';
119 }
120 };
121
122
/*
  Forward declarations, so that the prototypes in this header can name
  these types before (or without) seeing their full definitions.
*/
class String;
typedef struct charset_info_st CHARSET_INFO;
typedef struct st_io_cache     IO_CACHE;
typedef struct st_mem_root     MEM_ROOT;

/*
  Free functions working on String objects. Both are also declared as
  friends inside class String; their definitions are not in this header.
*/
int sortcmp(const String *a, const String *b, const CHARSET_INFO *cs);
String *copy_if_not_alloced(String *a, String *b, size_t arg_length);
copy_and_convert(char * to,size_t to_length,const CHARSET_INFO * to_cs,const char * from,size_t from_length,const CHARSET_INFO * from_cs,uint * errors)130 inline size_t copy_and_convert(char *to, size_t to_length,
131 const CHARSET_INFO *to_cs,
132 const char *from, size_t from_length,
133 const CHARSET_INFO *from_cs, uint *errors)
134 {
135 return my_convert(to, to_length, to_cs, from, from_length, from_cs, errors);
136 }
137 size_t well_formed_copy_nchars(const CHARSET_INFO *to_cs,
138 char *to, size_t to_length,
139 const CHARSET_INFO *from_cs,
140 const char *from, size_t from_length,
141 size_t nchars,
142 const char **well_formed_error_pos,
143 const char **cannot_convert_error_pos,
144 const char **from_end_pos);
145 size_t convert_to_printable(char *to, size_t to_len,
146 const char *from, size_t from_len,
147 const CHARSET_INFO *from_cs, size_t nbytes= 0);
148
149 size_t bin_to_hex_str(char *to, size_t to_len, char *from, size_t from_len);
150
151 class String
152 {
153 char *m_ptr;
154 size_t m_length;
155 const CHARSET_INFO *m_charset;
156 uint32 m_alloced_length; // should be size_t, but kept uint32 for size reasons
157 bool m_is_alloced;
158 public:
String()159 String()
160 :m_ptr(NULL), m_length(0), m_charset(&my_charset_bin),
161 m_alloced_length(0), m_is_alloced(false)
162 { }
String(size_t length_arg)163 String(size_t length_arg)
164 :m_ptr(NULL), m_length(0), m_charset(&my_charset_bin),
165 m_alloced_length(0), m_is_alloced(false)
166 {
167 (void) real_alloc(length_arg);
168 }
String(const char * str,const CHARSET_INFO * cs)169 String(const char *str, const CHARSET_INFO *cs)
170 :m_ptr(const_cast<char*>(str)), m_length(strlen(str)),
171 m_charset(cs), m_alloced_length(0), m_is_alloced(false)
172 { }
String(const char * str,size_t len,const CHARSET_INFO * cs)173 String(const char *str, size_t len, const CHARSET_INFO *cs)
174 :m_ptr(const_cast<char*>(str)), m_length(len),
175 m_charset(cs), m_alloced_length(0), m_is_alloced(false)
176 { }
String(char * str,size_t len,const CHARSET_INFO * cs)177 String(char *str, size_t len, const CHARSET_INFO *cs)
178 :m_ptr(str), m_length(len), m_charset(cs),
179 m_alloced_length(static_cast<uint32>(len)), m_is_alloced(false)
180 { }
String(const String & str)181 String(const String &str)
182 :m_ptr(str.m_ptr), m_length(str.m_length), m_charset(str.m_charset),
183 m_alloced_length(static_cast<uint32>(str.m_alloced_length)),
184 m_is_alloced(false)
185 { }
new(size_t size,MEM_ROOT * mem_root)186 static void *operator new(size_t size, MEM_ROOT *mem_root) throw ()
187 { return alloc_root(mem_root, size); }
delete(void * ptr_arg,size_t size)188 static void operator delete(void *ptr_arg, size_t size)
189 {
190 (void) ptr_arg;
191 (void) size;
192 TRASH(ptr_arg, size);
193 }
delete(void *,MEM_ROOT *)194 static void operator delete(void *, MEM_ROOT *)
195 { /* never called */ }
~String()196 ~String() { mem_free(); }
197
set_charset(const CHARSET_INFO * charset_arg)198 void set_charset(const CHARSET_INFO *charset_arg)
199 { m_charset= charset_arg; }
charset()200 const CHARSET_INFO *charset() const { return m_charset; }
length()201 size_t length() const { return m_length;}
alloced_length()202 size_t alloced_length() const { return m_alloced_length;}
203 char& operator [] (size_t i) const { return m_ptr[i]; }
length(size_t len)204 void length(size_t len) { m_length= len; }
is_empty()205 bool is_empty() const { return (m_length == 0); }
mark_as_const()206 void mark_as_const() { m_alloced_length= 0;}
ptr()207 const char *ptr() const { return m_ptr; }
c_ptr()208 char *c_ptr()
209 {
210 assert(!m_is_alloced || !m_ptr || !m_alloced_length ||
211 (m_alloced_length >= (m_length + 1)));
212
213 if (!m_ptr || m_ptr[m_length]) /* Should be safe */
214 (void) mem_realloc(m_length);
215 return m_ptr;
216 }
c_ptr_quick()217 char *c_ptr_quick()
218 {
219 if (m_ptr && m_length < m_alloced_length)
220 m_ptr[m_length]= 0;
221 return m_ptr;
222 }
c_ptr_safe()223 char *c_ptr_safe()
224 {
225 if (m_ptr && m_length < m_alloced_length)
226 m_ptr[m_length]= 0;
227 else
228 (void) mem_realloc(m_length);
229 return m_ptr;
230 }
lex_string()231 LEX_STRING lex_string() const
232 {
233 LEX_STRING lex_string = { (char*) ptr(), length() };
234 return lex_string;
235 }
236
lex_cstring()237 LEX_CSTRING lex_cstring() const
238 {
239 LEX_CSTRING lex_cstring = { ptr(), length() };
240 return lex_cstring;
241 }
242
set(String & str,size_t offset,size_t arg_length)243 void set(String &str,size_t offset, size_t arg_length)
244 {
245 assert(&str != this);
246 mem_free();
247 m_ptr= const_cast<char*>(str.ptr()) + offset;
248 m_length= arg_length;
249 m_is_alloced= false;
250 if (str.m_alloced_length)
251 m_alloced_length= str.m_alloced_length - static_cast<uint32>(offset);
252 else
253 m_alloced_length= 0;
254 m_charset= str.m_charset;
255 }
256
257
258 /**
259 Points the internal buffer to the supplied one. The old buffer is freed.
260 @param str Pointer to the new buffer.
261 @param arg_length Length of the new buffer in characters, excluding any
262 null character.
263 @param cs Character set to use for interpreting string data.
264 @note The new buffer will not be null terminated.
265 */
set(char * str,size_t arg_length,const CHARSET_INFO * cs)266 void set(char *str, size_t arg_length, const CHARSET_INFO *cs)
267 {
268 mem_free();
269 m_ptr= str;
270 m_length= m_alloced_length= static_cast<uint32>(arg_length);
271 m_is_alloced= false;
272 m_charset= cs;
273 }
set(const char * str,size_t arg_length,const CHARSET_INFO * cs)274 void set(const char *str, size_t arg_length, const CHARSET_INFO *cs)
275 {
276 mem_free();
277 m_ptr= const_cast<char*>(str);
278 m_length= arg_length;
279 m_alloced_length= 0;
280 m_is_alloced= false;
281 m_charset= cs;
282 }
283 bool set_ascii(const char *str, size_t arg_length);
set_quick(char * str,size_t arg_length,const CHARSET_INFO * cs)284 void set_quick(char *str, size_t arg_length, const CHARSET_INFO *cs)
285 {
286 if (!m_is_alloced)
287 {
288 m_ptr= str;
289 m_length= arg_length;
290 m_alloced_length= static_cast<uint32>(arg_length);
291 }
292 m_charset= cs;
293 }
294 bool set_int(longlong num, bool unsigned_flag, const CHARSET_INFO *cs);
set(longlong num,const CHARSET_INFO * cs)295 bool set(longlong num, const CHARSET_INFO *cs)
296 { return set_int(num, false, cs); }
set(ulonglong num,const CHARSET_INFO * cs)297 bool set(ulonglong num, const CHARSET_INFO *cs)
298 { return set_int((longlong)num, true, cs); }
299 bool set_real(double num,uint decimals, const CHARSET_INFO *cs);
300
301 /*
302 PMG 2004.11.12
303 This is a method that works the same as perl's "chop". It simply
304 drops the last character of a string. This is useful in the case
305 of the federated storage handler where I'm building a unknown
306 number, list of values and fields to be used in a sql insert
307 statement to be run on the remote server, and have a comma after each.
308 When the list is complete, I "chop" off the trailing comma
309
310 ex.
311 String stringobj;
312 stringobj.append("VALUES ('foo', 'fi', 'fo',");
313 stringobj.chop();
314 stringobj.append(")");
315
316 In this case, the value of string was:
317
318 VALUES ('foo', 'fi', 'fo',
319 VALUES ('foo', 'fi', 'fo'
320 VALUES ('foo', 'fi', 'fo')
321
322 */
chop()323 void chop()
324 {
325 m_length--;
326 m_ptr[m_length]= '\0';
327 assert(strlen(m_ptr) == m_length);
328 }
329
mem_claim()330 void mem_claim()
331 {
332 if (m_is_alloced)
333 {
334 my_claim(m_ptr);
335 }
336 }
337
mem_free()338 void mem_free()
339 {
340 if (m_is_alloced)
341 {
342 m_is_alloced= false;
343 m_alloced_length= 0;
344 my_free(m_ptr);
345 m_ptr= NULL;
346 m_length= 0; /* Safety */
347 }
348 }
349
alloc(size_t arg_length)350 bool alloc(size_t arg_length)
351 {
352 if (arg_length < m_alloced_length)
353 return false;
354 return real_alloc(arg_length);
355 }
356 bool real_alloc(size_t arg_length); // Empties old string
357 bool mem_realloc(size_t arg_length, bool force_on_heap= false);
358
359 private:
360 size_t next_realloc_exp_size(size_t sz);
361 bool mem_realloc_exp(size_t arg_length);
362
363 public:
364 // Shrink the buffer, but only if it is allocated on the heap.
shrink(size_t arg_length)365 void shrink(size_t arg_length)
366 {
367 if (!is_alloced())
368 return;
369 if (arg_length < m_alloced_length)
370 {
371 char *new_ptr;
372 if (!(new_ptr= static_cast<char*>(my_realloc(STRING_PSI_MEMORY_KEY,
373 m_ptr, arg_length, MYF(0)))))
374 {
375 m_alloced_length= 0;
376 real_alloc(arg_length);
377 }
378 else
379 {
380 m_ptr= new_ptr;
381 m_alloced_length= static_cast<uint32>(arg_length);
382 }
383 }
384 }
is_alloced()385 bool is_alloced() const { return m_is_alloced; }
386 String& operator = (const String &s)
387 {
388 if (&s != this)
389 {
390 /*
391 It is forbidden to do assignments like
392 some_string = substring_of_that_string
393 */
394 assert(!s.uses_buffer_owned_by(this));
395 mem_free();
396 m_ptr= s.m_ptr;
397 m_length= s.m_length;
398 m_alloced_length= s.m_alloced_length;
399 m_charset= s.m_charset;
400 m_is_alloced= false;
401 }
402 return *this;
403 }
404 /**
405 Takeover the buffer owned by another string.
406 "this" becames the owner of the buffer and
407 is further responsible to free it.
408 The string "s" is detouched from the buffer (cleared).
409
410 @param s - a String object to steal buffer from.
411 */
takeover(String & s)412 void takeover(String &s)
413 {
414 assert(this != &s);
415 // Make sure buffers of the two Strings do not overlap
416 assert(!s.uses_buffer_owned_by(this));
417 mem_free();
418 m_ptr= s.m_ptr;
419 m_length= s.m_length;
420 m_alloced_length= s.m_alloced_length;
421 m_is_alloced= s.m_is_alloced;
422 m_charset= s.m_charset;
423 s.m_ptr= NULL;
424 s.m_alloced_length= 0;
425 s.m_length= 0;
426 s.m_is_alloced= false;
427 }
428
429 bool copy(); // Alloc string if not alloced
430 bool copy(const String &s); // Allocate new string
431 // Allocate new string
432 bool copy(const char *s, size_t arg_length, const CHARSET_INFO *cs);
433 static bool needs_conversion(size_t arg_length,
434 const CHARSET_INFO *cs_from, const CHARSET_INFO *cs_to,
435 size_t *offset);
436 static bool needs_conversion_on_storage(size_t arg_length,
437 const CHARSET_INFO *cs_from,
438 const CHARSET_INFO *cs_to);
439 bool copy_aligned(const char *s, size_t arg_length, size_t offset,
440 const CHARSET_INFO *cs);
441 bool set_or_copy_aligned(const char *s, size_t arg_length,
442 const CHARSET_INFO *cs);
443 bool copy(const char*s, size_t arg_length, const CHARSET_INFO *csfrom,
444 const CHARSET_INFO *csto, uint *errors);
445 bool append(const String &s);
446 bool append(const char *s);
append(LEX_STRING * ls)447 bool append(LEX_STRING *ls)
448 {
449 return append(ls->str, ls->length);
450 }
append(Simple_cstring str)451 bool append(Simple_cstring str)
452 {
453 return append(str.ptr(), str.length());
454 }
455 bool append(const char *s, size_t arg_length);
456 bool append(const char *s, size_t arg_length, const CHARSET_INFO *cs);
457 bool append_ulonglong(ulonglong val);
458 bool append_longlong(longlong val);
459 bool append(IO_CACHE* file, size_t arg_length);
460 bool append_with_prefill(const char *s, size_t arg_length,
461 size_t full_length, char fill_char);
462 bool append_parenthesized(long nr, int radix= 10);
463 int strstr(const String &search,size_t offset=0); // Returns offset to substring or -1
464 int strrstr(const String &search,size_t offset=0); // Returns offset to substring or -1
465 /**
466 * Returns substring of given characters lenght, starting at given character offset.
467 * Note that parameter indexes are character indexes and not byte indexes.
468 */
469 String substr(int offset, int count);
470
471 bool replace(size_t offset, size_t arg_length,const char *to, size_t length);
472 bool replace(size_t offset, size_t arg_length,const String &to);
append(char chr)473 bool append(char chr)
474 {
475 if (m_length < m_alloced_length)
476 {
477 m_ptr[m_length++]= chr;
478 }
479 else
480 {
481 if (mem_realloc_exp(m_length + 1))
482 return 1;
483 m_ptr[m_length++]= chr;
484 }
485 return 0;
486 }
487 bool fill(size_t max_length,char fill);
488 void strip_sp();
489 friend int sortcmp(const String *a,const String *b, const CHARSET_INFO *cs);
490 friend int stringcmp(const String *a,const String *b);
491 friend String *copy_if_not_alloced(String *a,String *b, size_t arg_length);
492 size_t numchars() const;
493 size_t charpos(size_t i, size_t offset=0);
494
reserve(size_t space_needed)495 int reserve(size_t space_needed)
496 {
497 return mem_realloc(m_length + space_needed);
498 }
499 int reserve(size_t space_needed, size_t grow_by);
500 /*
501 The following append operations do NOT check alloced memory
502 q_*** methods writes values of parameters itself
503 qs_*** methods writes string representation of value
504 */
q_append(const char c)505 void q_append(const char c)
506 {
507 m_ptr[m_length++] = c;
508 }
q_append(const uint32 n)509 void q_append(const uint32 n)
510 {
511 int4store(m_ptr + m_length, n);
512 m_length += 4;
513 }
q_append(double d)514 void q_append(double d)
515 {
516 float8store(m_ptr + m_length, d);
517 m_length += 8;
518 }
q_append(double * d)519 void q_append(double *d)
520 {
521 float8store(m_ptr + m_length, *d);
522 m_length += 8;
523 }
q_append(const char * data,size_t data_len)524 void q_append(const char *data, size_t data_len)
525 {
526 memcpy(m_ptr + m_length, data, data_len);
527 m_length += data_len;
528 }
529
write_at_position(int position,uint32 value)530 void write_at_position(int position, uint32 value)
531 {
532 int4store(m_ptr + position,value);
533 }
534
535 void qs_append(const char *str, size_t len);
536 void qs_append(double d, size_t len);
qs_append(const char c)537 void qs_append(const char c)
538 {
539 m_ptr[m_length]= c;
540 m_length++;
541 }
542 void qs_append(int i);
543 void qs_append(uint i);
544
545 /* Inline (general) functions used by the protocol functions */
546
prep_append(size_t arg_length,size_t step_alloc)547 char *prep_append(size_t arg_length, size_t step_alloc)
548 {
549 size_t new_length= arg_length + m_length;
550 if (new_length > m_alloced_length)
551 {
552 if (mem_realloc(new_length + step_alloc))
553 return NULL;
554 }
555 size_t old_length= m_length;
556 m_length+= arg_length;
557 return m_ptr+ old_length; /* Area to use */
558 }
559
append(const char * s,size_t arg_length,size_t step_alloc)560 bool append(const char *s, size_t arg_length, size_t step_alloc)
561 {
562 size_t new_length= arg_length + m_length;
563 if (new_length > m_alloced_length && mem_realloc_exp(new_length + step_alloc))
564 return true;
565 memcpy(m_ptr+m_length, s, arg_length);
566 m_length+= arg_length;
567 return false;
568 }
569 void print(String *print);
570
571 /* Swap two string objects. Efficient way to exchange data without memcpy. */
572 void swap(String &s);
573
uses_buffer_owned_by(const String * s)574 bool uses_buffer_owned_by(const String *s) const
575 {
576 return (s->m_is_alloced && m_ptr >= s->m_ptr && m_ptr < s->m_ptr + s->m_length);
577 }
is_ascii()578 bool is_ascii() const
579 {
580 if (length() == 0)
581 return true;
582 if (charset()->mbminlen > 1)
583 return false;
584 for (const char *c= ptr(), *end= c + length(); c < end; c++)
585 {
586 if (!my_isascii(*c))
587 return false;
588 }
589 return true;
590 }
591 /**
592 Make a zero-terminated copy of our value,allocated in the specified MEM_ROOT
593
594 @param root MEM_ROOT to allocate the result
595
596 @return allocated string or NULL
597 */
dup(MEM_ROOT * root)598 char *dup(MEM_ROOT *root) const
599 {
600 if (m_length > 0 && m_ptr[m_length - 1] == 0)
601 return static_cast<char *>(memdup_root(root, m_ptr, m_length));
602
603 char *ret= static_cast<char*>(alloc_root(root, m_length + 1));
604 if (ret != NULL)
605 {
606 memcpy(ret, m_ptr, m_length);
607 ret[m_length]= 0;
608 }
609 return ret;
610 }
611 };
612
613
/**
  String class wrapper with a preallocated buffer of size buff_sz

  This class allows replacing sequences of:
     char buff[12345];
     String str(buff, sizeof(buff));
     str.length(0);
  with a simple equivalent declaration:
     StringBuffer<12345> str;
*/
624
625 template<size_t buff_sz>
626 class StringBuffer : public String
627 {
628 char buff[buff_sz];
629
630 public:
StringBuffer()631 StringBuffer() : String(buff, buff_sz, &my_charset_bin) { length(0); }
StringBuffer(const CHARSET_INFO * cs)632 explicit StringBuffer(const CHARSET_INFO *cs) : String(buff, buff_sz, cs)
633 {
634 length(0);
635 }
StringBuffer(const char * str,size_t length,const CHARSET_INFO * cs)636 StringBuffer(const char *str, size_t length, const CHARSET_INFO *cs)
637 : String(buff, buff_sz, cs)
638 {
639 set(str, length, cs);
640 }
641 };
642
643
check_if_only_end_space(const CHARSET_INFO * cs,char * str,char * end)644 static inline bool check_if_only_end_space(const CHARSET_INFO *cs, char *str,
645 char *end)
646 {
647 return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
648 }
649
650
to_lex_cstring(const LEX_STRING & s)651 inline LEX_CSTRING to_lex_cstring(const LEX_STRING &s)
652 {
653 LEX_CSTRING cstr= { s.str, s.length };
654 return cstr;
655 }
656
657
to_lex_string(const LEX_CSTRING & s)658 inline LEX_STRING to_lex_string(const LEX_CSTRING &s)
659 {
660 LEX_STRING str= { const_cast<char *>(s.str), s.length };
661 return str;
662 }
663
to_lex_cstring(const char * s)664 inline LEX_CSTRING to_lex_cstring(const char *s)
665 {
666 LEX_CSTRING cstr= { s, s != NULL ? strlen(s) : 0 };
667 return cstr;
668 }
669
670 bool
671 validate_string(const CHARSET_INFO *cs, const char *str, uint32 length,
672 size_t *valid_length, bool *length_error);
673 #endif /* SQL_STRING_INCLUDED */
674