1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /* This file is originally from the mysql distribution. Coded by monty */
24
25 #include <my_global.h>
26 #include <my_sys.h>
27 #include <m_string.h>
28 #include <m_ctype.h>
29 #include <mysql_com.h>
30
31 #include "sql_string.h"
32
33 #include <algorithm>
34 #include <limits>
35
36 using std::min;
37 using std::max;
38
39 #ifdef MYSQL_SERVER
40 PSI_memory_key key_memory_String_value;
41 #endif
42
43 /*****************************************************************************
44 ** String functions
45 *****************************************************************************/
46
real_alloc(size_t length)47 bool String::real_alloc(size_t length)
48 {
49 size_t arg_length= ALIGN_SIZE(length + 1);
50 assert(arg_length > length);
51 if (arg_length <= length)
52 return true; /* Overflow */
53 m_length= 0;
54 if (m_alloced_length < arg_length)
55 {
56 mem_free();
57 if (!(m_ptr= static_cast<char*>(my_malloc(STRING_PSI_MEMORY_KEY,
58 arg_length, MYF(MY_WME)))))
59 return true;
60 m_alloced_length= static_cast<uint32>(arg_length);
61 m_is_alloced= true;
62 }
63 m_ptr[0]= 0;
64 return false;
65 }
66
67
68 /**
69 Allocates a new buffer on the heap for this String.
70
71 - If the String's internal buffer is privately owned and heap allocated,
72 one of the following is performed.
73
74 - If the requested length is greater than what fits in the buffer, a new
75 buffer is allocated, data moved and the old buffer freed.
76
77 - If the requested length is less or equal to what fits in the buffer, a
78 null character is inserted at the appropriate position.
79
80 - If the String does not keep a private buffer on the heap:
81
82 - If the requested length is greater than what fits in the buffer, or
83 force_on_heap is true, a new buffer is allocated, data is copied.
84 - If the requested length is less or equal to what fits in the buffer,
85 and force_on_heap is false, a null character is inserted at the
86 appropriate position.
87
88 For C compatibility, the new string buffer is null terminated.
89
90 @param alloc_length The requested string size in characters, excluding any
91 null terminator.
92 @param force_on_heap If the caller wants String's 'str' buffer to be on the
93 heap in all cases.
94
95 @retval false Either the copy operation is complete or, if the size of the
96 new buffer is smaller than the currently allocated buffer (if one exists),
97 no allocation occurred.
98
99 @retval true An error occurred when attempting to allocate memory or memory
100 allocation length exceeded allowed limit (4GB) for String Class.
101 */
mem_realloc(size_t alloc_length,bool force_on_heap)102 bool String::mem_realloc(size_t alloc_length, bool force_on_heap)
103 {
104 size_t len= ALIGN_SIZE(alloc_length + 1);
105 assert(len > alloc_length);
106 if (len <= alloc_length)
107 return true; /* Overflow */
108
109 if (force_on_heap && !m_is_alloced)
110 {
111 /*
112 Caller wants bytes on the heap, and the currently available bytes are
113 not; they are thus irrelevant:
114 */
115 m_alloced_length= 0;
116 }
117
118 if (m_alloced_length < len) // Available bytes are not enough
119 {
120 // Signal an error if len exceeds uint32 max on 64-bit word platform.
121 #if defined(__WORDSIZE) && (__WORDSIZE == 64)
122 if (len > std::numeric_limits<uint32>::max())
123 return true;
124 #endif
125 char *new_ptr;
126 if (m_is_alloced)
127 {
128 if (!(new_ptr= static_cast<char*>(my_realloc(STRING_PSI_MEMORY_KEY,
129 m_ptr, len, MYF(MY_WME)))))
130 return true; // Signal error
131 }
132 else if ((new_ptr= static_cast<char*>(my_malloc(STRING_PSI_MEMORY_KEY,
133 len, MYF(MY_WME)))))
134 {
135 if (m_length > len - 1)
136 m_length= 0;
137 memcpy(new_ptr, m_ptr, m_length);
138 new_ptr[m_length]= 0;
139 m_is_alloced= true;
140 }
141 else
142 return true; // Signal error
143 m_ptr= new_ptr;
144 m_alloced_length= static_cast<uint32>(len);
145 }
146 m_ptr[alloc_length]= 0; // This make other funcs shorter
147 return false;
148 }
149
150 /*
151 Helper function for @see mem_realloc_exp.
152 */
next_realloc_exp_size(size_t sz)153 inline size_t String::next_realloc_exp_size(size_t sz)
154 {
155 const size_t len= ALIGN_SIZE(sz + 1);
156 const size_t ret=
157 (m_is_alloced && m_alloced_length < len) ? sz + (m_length / 4) : sz;
158 return ret;
159 }
160
161 /**
162 This function is used by the various append() member functions, to ensure
163 that append() has amortized constant cost. Once we have started to allocate
164 buffer on the heap, we increase the buffer size exponentially, rather
165 than linearly.
166
167 @param alloc_length The requested string size in characters, excluding any
168 null terminator.
169
170 @retval false Either the copy operation is complete or, if the size of the
171 new buffer is smaller than the currently allocated buffer (if one exists),
172 no allocation occurred.
173
174 @retval true An error occurred when attempting to allocate memory.
175
176 @see mem_realloc.
177 */
mem_realloc_exp(size_t alloc_length)178 bool String::mem_realloc_exp(size_t alloc_length)
179 {
180 if (mem_realloc(next_realloc_exp_size(alloc_length)))
181 return true;
182 m_ptr[alloc_length]= '\0';
183 return false;
184 }
185
186
set_int(longlong num,bool unsigned_flag,const CHARSET_INFO * cs)187 bool String::set_int(longlong num, bool unsigned_flag, const CHARSET_INFO *cs)
188 {
189 uint l= 20 * cs->mbmaxlen + 1;
190 int base= unsigned_flag ? 10 : -10;
191
192 if (alloc(l))
193 return true;
194 m_length=(uint32) (cs->cset->longlong10_to_str)(cs, m_ptr, l, base, num);
195 m_charset= cs;
196 return false;
197 }
198
set_real(double num,uint decimals,const CHARSET_INFO * cs)199 bool String::set_real(double num,uint decimals, const CHARSET_INFO *cs)
200 {
201 char buff[FLOATING_POINT_BUFFER];
202 uint dummy_errors;
203 size_t len;
204
205 m_charset=cs;
206 if (decimals >= NOT_FIXED_DEC)
207 {
208 len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, static_cast<int>(sizeof(buff)) - 1,
209 buff, NULL);
210 return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
211 }
212 len= my_fcvt(num, decimals, buff, NULL);
213 return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
214 }
215
216
copy()217 bool String::copy()
218 {
219 if (!m_is_alloced)
220 {
221 m_alloced_length= 0; // Force realloc
222 return mem_realloc(m_length);
223 }
224 return false;
225 }
226
227 /**
228 Copies the internal buffer from str. If this String has a private heap
229 allocated buffer where new data does not fit, a new buffer is allocated
230 before copying and the old buffer freed. Character set information is also
231 copied.
232
233 If str is the same as this and str doesn't own its buffer, a
234 new buffer is allocated and it's owned by str.
235
236 @param str The string whose internal buffer is to be copied.
237
238 @retval false Success.
239 @retval true Memory allocation failed.
240 */
copy(const String & str)241 bool String::copy(const String &str)
242 {
243 /*
244 If &str == this and it owns the buffer, this operation is a no-op, so skip
245 the meaningless copy. Otherwise if we do, we will read freed memory at
246 the memmove call below.
247 */
248 if (&str == this && str.is_alloced())
249 return false;
250
251 /*
252 If a String s doesn't own its buffer, here we should allocate
253 a new buffer owned by s and copy the contents there. But alloc()
254 will change this->m_ptr and this->m_length, and if this == &str, this
255 will also change str->m_ptr and str->m_length, so we need to save
256 these values first.
257 */
258 const size_t str_length= str.m_length;
259 const char *str_ptr= str.m_ptr;
260 if (alloc(str.m_length))
261 return true;
262 m_length= str_length;
263 memmove(m_ptr, str_ptr, m_length); // May be overlapping
264 m_ptr[m_length]= 0;
265 m_charset= str.m_charset;
266 return false;
267 }
268
copy(const char * str,size_t arg_length,const CHARSET_INFO * cs)269 bool String::copy(const char *str, size_t arg_length, const CHARSET_INFO *cs)
270 {
271 if (alloc(arg_length))
272 return true;
273 if ((m_length= arg_length))
274 memcpy(m_ptr, str, arg_length);
275 m_ptr[arg_length]= 0;
276 m_charset= cs;
277 return false;
278 }
279
280
281 /*
282 Checks that the source string can be just copied to the destination string
283 without conversion.
284
285 SYNOPSIS
286
287 needs_conversion()
288 arg_length Length of string to copy.
289 from_cs Character set to copy from
290 to_cs Character set to copy to
291 uint32 *offset Returns number of unaligned characters.
292
293 RETURN
294 0 No conversion needed
295 1 Either character set conversion or adding leading zeros
296 (e.g. for UCS-2) must be done
297
298 NOTE
299 to_cs may be NULL for "no conversion" if the system variable
300 character_set_results is NULL.
301 */
302
needs_conversion(size_t arg_length,const CHARSET_INFO * from_cs,const CHARSET_INFO * to_cs,size_t * offset)303 bool String::needs_conversion(size_t arg_length,
304 const CHARSET_INFO *from_cs,
305 const CHARSET_INFO *to_cs,
306 size_t *offset)
307 {
308 *offset= 0;
309 if (!to_cs ||
310 (to_cs == &my_charset_bin) ||
311 (to_cs == from_cs) ||
312 my_charset_same(from_cs, to_cs) ||
313 ((from_cs == &my_charset_bin) &&
314 (!(*offset=(arg_length % to_cs->mbminlen)))))
315 return false;
316 return true;
317 }
318
319
320 /*
321 Checks that the source string can just be copied to the destination string
322 without conversion.
323 Unlike needs_conversion it will require conversion on incoming binary data
324 to ensure the data are verified for validity first.
325
326 @param arg_length Length of string to copy.
327 @param from_cs Character set to copy from
328 @param to_cs Character set to copy to
329
330 @return conversion needed
331 */
needs_conversion_on_storage(size_t arg_length,const CHARSET_INFO * cs_from,const CHARSET_INFO * cs_to)332 bool String::needs_conversion_on_storage(size_t arg_length,
333 const CHARSET_INFO *cs_from,
334 const CHARSET_INFO *cs_to)
335 {
336 size_t offset;
337 return (needs_conversion(arg_length, cs_from, cs_to, &offset) ||
338 /* force conversion when storing a binary string */
339 (cs_from == &my_charset_bin &&
340 /* into a non-binary destination */
341 cs_to != &my_charset_bin &&
342 /* and any of the following is true :*/
343 (
344 /* it's a variable length encoding */
345 cs_to->mbminlen != cs_to->mbmaxlen ||
346 /* longer than 2 bytes : neither 1 byte nor ucs2 */
347 cs_to->mbminlen > 2 ||
348 /* and is not a multiple of the char byte size */
349 0 != (arg_length % cs_to->mbmaxlen)
350 )
351 )
352 );
353 }
354
355
356 /*
357 Copy a string in a multi-byte character set, adding leading zeros.
358
359 SYNOPSIS
360
361 copy_aligned()
362 str String to copy
363 arg_length Length of string. This should NOT be divisible by
364 cs->mbminlen.
365 offset arg_length % cs->mbminlen
366 cs Character set for 'str'
367
368 NOTES
369 For real multi-byte, ascii incompatible character sets,
370 like UCS-2, add leading zeros if we have an incomplete character.
371 Thus,
372 SELECT _ucs2 0xAA
373 will automatically be converted into
374 SELECT _ucs2 0x00AA
375
376 RETURN
377 0 ok
378 1 error
379 */
380
copy_aligned(const char * str,size_t arg_length,size_t offset,const CHARSET_INFO * cs)381 bool String::copy_aligned(const char *str, size_t arg_length, size_t offset,
382 const CHARSET_INFO *cs)
383 {
384 /* How many bytes are in incomplete character */
385 offset= cs->mbminlen - offset; /* How many zeros we should prepend */
386 assert(offset && offset != cs->mbminlen);
387
388 size_t aligned_length= arg_length + offset;
389 if (alloc(aligned_length))
390 return true;
391
392 /*
393 Note, this is only safe for big-endian UCS-2.
394 If we add little-endian UCS-2 sometimes, this code
395 will be more complicated. But it's OK for now.
396 */
397 memset(m_ptr, 0, offset);
398 memcpy(m_ptr + offset, str, arg_length);
399 m_ptr[aligned_length]= 0;
400 /* m_length is always >= 0 as arg_length is != 0 */
401 m_length= aligned_length;
402 m_charset= cs;
403 return false;
404 }
405
406
set_or_copy_aligned(const char * str,size_t arg_length,const CHARSET_INFO * cs)407 bool String::set_or_copy_aligned(const char *str, size_t arg_length,
408 const CHARSET_INFO *cs)
409 {
410 /* How many bytes are in incomplete character */
411 size_t offset= (arg_length % cs->mbminlen);
412
413 if (!offset) /* All characters are complete, just copy */
414 {
415 set(str, arg_length, cs);
416 return false;
417 }
418 return copy_aligned(str, arg_length, offset, cs);
419 }
420
421
422 /**
423 Copies the character data into this String, with optional character set
424 conversion.
425
426 @return
427 false ok
428 true Could not allocate result buffer
429
430 */
431
copy(const char * str,size_t arg_length,const CHARSET_INFO * from_cs,const CHARSET_INFO * to_cs,uint * errors)432 bool String::copy(const char *str, size_t arg_length,
433 const CHARSET_INFO *from_cs, const CHARSET_INFO *to_cs, uint *errors)
434 {
435 size_t offset;
436
437 assert(!str || str != m_ptr);
438
439 if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
440 {
441 *errors= 0;
442 return copy(str, arg_length, to_cs);
443 }
444 if ((from_cs == &my_charset_bin) && offset)
445 {
446 *errors= 0;
447 return copy_aligned(str, arg_length, offset, to_cs);
448 }
449 size_t new_length= to_cs->mbmaxlen*arg_length;
450 if (alloc(new_length))
451 return true;
452 m_length= copy_and_convert(m_ptr, new_length, to_cs,
453 str, arg_length, from_cs, errors);
454 m_charset= to_cs;
455 return false;
456 }
457
458
459 /*
460 Set a string to the value of a latin1-string, keeping the original charset
461
462 SYNOPSIS
463 set_ascii()
464 str String of a simple charset (latin1)
465 arg_length Length of string
466
467 IMPLEMENTATION
468 If string object is of a simple character set, set it to point to the
469 given string.
470 If not, make a copy and convert it to the new character set.
471
472 RETURN
473 0 ok
474 1 Could not allocate result buffer
475
476 */
477
set_ascii(const char * str,size_t arg_length)478 bool String::set_ascii(const char *str, size_t arg_length)
479 {
480 if (m_charset->mbminlen == 1)
481 {
482 set(str, arg_length, m_charset);
483 return 0;
484 }
485 uint dummy_errors;
486 return copy(str, arg_length, &my_charset_latin1, m_charset, &dummy_errors);
487 }
488
489
490 /* This is used by mysql.cc */
491
fill(size_t max_length,char fill_char)492 bool String::fill(size_t max_length,char fill_char)
493 {
494 if (m_length > max_length)
495 m_ptr[m_length= max_length]= 0;
496 else
497 {
498 if (mem_realloc(max_length))
499 return true;
500 memset(m_ptr + m_length, fill_char, max_length - m_length);
501 m_length= max_length;
502 }
503 return false;
504 }
505
strip_sp()506 void String::strip_sp()
507 {
508 while (m_length && my_isspace(m_charset, m_ptr[m_length - 1]))
509 m_length--;
510 }
511
append(const String & s)512 bool String::append(const String &s)
513 {
514 if (s.length())
515 {
516 assert(!this->uses_buffer_owned_by(&s));
517 assert(!s.uses_buffer_owned_by(this));
518
519 if (mem_realloc_exp((m_length + s.length())))
520 return true;
521 memcpy(m_ptr + m_length,s.ptr(), s.length());
522 m_length+=s.length();
523 }
524 return false;
525 }
526
527
528 /*
529 Append an ASCII string to a string of the current character set
530 */
531
append(const char * s,size_t arg_length)532 bool String::append(const char *s, size_t arg_length)
533 {
534 if (!arg_length)
535 return false;
536
537 /*
538 For an ASCII incompatible string, e.g. UCS-2, we need to convert
539 */
540 if (m_charset->mbminlen > 1)
541 {
542 size_t add_length= arg_length * m_charset->mbmaxlen;
543 uint dummy_errors;
544 if (mem_realloc(m_length + add_length))
545 return true;
546 m_length+= copy_and_convert(m_ptr + m_length, add_length, m_charset,
547 s, arg_length, &my_charset_latin1,
548 &dummy_errors);
549 return false;
550 }
551
552 /*
553 For an ASCII compatinble string we can just append.
554 */
555 if (mem_realloc_exp(m_length + arg_length))
556 return true;
557 memcpy(m_ptr + m_length, s, arg_length);
558 m_length+= arg_length;
559 return false;
560 }
561
562
563 /*
564 Append a 0-terminated ASCII string
565 */
566
append(const char * s)567 bool String::append(const char *s)
568 {
569 return append(s, (uint) strlen(s));
570 }
571
572
573 /**
574 Append an unsigned longlong to the string.
575 */
append_ulonglong(ulonglong val)576 bool String::append_ulonglong(ulonglong val)
577 {
578 if (mem_realloc_exp(m_length + MAX_BIGINT_WIDTH + 2))
579 return true;
580 char *end= longlong10_to_str(val, m_ptr + m_length, 10);
581 m_length= end - m_ptr;
582 return false;
583 }
584
585
586 /**
587 Append a signed longlong to the string.
588 */
append_longlong(longlong val)589 bool String::append_longlong(longlong val)
590 {
591 if (mem_realloc_exp(m_length + MAX_BIGINT_WIDTH + 2))
592 return true; /* purecov: inspected */
593 char *end= longlong10_to_str(val, m_ptr + m_length, -10);
594 m_length= end - m_ptr;
595 return false;
596 }
597
598
599 /*
600 Append a string in the given charset to the string
601 with character set recoding
602 */
603
append(const char * s,size_t arg_length,const CHARSET_INFO * cs)604 bool String::append(const char *s, size_t arg_length, const CHARSET_INFO *cs)
605 {
606 size_t offset;
607
608 if (needs_conversion(arg_length, cs, m_charset, &offset))
609 {
610 size_t add_length;
611 if ((cs == &my_charset_bin) && offset)
612 {
613 assert(m_charset->mbminlen > offset);
614 offset= m_charset->mbminlen - offset; // How many characters to pad
615 add_length= arg_length + offset;
616 if (mem_realloc_exp(m_length + add_length))
617 return true;
618 memset(m_ptr + m_length, 0, offset);
619 memcpy(m_ptr + m_length + offset, s, arg_length);
620 m_length+= add_length;
621 return false;
622 }
623
624 add_length= arg_length / cs->mbminlen * m_charset->mbmaxlen;
625 uint dummy_errors;
626 if (mem_realloc_exp(m_length + add_length))
627 return true;
628 m_length+= copy_and_convert(m_ptr + m_length, add_length, m_charset,
629 s, arg_length, cs, &dummy_errors);
630 }
631 else
632 {
633 if (mem_realloc_exp(m_length + arg_length))
634 return true;
635 memcpy(m_ptr + m_length, s, arg_length);
636 m_length+= arg_length;
637 }
638 return false;
639 }
640
append(IO_CACHE * file,size_t arg_length)641 bool String::append(IO_CACHE* file, size_t arg_length)
642 {
643 if (mem_realloc(m_length + arg_length))
644 return true;
645 if (my_b_read(file, reinterpret_cast<uchar*>(m_ptr) + m_length, arg_length))
646 {
647 shrink(m_length);
648 return true;
649 }
650 m_length+= arg_length;
651 return false;
652 }
653
654
655 /**
656 Append a parenthesized number to String.
657 Used in various pieces of SHOW related code.
658
659 @param nr Number
660 @param radix Radix, optional parameter, 10 by default.
661 */
append_parenthesized(long nr,int radix)662 bool String::append_parenthesized(long nr, int radix)
663 {
664 char buff[64], *end;
665 buff[0]= '(';
666 end= int10_to_str(nr, buff + 1, radix);
667 *end++ = ')';
668 return append(buff, (uint) (end - buff));
669 }
670
671
append_with_prefill(const char * s,size_t arg_length,size_t full_length,char fill_char)672 bool String::append_with_prefill(const char *s, size_t arg_length,
673 size_t full_length, char fill_char)
674 {
675 size_t t_length= arg_length > full_length ? arg_length : full_length;
676
677 if (mem_realloc(m_length + t_length))
678 return true;
679 if (full_length > arg_length)
680 {
681 t_length= full_length - arg_length;
682 memset(m_ptr + m_length, fill_char, t_length);
683 m_length= m_length + t_length;
684 }
685 append(s, arg_length);
686 return false;
687 }
688
numchars() const689 size_t String::numchars() const
690 {
691 return m_charset->cset->numchars(m_charset, m_ptr, m_ptr + m_length);
692 }
693
charpos(size_t i,size_t offset)694 size_t String::charpos(size_t i, size_t offset)
695 {
696 if (i <= 0)
697 return i;
698 return m_charset->cset->charpos(m_charset, m_ptr + offset, m_ptr + m_length, i);
699 }
700
strstr(const String & s,size_t offset)701 int String::strstr(const String &s, size_t offset)
702 {
703 if (s.length()+offset <= m_length)
704 {
705 if (!s.length())
706 return ((int) offset); // Empty string is always found
707
708 const char *str= m_ptr + offset;
709 const char *search= s.ptr();
710 const char *end= m_ptr + m_length - s.length() + 1;
711 const char *search_end= s.ptr() + s.length();
712 skip:
713 while (str != end)
714 {
715 if (*str++ == *search)
716 {
717 const char *i= str;
718 const char *j= search + 1;
719 while (j != search_end)
720 if (*i++ != *j++) goto skip;
721 return (int) (str - m_ptr) -1;
722 }
723 }
724 }
725 return -1;
726 }
727
728 /*
729 ** Search string from end. Offset is offset to the end of string
730 */
731
strrstr(const String & s,size_t offset)732 int String::strrstr(const String &s, size_t offset)
733 {
734 if (s.length() <= offset && offset <= m_length)
735 {
736 if (!s.length())
737 return static_cast<int>(offset); // Empty string is always found
738 const char *str= m_ptr + offset - 1;
739 const char *search= s.ptr() + s.length() - 1;
740
741 const char *end= m_ptr + s.length() - 2;
742 const char *search_end= s.ptr() - 1;
743 skip:
744 while (str != end)
745 {
746 if (*str-- == *search)
747 {
748 const char *i= str;
749 const char *j= search - 1;
750 while (j != search_end)
751 if (*i-- != *j--) goto skip;
752 return (int) (i - m_ptr) +1;
753 }
754 }
755 }
756 return -1;
757 }
758
substr(int offset,int count)759 String String::substr(int offset, int count)
760 {
761 int original_count = this->numchars();
762 if (offset > original_count)
763 {
764 offset= original_count;
765 }
766 if (offset + count > original_count)
767 {
768 count= original_count - offset;
769 }
770 size_t bytes_offset= this->charpos(offset);
771
772 return String(this->m_ptr + bytes_offset,
773 this->charpos(offset + count) - bytes_offset, this->m_charset);
774 }
775
776 /*
777 Replace substring with string
778 If wrong parameter or not enough memory, do nothing
779 */
780
replace(size_t offset,size_t arg_length,const String & to)781 bool String::replace(size_t offset, size_t arg_length,const String &to)
782 {
783 return replace(offset, arg_length, to.ptr(), to.length());
784 }
785
replace(size_t offset,size_t arg_length,const char * to,size_t to_length)786 bool String::replace(size_t offset, size_t arg_length,
787 const char *to, size_t to_length)
788 {
789 long diff = static_cast<long>(to_length) - static_cast<long>(arg_length);
790 if (offset+arg_length <= m_length)
791 {
792 if (diff < 0)
793 {
794 if (to_length)
795 memcpy(m_ptr + offset, to, to_length);
796 memmove(m_ptr + offset + to_length,
797 m_ptr + offset + arg_length,
798 m_length - offset - arg_length);
799 }
800 else
801 {
802 if (diff)
803 {
804 if (mem_realloc(m_length + diff))
805 return true;
806 memmove(m_ptr + offset + to_length,
807 m_ptr + offset + arg_length,
808 m_length - offset - arg_length);
809 }
810 if (to_length)
811 memcpy(m_ptr + offset, to, to_length);
812 }
813 m_length+= diff;
814 }
815 return false;
816 }
817
818
819 // added by Holyfoot for "geometry" needs
reserve(size_t space_needed,size_t grow_by)820 int String::reserve(size_t space_needed, size_t grow_by)
821 {
822 if (m_alloced_length < m_length + space_needed)
823 {
824 if (mem_realloc(m_alloced_length + max(space_needed, grow_by) - 1))
825 return true;
826 }
827 return false;
828 }
829
qs_append(const char * str,size_t len)830 void String::qs_append(const char *str, size_t len)
831 {
832 memcpy(m_ptr + m_length, str, len + 1);
833 m_length += len;
834 }
835
qs_append(double d,size_t len)836 void String::qs_append(double d, size_t len)
837 {
838 char *buff = m_ptr + m_length;
839 m_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, len, buff, NULL);
840 }
841
qs_append(int i)842 void String::qs_append(int i)
843 {
844 char *buff= m_ptr + m_length;
845 char *end= int10_to_str(i, buff, -10);
846 m_length+= (int) (end-buff);
847 }
848
qs_append(uint i)849 void String::qs_append(uint i)
850 {
851 char *buff= m_ptr + m_length;
852 char *end= int10_to_str(i, buff, 10);
853 m_length+= (int) (end-buff);
854 }
855
856 /*
857 Compare strings according to collation, without end space.
858
859 SYNOPSIS
860 sortcmp()
861 s First string
862 t Second string
863 cs Collation
864
865 NOTE:
866 Normally this is case sensitive comparison
867
868 RETURN
869 < 0 s < t
870 0 s == t
871 > 0 s > t
872 */
873
874
sortcmp(const String * s,const String * t,const CHARSET_INFO * cs)875 int sortcmp(const String *s,const String *t, const CHARSET_INFO *cs)
876 {
877 return cs->coll->strnncollsp(cs,
878 (uchar *) s->ptr(),s->length(),
879 (uchar *) t->ptr(),t->length(), 0);
880 }
881
882
883 /*
884 Compare strings byte by byte. End spaces are also compared.
885
886 SYNOPSIS
887 stringcmp()
888 s First string
889 t Second string
890
891 NOTE:
892 Strings are compared as a stream of uchars
893
894 RETURN
895 < 0 s < t
896 0 s == t
897 > 0 s > t
898 */
899
900
stringcmp(const String * s,const String * t)901 int stringcmp(const String *s,const String *t)
902 {
903 size_t s_len= s->length();
904 size_t t_len= t->length();
905 size_t len= min(s_len, t_len);
906 int cmp= memcmp(s->ptr(), t->ptr(), len);
907 return (cmp) ? cmp : static_cast<int>(s_len) - static_cast<int>(t_len);
908 }
909
910 /**
911 Makes a copy of a String's buffer unless it's already heap-allocated.
912
913 If the buffer ('str') of 'from' is on the heap, this function returns
914 'from', possibly re-allocated to be at least from_length bytes long.
915 It is also the case if from==to or to==NULL.
916 Otherwise, this function makes and returns a copy of "from" into "to"; the
917 buffer of "to" is heap-allocated; a pre-condition is that from->str and
918 to->str must point to non-overlapping buffers.
919 The logic behind this complex design, is that a caller, typically a
920 val_str() function, sometimes has an input String ('from') which buffer it
921 wants to modify; but this String's buffer may or not be heap-allocated; if
922 it's not heap-allocated it is possibly in static storage or belongs to an
923 outer context, and thus should not be modified; in that case the caller
924 wants a heap-allocated copy which it can freely modify.
925
926 @param to destination string
927 @param from source string
928 @param from_length destination string will hold at least from_length bytes.
929 */
930
copy_if_not_alloced(String * to,String * from,size_t from_length)931 String *copy_if_not_alloced(String *to,String *from, size_t from_length)
932 {
933 if (from->m_is_alloced && from->m_alloced_length >= from_length)
934 return from;
935 if ((from->m_is_alloced && (from->m_alloced_length != 0)) || !to || from == to)
936 {
937 (void) from->mem_realloc(from_length,
938 true /* force heap allocation */);
939 return from;
940 }
941 if (to->mem_realloc(from_length, true))
942 return from; // Actually an error
943
944 // from and to should not be overlapping
945 assert(!to->uses_buffer_owned_by(from));
946 assert(!from->uses_buffer_owned_by(to));
947
948 if ((to->m_length= min(from->m_length, from_length)))
949 memcpy(to->m_ptr, from->m_ptr, to->m_length);
950 to->m_charset=from->m_charset;
951 return to;
952 }
953
954
955 /****************************************************************************
956 Help functions
957 ****************************************************************************/
958
959 /*
960 copy a string,
961 with optional character set conversion,
962 with optional left padding (for binary -> UCS2 conversion)
963
964 SYNOPSIS
965 well_formed_copy_nchars()
966 to Store result here
967 to_length Maximum length of "to" string
968 to_cs Character set of "to" string
969 from Copy from here
970 from_length Length of from string
971 from_cs From character set
972 nchars Copy not more than nchars characters
973 well_formed_error_pos Return position when "from" is not well formed
974 or NULL otherwise.
975 cannot_convert_error_pos Return position where a non-convertible
976 character was met, or NULL otherwise.
977 from_end_pos Return position where scanning of "from"
978 string stopped.
979 NOTES
980
981 RETURN
982 length of bytes copied to 'to'
983 */
984
985
well_formed_copy_nchars(const CHARSET_INFO * to_cs,char * to,size_t to_length,const CHARSET_INFO * from_cs,const char * from,size_t from_length,size_t nchars,const char ** well_formed_error_pos,const char ** cannot_convert_error_pos,const char ** from_end_pos)986 size_t well_formed_copy_nchars(const CHARSET_INFO *to_cs,
987 char *to, size_t to_length,
988 const CHARSET_INFO *from_cs,
989 const char *from, size_t from_length,
990 size_t nchars,
991 const char **well_formed_error_pos,
992 const char **cannot_convert_error_pos,
993 const char **from_end_pos)
994 {
995 size_t res;
996
997 if ((to_cs == &my_charset_bin) ||
998 (from_cs == &my_charset_bin) ||
999 (to_cs == from_cs) ||
1000 my_charset_same(from_cs, to_cs))
1001 {
1002 if (to_length < to_cs->mbminlen || !nchars)
1003 {
1004 *from_end_pos= from;
1005 *cannot_convert_error_pos= NULL;
1006 *well_formed_error_pos= NULL;
1007 return 0;
1008 }
1009
1010 if (to_cs == &my_charset_bin)
1011 {
1012 res= min(min(nchars, to_length), from_length);
1013 memmove(to, from, res);
1014 *from_end_pos= from + res;
1015 *well_formed_error_pos= NULL;
1016 *cannot_convert_error_pos= NULL;
1017 }
1018 else
1019 {
1020 int well_formed_error;
1021 uint from_offset;
1022
1023 if ((from_offset= (from_length % to_cs->mbminlen)) &&
1024 (from_cs == &my_charset_bin))
1025 {
1026 /*
1027 Copying from BINARY to UCS2 needs to prepend zeros sometimes:
1028 INSERT INTO t1 (ucs2_column) VALUES (0x01);
1029 0x01 -> 0x0001
1030 */
1031 uint pad_length= to_cs->mbminlen - from_offset;
1032 memset(to, 0, pad_length);
1033 memmove(to + pad_length, from, from_offset);
1034 /*
1035 In some cases left zero-padding can create an incorrect character.
1036 For example:
1037 INSERT INTO t1 (utf32_column) VALUES (0x110000);
1038 We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
1039 The valid characters range is limited to 0x00000000..0x0010FFFF.
1040
1041 Make sure we didn't pad to an incorrect character.
1042 */
1043 if (to_cs->cset->well_formed_len(to_cs,
1044 to, to + to_cs->mbminlen, 1,
1045 &well_formed_error) !=
1046 to_cs->mbminlen)
1047 {
1048 *from_end_pos= *well_formed_error_pos= from;
1049 *cannot_convert_error_pos= NULL;
1050 return 0;
1051 }
1052 nchars--;
1053 from+= from_offset;
1054 from_length-= from_offset;
1055 to+= to_cs->mbminlen;
1056 to_length-= to_cs->mbminlen;
1057 }
1058
1059 set_if_smaller(from_length, to_length);
1060 res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
1061 nchars, &well_formed_error);
1062 memmove(to, from, res);
1063 *from_end_pos= from + res;
1064 *well_formed_error_pos= well_formed_error ? from + res : NULL;
1065 *cannot_convert_error_pos= NULL;
1066 if (from_offset)
1067 res+= to_cs->mbminlen;
1068 }
1069 }
1070 else
1071 {
1072 int cnvres;
1073 my_wc_t wc;
1074 my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
1075 my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
1076 const uchar *from_end= (const uchar*) from + from_length;
1077 uchar *to_end= (uchar*) to + to_length;
1078 char *to_start= to;
1079 *well_formed_error_pos= NULL;
1080 *cannot_convert_error_pos= NULL;
1081
1082 for ( ; nchars; nchars--)
1083 {
1084 const char *from_prev= from;
1085 if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
1086 from+= cnvres;
1087 else if (cnvres == MY_CS_ILSEQ)
1088 {
1089 if (!*well_formed_error_pos)
1090 *well_formed_error_pos= from;
1091 from++;
1092 wc= '?';
1093 }
1094 else if (cnvres > MY_CS_TOOSMALL)
1095 {
1096 /*
1097 A correct multibyte sequence detected
1098 But it doesn't have Unicode mapping.
1099 */
1100 if (!*cannot_convert_error_pos)
1101 *cannot_convert_error_pos= from;
1102 from+= (-cnvres);
1103 wc= '?';
1104 }
1105 else
1106 break; // Not enough characters
1107
1108 outp:
1109 if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1110 to+= cnvres;
1111 else if (cnvres == MY_CS_ILUNI && wc != '?')
1112 {
1113 if (!*cannot_convert_error_pos)
1114 *cannot_convert_error_pos= from_prev;
1115 wc= '?';
1116 goto outp;
1117 }
1118 else
1119 {
1120 from= from_prev;
1121 break;
1122 }
1123 }
1124 *from_end_pos= from;
1125 res= to - to_start;
1126 }
1127 return res;
1128 }
1129
1130
1131
1132
print(String * str)1133 void String::print(String *str)
1134 {
1135 char *st= m_ptr;
1136 char *end= st + m_length;
1137 for (; st < end; st++)
1138 {
1139 uchar c= *st;
1140 switch (c)
1141 {
1142 case '\\':
1143 str->append(STRING_WITH_LEN("\\\\"));
1144 break;
1145 case '\0':
1146 str->append(STRING_WITH_LEN("\\0"));
1147 break;
1148 case '\'':
1149 str->append(STRING_WITH_LEN("\\'"));
1150 break;
1151 case '\n':
1152 str->append(STRING_WITH_LEN("\\n"));
1153 break;
1154 case '\r':
1155 str->append(STRING_WITH_LEN("\\r"));
1156 break;
1157 case '\032': // Ctrl-Z
1158 str->append(STRING_WITH_LEN("\\Z"));
1159 break;
1160 default:
1161 str->append(c);
1162 }
1163 }
1164 }
1165
1166
1167 /*
1168 Exchange state of this object and argument.
1169
1170 SYNOPSIS
1171 String::swap()
1172
1173 RETURN
1174 Target string will contain state of this object and vice versa.
1175 */
1176
swap(String & s)1177 void String::swap(String &s)
1178 {
1179 swap_variables(char *, m_ptr, s.m_ptr);
1180 swap_variables(size_t, m_length, s.m_length);
1181 swap_variables(uint32, m_alloced_length, s.m_alloced_length);
1182 swap_variables(bool, m_is_alloced, s.m_is_alloced);
1183 swap_variables(const CHARSET_INFO *, m_charset, s.m_charset);
1184 }
1185
1186
1187 /**
1188 Convert string to printable ASCII string
1189
1190 @details This function converts input string "from" replacing non-ASCII bytes
1191 with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
1192 the resulting string.
1193 This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages,
1194 e.g. when a string cannot be converted to a result charset.
1195
1196
1197 @param to output buffer
1198 @param to_len size of the output buffer (8 bytes or greater)
1199 @param from input string
1200 @param from_len size of the input string
1201 @param from_cs input charset
1202 @param nbytes maximal number of bytes to convert (from_len if 0)
1203
1204 @return number of bytes in the output string
1205 */
1206
convert_to_printable(char * to,size_t to_len,const char * from,size_t from_len,const CHARSET_INFO * from_cs,size_t nbytes)1207 size_t convert_to_printable(char *to, size_t to_len,
1208 const char *from, size_t from_len,
1209 const CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
1210 {
1211 /* needs at least 8 bytes for '\xXX...' and zero byte */
1212 assert(to_len >= 8);
1213
1214 char *t= to;
1215 char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
1216 const char *f= from;
1217 const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len);
1218 char *dots= to; // last safe place to append '...'
1219
1220 if (!f || t == t_end)
1221 return 0;
1222
1223 for (; t < t_end && f < f_end; f++)
1224 {
1225 /*
1226 If the source string is ASCII compatible (mbminlen==1)
1227 and the source character is in ASCII printable range (0x20..0x7F),
1228 then display the character as is.
1229
1230 Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
1231 or the source character is not in the printable range,
1232 then print the character using HEX notation.
1233 */
1234 if (((unsigned char) *f) >= 0x20 &&
1235 ((unsigned char) *f) <= 0x7F &&
1236 from_cs->mbminlen == 1)
1237 {
1238 *t++= *f;
1239 }
1240 else
1241 {
1242 if (t_end - t < 4) // \xXX
1243 break;
1244 *t++= '\\';
1245 *t++= 'x';
1246 *t++= _dig_vec_upper[((unsigned char) *f) >> 4];
1247 *t++= _dig_vec_upper[((unsigned char) *f) & 0x0F];
1248 }
1249 if (t_end - t >= 3) // '...'
1250 dots= t;
1251 }
1252 if (f < from + from_len)
1253 memcpy(dots, STRING_WITH_LEN("...\0"));
1254 else
1255 *t= '\0';
1256 return t - to;
1257 }
1258
1259
1260 /**
1261 Convert a buffer to printable HEX encoded string
1262 For eg: ABCDEF1234
1263
1264
1265 @param to output buffer
1266 @param to_len size of the output buffer (from_len*2 + 1 or greater)
1267 @param from input buffer
1268 @param from_len size of the input buffer
1269
1270 @return number of bytes in the output string
1271 */
bin_to_hex_str(char * to,size_t to_len,char * from,size_t from_len)1272 size_t bin_to_hex_str(char *to, size_t to_len, char *from, size_t from_len)
1273 {
1274 char *out;
1275 char *in;
1276 size_t i;
1277
1278 if (to_len < ((from_len * 2) + 1))
1279 return 0 ;
1280
1281 out= to;
1282 in= from;
1283
1284 for (i=0; i < from_len; i++, in++)
1285 {
1286 *out++=_dig_vec_upper[((unsigned char) *in) >> 4];
1287 *out++=_dig_vec_upper[((unsigned char) *in) & 0xF];
1288 }
1289
1290 *out= '\0';
1291
1292 return out - to;
1293 }
1294
1295 /**
1296 Check if an input byte sequence is a valid character string of a given charset
1297
1298 @param cs The input character set.
1299 @param str The input byte sequence to validate.
1300 @param length A byte length of the str.
1301 @param [out] valid_length A byte length of a valid prefix of the str.
1302 @param [out] length_error True in the case of a character length error:
1303 some byte[s] in the input is not a valid
1304 prefix for a character, i.e. the byte length
1305 of that invalid character is undefined.
1306
1307 @retval true if the whole input byte sequence is a valid character string.
1308 The length_error output parameter is undefined.
1309
1310 @return
1311 if the whole input byte sequence is a valid character string
1312 then
1313 return false
1314 else
1315 if the length of some character in the input is undefined (MY_CS_ILSEQ)
1316 or the last character is truncated (MY_CS_TOOSMALL)
1317 then
1318 *length_error= true; // fatal error!
1319 else
1320 *length_error= false; // non-fatal error: there is no wide character
1321 // encoding for some input character
1322 return true
1323 */
validate_string(const CHARSET_INFO * cs,const char * str,uint32 length,size_t * valid_length,bool * length_error)1324 bool validate_string(const CHARSET_INFO *cs, const char *str, uint32 length,
1325 size_t *valid_length, bool *length_error)
1326 {
1327 if (cs->mbmaxlen > 1)
1328 {
1329 int well_formed_error;
1330 *valid_length= cs->cset->well_formed_len(cs, str, str + length,
1331 length, &well_formed_error);
1332 *length_error= well_formed_error;
1333 return well_formed_error;
1334 }
1335
1336 /*
1337 well_formed_len() is not functional on single-byte character sets,
1338 so use mb_wc() instead:
1339 */
1340 *length_error= false;
1341
1342 const uchar *from= reinterpret_cast<const uchar *>(str);
1343 const uchar *from_end= from + length;
1344 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1345
1346 while (from < from_end)
1347 {
1348 my_wc_t wc;
1349 int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end);
1350 if (cnvres <= 0)
1351 {
1352 *valid_length= from - reinterpret_cast<const uchar *>(str);
1353 return true;
1354 }
1355 from+= cnvres;
1356 }
1357 *valid_length= length;
1358 return false;
1359 }
1360