1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
2 Copyright (c) 2016, 2020, MariaDB
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
16
17 /* This file is originally from the mysql distribution. Coded by monty */
18
19 #ifdef USE_PRAGMA_IMPLEMENTATION
20 #pragma implementation // gcc: Class implementation
21 #endif
22
23 #include "mariadb.h"
24 #include <m_string.h>
25 #include <m_ctype.h>
26 #include <mysql_com.h>
27
28 #include "sql_string.h"
29
30 /*****************************************************************************
31 ** String functions
32 *****************************************************************************/
33
real_alloc(size_t length)34 bool String::real_alloc(size_t length)
35 {
36 size_t arg_length= ALIGN_SIZE(length + 1);
37 DBUG_ASSERT(arg_length > length);
38 if (arg_length <= length)
39 return TRUE; /* Overflow */
40 str_length=0;
41 if (Alloced_length < arg_length)
42 {
43 free();
44 if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME |
45 (thread_specific ?
46 MY_THREAD_SPECIFIC : 0)))))
47 return TRUE;
48 DBUG_ASSERT(length < UINT_MAX32);
49 Alloced_length=(uint32) arg_length;
50 alloced=1;
51 }
52 Ptr[0]=0;
53 return FALSE;
54 }
55
56
57 /**
58 Allocates a new buffer on the heap for this String.
59
60 - If the String's internal buffer is privately owned and heap allocated,
61 one of the following is performed.
62
63 - If the requested length is greater than what fits in the buffer, a new
64 buffer is allocated, data moved and the old buffer freed.
65
66 - If the requested length is less or equal to what fits in the buffer, a
67 null character is inserted at the appropriate position.
68
69 - If the String does not keep a private buffer on the heap, such a buffer
70 will be allocated and the string copied accoring to its length, as found
71 in String::length().
72
73 For C compatibility, the new string buffer is null terminated.
74
75 @param alloc_length The requested string size in characters, excluding any
76 null terminator.
77
78 @retval false Either the copy operation is complete or, if the size of the
79 new buffer is smaller than the currently allocated buffer (if one exists),
80 no allocation occurred.
81
82 @retval true An error occurred when attempting to allocate memory.
83 */
realloc_raw(size_t alloc_length)84 bool String::realloc_raw(size_t alloc_length)
85 {
86 if (Alloced_length <= alloc_length)
87 {
88 char *new_ptr;
89 uint32 len= ALIGN_SIZE(alloc_length+1);
90 DBUG_ASSERT(len > alloc_length);
91 if (len <= alloc_length)
92 return TRUE; /* Overflow */
93 if (alloced)
94 {
95 if (!(new_ptr= (char*) my_realloc(Ptr,len,
96 MYF(MY_WME |
97 (thread_specific ?
98 MY_THREAD_SPECIFIC : 0)))))
99 return TRUE; // Signal error
100 }
101 else if ((new_ptr= (char*) my_malloc(len,
102 MYF(MY_WME |
103 (thread_specific ?
104 MY_THREAD_SPECIFIC : 0)))))
105 {
106 if (str_length > len - 1)
107 str_length= 0;
108 if (str_length) // Avoid bugs in memcpy on AIX
109 memcpy(new_ptr,Ptr,str_length);
110 new_ptr[str_length]=0;
111 alloced=1;
112 }
113 else
114 return TRUE; // Signal error
115 Ptr= new_ptr;
116 DBUG_ASSERT(len < UINT_MAX32);
117 Alloced_length= (uint32)len;
118 }
119 return FALSE;
120 }
121
set_int(longlong num,bool unsigned_flag,CHARSET_INFO * cs)122 bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs)
123 {
124 uint l=20*cs->mbmaxlen+1;
125 int base= unsigned_flag ? 10 : -10;
126
127 if (alloc(l))
128 return TRUE;
129 str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num);
130 str_charset=cs;
131 return FALSE;
132 }
133
134
135 // Convert a number into its HEX representation
set_hex(ulonglong num)136 bool String::set_hex(ulonglong num)
137 {
138 char *n_end;
139 if (alloc(65) || !(n_end= longlong2str(num, Ptr, 16)))
140 return true;
141 length((uint32) (n_end - Ptr));
142 set_charset(&my_charset_latin1);
143 return false;
144 }
145
146
147 /**
148 Append a hex representation of the byte "value" into "to".
149 Note:
150 "to" is incremented for the caller by two bytes. It's passed by reference!
151 So it resembles a macros, hence capital letters in the name.
152 */
APPEND_HEX(char * & to,uchar value)153 static inline void APPEND_HEX(char *&to, uchar value)
154 {
155 *to++= _dig_vec_upper[((uchar) value) >> 4];
156 *to++= _dig_vec_upper[((uchar) value) & 0x0F];
157 }
158
159
qs_append_hex(const char * str,uint32 len)160 void String::qs_append_hex(const char *str, uint32 len)
161 {
162 const char *str_end= str + len;
163 for (char *to= Ptr + str_length ; str < str_end; str++)
164 APPEND_HEX(to, (uchar) *str);
165 str_length+= len * 2;
166 }
167
168
169 // Convert a string to its HEX representation
set_hex(const char * str,uint32 len)170 bool String::set_hex(const char *str, uint32 len)
171 {
172 /*
173 Safety: cut the source string if "len" is too large.
174 Note, alloc() can allocate some more space than requested, due to:
175 - ALIGN_SIZE
176 - one extra byte for a null terminator
177 So cut the source string to 0x7FFFFFF0 rather than 0x7FFFFFFE.
178 */
179 set_if_smaller(len, 0x7FFFFFF0);
180 if (alloc(len * 2))
181 return true;
182 length(0);
183 qs_append_hex(str, len);
184 set_charset(&my_charset_latin1);
185 return false;
186 }
187
188
set_real(double num,uint decimals,CHARSET_INFO * cs)189 bool String::set_real(double num,uint decimals, CHARSET_INFO *cs)
190 {
191 char buff[FLOATING_POINT_BUFFER];
192 uint dummy_errors;
193 size_t len;
194
195 str_charset=cs;
196 if (decimals >= FLOATING_POINT_DECIMALS)
197 {
198 len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
199 return copy(buff, (uint)len, &my_charset_latin1, cs, &dummy_errors);
200 }
201 len= my_fcvt(num, decimals, buff, NULL);
202 return copy(buff, (uint32) len, &my_charset_latin1, cs,
203 &dummy_errors);
204 }
205
206
copy()207 bool String::copy()
208 {
209 if (!alloced)
210 {
211 Alloced_length=0; // Force realloc
212 return realloc(str_length);
213 }
214 return FALSE;
215 }
216
217 /**
218 Copies the internal buffer from str. If this String has a private heap
219 allocated buffer where new data does not fit, a new buffer is allocated
220 before copying and the old buffer freed. Character set information is also
221 copied.
222
223 @param str The string whose internal buffer is to be copied.
224
225 @retval false Success.
226 @retval true Memory allocation failed.
227 */
copy(const String & str)228 bool String::copy(const String &str)
229 {
230 if (alloc(str.str_length))
231 return TRUE;
232 if ((str_length=str.str_length))
233 bmove(Ptr,str.Ptr,str_length); // May be overlapping
234 Ptr[str_length]=0;
235 str_charset=str.str_charset;
236 return FALSE;
237 }
238
copy(const char * str,size_t arg_length,CHARSET_INFO * cs)239 bool String::copy(const char *str,size_t arg_length, CHARSET_INFO *cs)
240 {
241 DBUG_ASSERT(arg_length < UINT_MAX32);
242 if (alloc(arg_length))
243 return TRUE;
244 if (Ptr == str && arg_length == uint32(str_length))
245 {
246 /*
247 This can happen in some cases. This code is here mainly to avoid
248 warnings from valgrind, but can also be an indication of error.
249 */
250 DBUG_PRINT("warning", ("Copying string on itself: %p %zu",
251 str, arg_length));
252 }
253 else if ((str_length=uint32(arg_length)))
254 memcpy(Ptr,str,arg_length);
255 Ptr[arg_length]=0;
256 str_charset=cs;
257 return FALSE;
258 }
259
260 /*
261 Copy string, where strings may overlap.
262 Same as String::copy, but use memmove instead of memcpy to avoid warnings
263 from valgrind
264 */
265
copy_or_move(const char * str,size_t arg_length,CHARSET_INFO * cs)266 bool String::copy_or_move(const char *str,size_t arg_length, CHARSET_INFO *cs)
267 {
268 DBUG_ASSERT(arg_length < UINT_MAX32);
269 if (alloc(arg_length))
270 return TRUE;
271 if ((str_length=uint32(arg_length)))
272 memmove(Ptr,str,arg_length);
273 Ptr[arg_length]=0;
274 str_charset=cs;
275 return FALSE;
276 }
277
278
279 /*
280 Checks that the source string can be just copied to the destination string
281 without conversion.
282
283 SYNPOSIS
284
285 needs_conversion()
286 arg_length Length of string to copy.
287 from_cs Character set to copy from
288 to_cs Character set to copy to
289 uint32 *offset Returns number of unaligned characters.
290
291 RETURN
292 0 No conversion needed
293 1 Either character set conversion or adding leading zeros
294 (e.g. for UCS-2) must be done
295
296 NOTE
297 to_cs may be NULL for "no conversion" if the system variable
298 character_set_results is NULL.
299 */
300
needs_conversion(size_t arg_length,CHARSET_INFO * from_cs,CHARSET_INFO * to_cs,uint32 * offset)301 bool String::needs_conversion(size_t arg_length,
302 CHARSET_INFO *from_cs,
303 CHARSET_INFO *to_cs,
304 uint32 *offset)
305 {
306 *offset= 0;
307 if (!to_cs ||
308 (to_cs == &my_charset_bin) ||
309 (to_cs == from_cs) ||
310 my_charset_same(from_cs, to_cs) ||
311 ((from_cs == &my_charset_bin) &&
312 (!(*offset=(uint32)(arg_length % to_cs->mbminlen)))))
313 return FALSE;
314 return TRUE;
315 }
316
317
318 /*
319 Checks that the source string can just be copied to the destination string
320 without conversion.
321 Unlike needs_conversion it will require conversion on incoming binary data
322 to ensure the data are verified for vailidity first.
323
324 @param arg_length Length of string to copy.
325 @param from_cs Character set to copy from
326 @param to_cs Character set to copy to
327
328 @return conversion needed
329 */
needs_conversion_on_storage(size_t arg_length,CHARSET_INFO * cs_from,CHARSET_INFO * cs_to)330 bool String::needs_conversion_on_storage(size_t arg_length,
331 CHARSET_INFO *cs_from,
332 CHARSET_INFO *cs_to)
333 {
334 uint32 offset;
335 return (needs_conversion(arg_length, cs_from, cs_to, &offset) ||
336 /* force conversion when storing a binary string */
337 (cs_from == &my_charset_bin &&
338 /* into a non-binary destination */
339 cs_to != &my_charset_bin &&
340 /* and any of the following is true :*/
341 (
342 /* it's a variable length encoding */
343 cs_to->mbminlen != cs_to->mbmaxlen ||
344 /* longer than 2 bytes : neither 1 byte nor ucs2 */
345 cs_to->mbminlen > 2 ||
346 /* and is not a multiple of the char byte size */
347 0 != (arg_length % cs_to->mbmaxlen)
348 )
349 )
350 );
351 }
352
353
354 /*
355 Copy a multi-byte character sets with adding leading zeros.
356
357 SYNOPSIS
358
359 copy_aligned()
360 str String to copy
361 arg_length Length of string. This should NOT be dividable with
362 cs->mbminlen.
363 offset arg_length % cs->mb_minlength
364 cs Character set for 'str'
365
366 NOTES
367 For real multi-byte, ascii incompatible charactser sets,
368 like UCS-2, add leading zeros if we have an incomplete character.
369 Thus,
370 SELECT _ucs2 0xAA
371 will automatically be converted into
372 SELECT _ucs2 0x00AA
373
374 RETURN
375 0 ok
376 1 error
377 */
378
copy_aligned(const char * str,size_t arg_length,size_t offset,CHARSET_INFO * cs)379 bool String::copy_aligned(const char *str, size_t arg_length, size_t offset,
380 CHARSET_INFO *cs)
381 {
382 /* How many bytes are in incomplete character */
383 offset= cs->mbminlen - offset; /* How many zeros we should prepend */
384 DBUG_ASSERT(offset && offset != cs->mbminlen);
385
386 size_t aligned_length= arg_length + offset;
387 if (alloc(aligned_length))
388 return TRUE;
389
390 /*
391 Note, this is only safe for big-endian UCS-2.
392 If we add little-endian UCS-2 sometimes, this code
393 will be more complicated. But it's OK for now.
394 */
395 bzero((char*) Ptr, offset);
396 memcpy(Ptr + offset, str, arg_length);
397 Ptr[aligned_length]=0;
398 /* str_length is always >= 0 as arg_length is != 0 */
399 str_length= (uint32)aligned_length;
400 str_charset= cs;
401 return FALSE;
402 }
403
404
set_or_copy_aligned(const char * str,size_t arg_length,CHARSET_INFO * cs)405 bool String::set_or_copy_aligned(const char *str, size_t arg_length,
406 CHARSET_INFO *cs)
407 {
408 /* How many bytes are in incomplete character */
409 size_t offset= (arg_length % cs->mbminlen);
410
411 if (!offset)
412 {
413 /* All characters are complete, just use given string */
414 set(str, arg_length, cs);
415 return FALSE;
416 }
417 return copy_aligned(str, arg_length, offset, cs);
418 }
419
420
421 /**
422 Copies the character data into this String, with optional character set
423 conversion.
424
425 @return
426 FALSE ok
427 TRUE Could not allocate result buffer
428
429 */
430
copy(const char * str,size_t arg_length,CHARSET_INFO * from_cs,CHARSET_INFO * to_cs,uint * errors)431 bool String::copy(const char *str, size_t arg_length,
432 CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
433 {
434 uint32 offset;
435
436 DBUG_ASSERT(!str || str != Ptr || !alloced);
437
438 if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
439 {
440 *errors= 0;
441 return copy(str, arg_length, to_cs);
442 }
443 if ((from_cs == &my_charset_bin) && offset)
444 {
445 *errors= 0;
446 return copy_aligned(str, arg_length, offset, to_cs);
447 }
448 size_t new_length= to_cs->mbmaxlen*arg_length;
449 if (alloc(new_length))
450 return TRUE;
451 str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
452 str, arg_length, from_cs, errors);
453 str_charset=to_cs;
454 return FALSE;
455 }
456
457
458 /*
459 Set a string to the value of a latin1-string, keeping the original charset
460
461 SYNOPSIS
462 copy_or_set()
463 str String of a simple charset (latin1)
464 arg_length Length of string
465
466 IMPLEMENTATION
467 If string object is of a simple character set, set it to point to the
468 given string.
469 If not, make a copy and convert it to the new character set.
470
471 RETURN
472 0 ok
473 1 Could not allocate result buffer
474
475 */
476
set_ascii(const char * str,size_t arg_length)477 bool String::set_ascii(const char *str, size_t arg_length)
478 {
479 if (str_charset->mbminlen == 1)
480 {
481 set(str, arg_length, str_charset);
482 return 0;
483 }
484 uint dummy_errors;
485 return copy(str, (uint32)arg_length, &my_charset_latin1, str_charset, &dummy_errors);
486 }
487
488
489 /* This is used by mysql.cc */
490
fill(uint32 max_length,char fill_char)491 bool String::fill(uint32 max_length,char fill_char)
492 {
493 if (str_length > max_length)
494 Ptr[str_length=max_length]=0;
495 else
496 {
497 if (realloc(max_length))
498 return TRUE;
499 bfill(Ptr+str_length,max_length-str_length,fill_char);
500 str_length=max_length;
501 }
502 return FALSE;
503 }
504
strip_sp()505 void String::strip_sp()
506 {
507 while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
508 str_length--;
509 }
510
append(const String & s)511 bool String::append(const String &s)
512 {
513 if (s.length())
514 {
515 if (realloc_with_extra_if_needed(str_length+s.length()))
516 return TRUE;
517 memcpy(Ptr+str_length,s.ptr(),s.length());
518 str_length+=s.length();
519 }
520 return FALSE;
521 }
522
523
524 /*
525 Append an ASCII string to the a string of the current character set
526 */
527
append(const char * s,size_t size)528 bool String::append(const char *s,size_t size)
529 {
530 DBUG_ASSERT(size <= UINT_MAX32);
531 uint32 arg_length= (uint32) size;
532 if (!arg_length)
533 return FALSE;
534
535 /*
536 For an ASCII incompatible string, e.g. UCS-2, we need to convert
537 */
538 if (str_charset->mbminlen > 1)
539 {
540 uint32 add_length=arg_length * str_charset->mbmaxlen;
541 uint dummy_errors;
542 if (realloc_with_extra_if_needed(str_length+ add_length))
543 return TRUE;
544 str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
545 s, arg_length, &my_charset_latin1,
546 &dummy_errors);
547 return FALSE;
548 }
549
550 /*
551 For an ASCII compatinble string we can just append.
552 */
553 if (realloc_with_extra_if_needed(str_length+arg_length))
554 return TRUE;
555 memcpy(Ptr+str_length,s,arg_length);
556 str_length+=arg_length;
557 return FALSE;
558 }
559
560
561 /*
562 Append a 0-terminated ASCII string
563 */
564
append(const char * s)565 bool String::append(const char *s)
566 {
567 return append(s, (uint) strlen(s));
568 }
569
append_longlong(longlong val)570 bool String::append_longlong(longlong val)
571 {
572 if (realloc(str_length+MAX_BIGINT_WIDTH+2))
573 return TRUE;
574 char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, -10);
575 str_length= (uint32)(end - Ptr);
576 return FALSE;
577 }
578
579
append_ulonglong(ulonglong val)580 bool String::append_ulonglong(ulonglong val)
581 {
582 if (realloc(str_length+MAX_BIGINT_WIDTH+2))
583 return TRUE;
584 char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10);
585 str_length= (uint32) (end - Ptr);
586 return FALSE;
587 }
588
589 /*
590 Append a string in the given charset to the string
591 with character set recoding
592 */
593
append(const char * s,size_t arg_length,CHARSET_INFO * cs)594 bool String::append(const char *s, size_t arg_length, CHARSET_INFO *cs)
595 {
596 if (!arg_length)
597 return false;
598
599 uint32 offset;
600
601 if (needs_conversion((uint32)arg_length, cs, str_charset, &offset))
602 {
603 size_t add_length;
604 if ((cs == &my_charset_bin) && offset)
605 {
606 DBUG_ASSERT(str_charset->mbminlen > offset);
607 offset= str_charset->mbminlen - offset; // How many characters to pad
608 add_length= arg_length + offset;
609 if (realloc(str_length + add_length))
610 return TRUE;
611 bzero((char*) Ptr + str_length, offset);
612 memcpy(Ptr + str_length + offset, s, arg_length);
613 str_length+= (uint32)add_length;
614 return FALSE;
615 }
616
617 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
618 uint dummy_errors;
619 if (realloc_with_extra_if_needed(str_length + add_length))
620 return TRUE;
621 str_length+= copy_and_convert(Ptr+str_length, (uint32)add_length, str_charset,
622 s, (uint32)arg_length, cs, &dummy_errors);
623 }
624 else
625 {
626 if (realloc_with_extra_if_needed(str_length + arg_length))
627 return TRUE;
628 memcpy(Ptr + str_length, s, arg_length);
629 str_length+= (uint32)arg_length;
630 }
631 return FALSE;
632 }
633
append(IO_CACHE * file,uint32 arg_length)634 bool String::append(IO_CACHE* file, uint32 arg_length)
635 {
636 if (realloc_with_extra_if_needed(str_length+arg_length))
637 return TRUE;
638 if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
639 {
640 shrink(str_length ? str_length : 1);
641 return TRUE;
642 }
643 str_length+=arg_length;
644 return FALSE;
645 }
646
647
648 /**
649 Append a parenthesized number to String.
650 Used in various pieces of SHOW related code.
651
652 @param nr Number
653 @param radix Radix, optional parameter, 10 by default.
654 */
append_parenthesized(long nr,int radix)655 bool String::append_parenthesized(long nr, int radix)
656 {
657 char buff[64], *end;
658 buff[0]= '(';
659 end= int10_to_str(nr, buff + 1, radix);
660 *end++ = ')';
661 return append(buff, (uint) (end - buff));
662 }
663
664
append_with_prefill(const char * s,uint32 arg_length,uint32 full_length,char fill_char)665 bool String::append_with_prefill(const char *s,uint32 arg_length,
666 uint32 full_length, char fill_char)
667 {
668 int t_length= arg_length > full_length ? arg_length : full_length;
669
670 if (realloc_with_extra_if_needed(str_length + t_length))
671 return TRUE;
672 t_length= full_length - arg_length;
673 if (t_length > 0)
674 {
675 bfill(Ptr+str_length, t_length, fill_char);
676 str_length=str_length + t_length;
677 }
678 append(s, arg_length);
679 return FALSE;
680 }
681
numchars() const682 uint32 String::numchars() const
683 {
684 return (uint32) str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
685 }
686
charpos(longlong i,uint32 offset)687 int String::charpos(longlong i,uint32 offset)
688 {
689 if (i <= 0)
690 return (int)i;
691 return (int)str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,(size_t)i);
692 }
693
strstr(const String & s,uint32 offset)694 int String::strstr(const String &s,uint32 offset)
695 {
696 if (s.length()+offset <= str_length)
697 {
698 if (!s.length())
699 return ((int) offset); // Empty string is always found
700
701 const char *str = Ptr+offset;
702 const char *search=s.ptr();
703 const char *end=Ptr+str_length-s.length()+1;
704 const char *search_end=s.ptr()+s.length();
705 skip:
706 while (str != end)
707 {
708 if (*str++ == *search)
709 {
710 char *i,*j;
711 i=(char*) str; j=(char*) search+1;
712 while (j != search_end)
713 if (*i++ != *j++) goto skip;
714 return (int) (str-Ptr) -1;
715 }
716 }
717 }
718 return -1;
719 }
720
721 /*
722 ** Search string from end. Offset is offset to the end of string
723 */
724
strrstr(const String & s,uint32 offset)725 int String::strrstr(const String &s,uint32 offset)
726 {
727 if (s.length() <= offset && offset <= str_length)
728 {
729 if (!s.length())
730 return offset; // Empty string is always found
731 const char *str = Ptr+offset-1;
732 const char *search=s.ptr()+s.length()-1;
733
734 const char *end=Ptr+s.length()-2;
735 const char *search_end=s.ptr()-1;
736 skip:
737 while (str != end)
738 {
739 if (*str-- == *search)
740 {
741 char *i,*j;
742 i=(char*) str; j=(char*) search-1;
743 while (j != search_end)
744 if (*i-- != *j--) goto skip;
745 return (int) (i-Ptr) +1;
746 }
747 }
748 }
749 return -1;
750 }
751
752 /*
753 Replace substring with string
754 If wrong parameter or not enough memory, do nothing
755 */
756
replace(uint32 offset,uint32 arg_length,const String & to)757 bool String::replace(uint32 offset,uint32 arg_length,const String &to)
758 {
759 return replace(offset,arg_length,to.ptr(),to.length());
760 }
761
replace(uint32 offset,uint32 arg_length,const char * to,uint32 to_length)762 bool String::replace(uint32 offset,uint32 arg_length,
763 const char *to, uint32 to_length)
764 {
765 long diff = (long) to_length-(long) arg_length;
766 if (offset+arg_length <= str_length)
767 {
768 if (diff < 0)
769 {
770 if (to_length)
771 memcpy(Ptr+offset,to,to_length);
772 bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
773 str_length-offset-arg_length);
774 }
775 else
776 {
777 if (diff)
778 {
779 if (realloc_with_extra_if_needed(str_length+(uint32) diff))
780 return TRUE;
781 bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
782 str_length-offset-arg_length);
783 }
784 if (to_length)
785 memcpy(Ptr+offset,to,to_length);
786 }
787 str_length+=(uint32) diff;
788 }
789 return FALSE;
790 }
791
792
793 // added by Holyfoot for "geometry" needs
reserve(size_t space_needed,size_t grow_by)794 int String::reserve(size_t space_needed, size_t grow_by)
795 {
796 if (Alloced_length < str_length + space_needed)
797 {
798 if (realloc(Alloced_length + MY_MAX(space_needed, grow_by) - 1))
799 return TRUE;
800 }
801 return FALSE;
802 }
803
qs_append(const char * str,size_t len)804 void String::qs_append(const char *str, size_t len)
805 {
806 memcpy(Ptr + str_length, str, len + 1);
807 str_length += (uint32)len;
808 }
809
qs_append(double d)810 void String::qs_append(double d)
811 {
812 char *buff = Ptr + str_length;
813 str_length+= (uint32) my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff,
814 NULL);
815 }
816
qs_append(double * d)817 void String::qs_append(double *d)
818 {
819 double ld;
820 float8get(ld, (char*) d);
821 qs_append(ld);
822 }
823
qs_append(int i)824 void String::qs_append(int i)
825 {
826 char *buff= Ptr + str_length;
827 char *end= int10_to_str(i, buff, -10);
828 str_length+= (int) (end-buff);
829 }
830
qs_append(ulonglong i)831 void String::qs_append(ulonglong i)
832 {
833 char *buff= Ptr + str_length;
834 char *end= longlong10_to_str(i, buff, 10);
835 str_length+= (int) (end-buff);
836 }
837
838 /*
839 Compare strings according to collation, without end space.
840
841 SYNOPSIS
842 sortcmp()
843 s First string
844 t Second string
845 cs Collation
846
847 NOTE:
848 Normally this is case sensitive comparison
849
850 RETURN
851 < 0 s < t
852 0 s == t
853 > 0 s > t
854 */
855
856
sortcmp(const String * s,const String * t,CHARSET_INFO * cs)857 int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
858 {
859 return cs->coll->strnncollsp(cs,
860 (uchar *) s->ptr(),s->length(),
861 (uchar *) t->ptr(),t->length());
862 }
863
864
865 /*
866 Compare strings byte by byte. End spaces are also compared.
867
868 SYNOPSIS
869 stringcmp()
870 s First string
871 t Second string
872
873 NOTE:
874 Strings are compared as a stream of uchars
875
876 RETURN
877 < 0 s < t
878 0 s == t
879 > 0 s > t
880 */
881
882
stringcmp(const String * s,const String * t)883 int stringcmp(const String *s,const String *t)
884 {
885 uint32 s_len=s->length(),t_len=t->length(),len=MY_MIN(s_len,t_len);
886 int cmp= len ? memcmp(s->ptr(), t->ptr(), len) : 0;
887 return (cmp) ? cmp : (int) (s_len - t_len);
888 }
889
890
891 /**
892 Return a string which has the same value with "from" and
893 which is safe to modify, trying to avoid unnecessary allocation
894 and copying when possible.
895
896 @param to Buffer. Must not be a constant string.
897 @param from Some existing value. We'll try to reuse it.
898 Can be a constant or a variable string.
899 @param from_length The total size that will be possibly needed.
900 Note, can be 0.
901
902 Note, in some cases "from" and "to" can point to the same object.
903
904 If "from" is a variable string and its allocated memory is enough
905 to store "from_length" bytes, then "from" is returned as is.
906
907 If "from" is a variable string and its allocated memory is not enough
908 to store "from_length" bytes, then "from" is reallocated and returned.
909
910 Otherwise (if "from" is a constant string, or looks like a constant string),
911 then "to" is reallocated to fit "from_length" bytes, the value is copied
912 from "from" to "to", then "to" is returned.
913 */
copy_if_not_alloced(String * to,String * from,uint32 from_length)914 String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
915 {
916 DBUG_ASSERT(to);
917 /*
918 If "from" is a constant string, e.g.:
919 SELECT INSERT('', <pos>, <length>, <replacement>);
920 we should not return it. See MDEV-9332.
921
922 The code below detects different string types:
923
924 a. All constant strings have Alloced_length==0 and alloced==false.
925 They point to a static memory array, or a mem_root memory,
926 and should stay untouched until the end of their life cycle.
927 Not safe to reuse.
928
929 b. Some variable string have Alloced_length==0 and alloced==false initially,
930 they are not bound to any char array and allocate space on the first use
931 (and become #d). A typical example of such String is Item::str_value.
932 This type of string could be reused, but there is no a way to distinguish
933 them from the true constant strings (#a).
934 Not safe to reuse.
935
936 c. Some variable strings have Alloced_length>0 and alloced==false.
937 They point to a fixed size writtable char array (typically on stack)
938 initially but can later allocate more space on the heap when the
939 fixed size array is too small (these strings become #d after allocation).
940 Safe to reuse.
941
942 d. Some variable strings have Alloced_length>0 and alloced==true.
943 They already store data on the heap.
944 Safe to reuse.
945
946 e. Some strings can have Alloced_length==0 and alloced==true.
947 This type of strings allocate space on the heap, but then are marked
948 as constant strings using String::mark_as_const().
949 A typical example - the result of a character set conversion
950 of a constant string.
951 Not safe to reuse.
952 */
953 if (from->Alloced_length > 0) // "from" is #c or #d (not a constant)
954 {
955 if (from->Alloced_length >= from_length)
956 return from; // #c or #d (large enough to store from_length bytes)
957
958 if (from->alloced)
959 {
960 (void) from->realloc(from_length);
961 return from; // #d (reallocated to fit from_length bytes)
962 }
963 /*
964 "from" is of type #c. It currently points to a writtable char array
965 (typically on stack), but is too small for "from_length" bytes.
966 We need to reallocate either "from" or "to".
967
968 "from" typically points to a temporary buffer inside Item_xxx::val_str(),
969 or to Item::str_value, and thus is "less permanent" than "to".
970
971 Reallocating "to" may give more benifits:
972 - "to" can point to a "more permanent" storage and can be reused
973 for multiple rows, e.g. str_buffer in Protocol::send_result_set_row(),
974 which is passed to val_str() for all string type rows.
975 - "from" can stay pointing to its original fixed size stack char array,
976 and thus reduce the total amount of my_alloc/my_free.
977 */
978 }
979
980 if (from == to)
981 {
982 /*
983 Possible string types:
984 #a not possible (constants should not be passed as "to")
985 #b possible (a fresh variable with no associated char buffer)
986 #c possible (a variable with a char buffer,
987 in case it's smaller than fixed_length)
988 #d not possible (handled earlier)
989 #e not possible (constants should not be passed as "to")
990
991 If a string of types #a or #e appears here, that means the caller made
992 something wrong. Otherwise, it's safe to reallocate and return "to".
993
994 Note, as we can't distinguish between #a and #b for sure,
995 so we can't assert "not #a", but we can at least assert "not #e".
996 */
997 DBUG_ASSERT(!from->alloced || from->Alloced_length > 0); // Not #e
998
999 (void) from->realloc(from_length);
1000 return from;
1001 }
1002 if (from->uses_buffer_owned_by(to))
1003 {
1004 DBUG_ASSERT(!from->alloced);
1005 DBUG_ASSERT(to->alloced);
1006 /*
1007 "from" is a constant string pointing to a fragment of alloced string "to":
1008 to= xxxFFFyyy
1009 - FFF is the part of "to" pointed by "from"
1010 - xxx is the part of "to" before "from"
1011 - yyy is the part of "to" after "from"
1012 */
1013 uint32 xxx_length= (uint32) (from->ptr() - to->ptr());
1014 uint32 yyy_length= (uint32) (to->end() - from->end());
1015 DBUG_ASSERT(to->length() >= yyy_length);
1016 to->length(to->length() - yyy_length); // Remove the "yyy" part
1017 DBUG_ASSERT(to->length() >= xxx_length);
1018 to->replace(0, xxx_length, "", 0); // Remove the "xxx" part
1019 to->realloc(from_length);
1020 to->str_charset= from->str_charset;
1021 return to;
1022 }
1023 if (to->realloc(from_length))
1024 return from; // Actually an error
1025 if ((to->str_length=MY_MIN(from->str_length,from_length)))
1026 memcpy(to->Ptr,from->Ptr,to->str_length);
1027 to->str_charset=from->str_charset;
1028 return to; // "from" was of types #a, #b, #e, or small #c.
1029 }
1030
1031
1032 /****************************************************************************
1033 Help functions
1034 ****************************************************************************/
1035
1036 /**
1037 Copy string with HEX-encoding of "bad" characters.
1038
1039 @details This functions copies the string pointed by "src"
1040 to the string pointed by "dst". Not more than "srclen" bytes
1041 are read from "src". Any sequences of bytes representing
1042 a not-well-formed substring (according to cs) are hex-encoded,
1043 and all well-formed substrings (according to cs) are copied as is.
1044 Not more than "dstlen" bytes are written to "dst". The number
1045 of bytes written to "dst" is returned.
1046
1047 @param cs character set pointer of the destination string
1048 @param[out] dst destination string
1049 @param dstlen size of dst
1050 @param src source string
1051 @param srclen length of src
1052
1053 @retval result length
1054 */
1055
1056 size_t
my_copy_with_hex_escaping(CHARSET_INFO * cs,char * dst,size_t dstlen,const char * src,size_t srclen)1057 my_copy_with_hex_escaping(CHARSET_INFO *cs,
1058 char *dst, size_t dstlen,
1059 const char *src, size_t srclen)
1060 {
1061 const char *srcend= src + srclen;
1062 char *dst0= dst;
1063
1064 for ( ; src < srcend ; )
1065 {
1066 size_t chlen;
1067 if ((chlen= my_ismbchar(cs, src, srcend)))
1068 {
1069 if (dstlen < chlen)
1070 break; /* purecov: inspected */
1071 memcpy(dst, src, chlen);
1072 src+= chlen;
1073 dst+= chlen;
1074 dstlen-= chlen;
1075 }
1076 else if (*src & 0x80)
1077 {
1078 if (dstlen < 4)
1079 break; /* purecov: inspected */
1080 *dst++= '\\';
1081 *dst++= 'x';
1082 APPEND_HEX(dst, (uchar) *src);
1083 src++;
1084 dstlen-= 4;
1085 }
1086 else
1087 {
1088 if (dstlen < 1)
1089 break; /* purecov: inspected */
1090 *dst++= *src++;
1091 dstlen--;
1092 }
1093 }
1094 return dst - dst0;
1095 }
1096
1097
1098 /*
1099 Copy a string,
1100 with optional character set conversion,
1101 with optional left padding (for binary -> UCS2 conversion)
1102
1103 Bad input bytes are replaced to '?'.
1104
1105 The string that is written to "to" is always well-formed.
1106
1107 @param to The destination string
1108 @param to_length Space available in "to"
1109 @param to_cs Character set of the "to" string
1110 @param from The source string
1111 @param from_length Length of the "from" string
1112 @param from_cs Character set of the "from" string
1113 @param nchars Copy not more than "nchars" characters
1114
1115 The members as set as follows:
1116 m_well_formed_error_pos To the position when "from" is not well formed
1117 or NULL otherwise.
1118 m_cannot_convert_error_pos To the position where a not convertable
1119 character met, or NULL otherwise.
1120 m_source_end_pos To the position where scanning of the "from"
1121 string stopped.
1122
1123 @returns number of bytes that were written to 'to'
1124 */
1125 uint
well_formed_copy(CHARSET_INFO * to_cs,char * to,size_t to_length,CHARSET_INFO * from_cs,const char * from,size_t from_length,size_t nchars)1126 String_copier::well_formed_copy(CHARSET_INFO *to_cs,
1127 char *to, size_t to_length,
1128 CHARSET_INFO *from_cs,
1129 const char *from, size_t from_length, size_t nchars)
1130 {
1131 if ((to_cs == &my_charset_bin) ||
1132 (from_cs == &my_charset_bin) ||
1133 (to_cs == from_cs) ||
1134 my_charset_same(from_cs, to_cs))
1135 {
1136 m_cannot_convert_error_pos= NULL;
1137 return (uint) to_cs->cset->copy_fix(to_cs, to, to_length, from, from_length,
1138 nchars, this);
1139 }
1140 return (uint) my_convert_fix(to_cs, to, to_length, from_cs, from, from_length,
1141 nchars, this, this);
1142 }
1143
1144
1145
1146 /*
1147 Append characters to a single-quoted string '...', escaping special
1148 characters with backslashes as necessary.
1149 Does not add the enclosing quotes, this is left up to caller.
1150 */
1151 #define APPEND(X) if (append(X)) return 1; else break
append_for_single_quote(const char * st,size_t len)1152 bool String::append_for_single_quote(const char *st, size_t len)
1153 {
1154 const char *end= st+len;
1155 for (; st < end; st++)
1156 {
1157 uchar c= *st;
1158 switch (c)
1159 {
1160 case '\\': APPEND(STRING_WITH_LEN("\\\\"));
1161 case '\0': APPEND(STRING_WITH_LEN("\\0"));
1162 case '\'': APPEND(STRING_WITH_LEN("\\'"));
1163 case '\n': APPEND(STRING_WITH_LEN("\\n"));
1164 case '\r': APPEND(STRING_WITH_LEN("\\r"));
1165 case '\032': APPEND(STRING_WITH_LEN("\\Z"));
1166 default: APPEND(c);
1167 }
1168 }
1169 return 0;
1170 }
1171
print(String * str) const1172 void String::print(String *str) const
1173 {
1174 str->append_for_single_quote(Ptr, str_length);
1175 }
1176
1177
print_with_conversion(String * print,CHARSET_INFO * cs) const1178 void String::print_with_conversion(String *print, CHARSET_INFO *cs) const
1179 {
1180 StringBuffer<256> tmp(cs);
1181 uint errors= 0;
1182 tmp.copy(this, cs, &errors);
1183 tmp.print(print);
1184 }
1185
1186
1187 /*
1188 Exchange state of this object and argument.
1189
1190 SYNOPSIS
1191 String::swap()
1192
1193 RETURN
1194 Target string will contain state of this object and vice versa.
1195 */
1196
swap(String & s)1197 void String::swap(String &s)
1198 {
1199 swap_variables(char *, Ptr, s.Ptr);
1200 swap_variables(uint32, str_length, s.str_length);
1201 swap_variables(uint32, Alloced_length, s.Alloced_length);
1202 swap_variables(bool, alloced, s.alloced);
1203 swap_variables(CHARSET_INFO*, str_charset, s.str_charset);
1204 }
1205
1206
1207 /**
1208 Convert string to printable ASCII string
1209
1210 @details This function converts input string "from" replacing non-ASCII bytes
1211 with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
1212 the resulting string.
1213 This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages,
1214 e.g. when a string cannot be converted to a result charset.
1215
1216
1217 @param to output buffer
1218 @param to_len size of the output buffer (8 bytes or greater)
1219 @param from input string
1220 @param from_len size of the input string
1221 @param from_cs input charset
1222 @param nbytes maximal number of bytes to convert (from_len if 0)
1223
1224 @return number of bytes in the output string
1225 */
1226
convert_to_printable(char * to,size_t to_len,const char * from,size_t from_len,CHARSET_INFO * from_cs,size_t nbytes)1227 uint convert_to_printable(char *to, size_t to_len,
1228 const char *from, size_t from_len,
1229 CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
1230 {
1231 /* needs at least 8 bytes for '\xXX...' and zero byte */
1232 DBUG_ASSERT(to_len >= 8);
1233
1234 char *t= to;
1235 char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
1236 const char *f= from;
1237 const char *f_end= from + (nbytes ? MY_MIN(from_len, nbytes) : from_len);
1238 char *dots= to; // last safe place to append '...'
1239
1240 if (!f || t == t_end)
1241 return 0;
1242
1243 for (; t < t_end && f < f_end; f++)
1244 {
1245 /*
1246 If the source string is ASCII compatible (mbminlen==1)
1247 and the source character is in ASCII printable range (0x20..0x7F),
1248 then display the character as is.
1249
1250 Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
1251 or the source character is not in the printable range,
1252 then print the character using HEX notation.
1253 */
1254 if (((unsigned char) *f) >= 0x20 &&
1255 ((unsigned char) *f) <= 0x7F &&
1256 from_cs->mbminlen == 1)
1257 {
1258 *t++= *f;
1259 }
1260 else
1261 {
1262 if (t_end - t < 4) // \xXX
1263 break;
1264 *t++= '\\';
1265 *t++= 'x';
1266 APPEND_HEX(t, *f);
1267 }
1268 if (t_end - t >= 3) // '...'
1269 dots= t;
1270 }
1271 if (f < from + from_len)
1272 memcpy(dots, STRING_WITH_LEN("...\0"));
1273 else
1274 *t= '\0';
1275 return (uint) (t - to);
1276 }
1277