1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights
2 * reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24 /* This file is originally from the mysql distribution. Coded by monty */
25
26 #include <my_global.h>
27 #include <my_sys.h>
28 #include <m_string.h>
29 #include <m_ctype.h>
30 #include <mysql_com.h>
31
32 #include "sql_string.h"
33
34 #include <algorithm>
35
36 using std::min;
37 using std::max;
38
39 /*****************************************************************************
40 ** String functions
41 *****************************************************************************/
42
real_alloc(uint32 length)43 bool String::real_alloc(uint32 length)
44 {
45 uint32 arg_length= ALIGN_SIZE(length + 1);
46 DBUG_ASSERT(arg_length > length);
47 if (arg_length <= length)
48 return TRUE; /* Overflow */
49 str_length=0;
50 if (Alloced_length < arg_length)
51 {
52 free();
53 if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME))))
54 return TRUE;
55 Alloced_length=arg_length;
56 alloced=1;
57 }
58 Ptr[0]=0;
59 return FALSE;
60 }
61
62
63 /**
64 Allocates a new buffer on the heap for this String.
65
66 - If the String's internal buffer is privately owned and heap allocated,
67 one of the following is performed.
68
69 - If the requested length is greater than what fits in the buffer, a new
70 buffer is allocated, data moved and the old buffer freed.
71
72 - If the requested length is less or equal to what fits in the buffer, a
73 null character is inserted at the appropriate position.
74
75 - If the String does not keep a private buffer on the heap:
76
77 - If the requested length is greater than what fits in the buffer, or
78 force_on_heap is true, a new buffer is allocated, data is copied.
79 - If the requested length is less or equal to what fits in the buffer,
80 and force_on_heap is false, a null character is inserted at the
81 appropriate position.
82
83 For C compatibility, the new string buffer is null terminated.
84
85 @param alloc_length The requested string size in characters, excluding any
86 null terminator.
87 @param force_on_heap If the caller wants String's 'str' buffer to be on the
88 heap in all cases.
89
90 @retval false Either the copy operation is complete or, if the size of the
91 new buffer is smaller than the currently allocated buffer (if one exists),
92 no allocation occured.
93
94 @retval true An error occured when attempting to allocate memory.
95 */
realloc(uint32 alloc_length,bool force_on_heap)96 bool String::realloc(uint32 alloc_length, bool force_on_heap)
97 {
98 uint32 len=ALIGN_SIZE(alloc_length+1);
99 DBUG_ASSERT(len > alloc_length);
100 if (len <= alloc_length)
101 return TRUE; /* Overflow */
102
103 if (force_on_heap && !alloced)
104 {
105 /* Bytes will be allocated on the heap.*/
106 Alloced_length= 0;
107 }
108
109 if (Alloced_length < len)
110 {
111 char *new_ptr;
112 if (alloced)
113 {
114 if (!(new_ptr= (char*) my_realloc(Ptr,len,MYF(MY_WME))))
115 return TRUE; // Signal error
116 }
117 else if ((new_ptr= (char*) my_malloc(len,MYF(MY_WME))))
118 {
119 if (str_length > len - 1)
120 str_length= 0;
121 if (str_length) // Avoid bugs in memcpy on AIX
122 memcpy(new_ptr,Ptr,str_length);
123 new_ptr[str_length]=0;
124 alloced=1;
125 }
126 else
127 return TRUE; // Signal error
128 Ptr= new_ptr;
129 Alloced_length= len;
130 }
131 Ptr[alloc_length]=0; // This make other funcs shorter
132 return FALSE;
133 }
134
set_int(longlong num,bool unsigned_flag,const CHARSET_INFO * cs)135 bool String::set_int(longlong num, bool unsigned_flag, const CHARSET_INFO *cs)
136 {
137 uint l=20*cs->mbmaxlen+1;
138 int base= unsigned_flag ? 10 : -10;
139
140 if (alloc(l))
141 return TRUE;
142 str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num);
143 str_charset=cs;
144 return FALSE;
145 }
146
set_real(double num,uint decimals,const CHARSET_INFO * cs)147 bool String::set_real(double num,uint decimals, const CHARSET_INFO *cs)
148 {
149 char buff[FLOATING_POINT_BUFFER];
150 uint dummy_errors;
151 size_t len;
152
153 str_charset=cs;
154 if (decimals >= NOT_FIXED_DEC)
155 {
156 len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
157 return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
158 }
159 len= my_fcvt(num, decimals, buff, NULL);
160 return copy(buff, (uint32) len, &my_charset_latin1, cs,
161 &dummy_errors);
162 }
163
164
copy()165 bool String::copy()
166 {
167 if (!alloced)
168 {
169 Alloced_length=0; // Force realloc
170 return realloc(str_length);
171 }
172 return FALSE;
173 }
174
175 /**
176 Copies the internal buffer from str. If this String has a private heap
177 allocated buffer where new data does not fit, a new buffer is allocated
178 before copying and the old buffer freed. Character set information is also
179 copied.
180
181 @param str The string whose internal buffer is to be copied.
182
183 @retval false Success.
184 @retval true Memory allocation failed.
185 */
copy(const String & str)186 bool String::copy(const String &str)
187 {
188 if (alloc(str.str_length))
189 return TRUE;
190 str_length=str.str_length;
191 bmove(Ptr,str.Ptr,str_length); // May be overlapping
192 Ptr[str_length]=0;
193 str_charset=str.str_charset;
194 return FALSE;
195 }
196
copy(const char * str,uint32 arg_length,const CHARSET_INFO * cs)197 bool String::copy(const char *str,uint32 arg_length,
198 const CHARSET_INFO *cs)
199 {
200 if (alloc(arg_length))
201 return TRUE;
202 if ((str_length=arg_length))
203 memcpy(Ptr,str,arg_length);
204 Ptr[arg_length]=0;
205 str_charset=cs;
206 return FALSE;
207 }
208
209
210 /*
211 Checks that the source string can be just copied to the destination string
212 without conversion.
213
214 SYNPOSIS
215
216 needs_conversion()
217 arg_length Length of string to copy.
218 from_cs Character set to copy from
219 to_cs Character set to copy to
220 uint32 *offset Returns number of unaligned characters.
221
222 RETURN
223 0 No conversion needed
224 1 Either character set conversion or adding leading zeros
225 (e.g. for UCS-2) must be done
226
227 NOTE
228 to_cs may be NULL for "no conversion" if the system variable
229 character_set_results is NULL.
230 */
231
needs_conversion(uint32 arg_length,const CHARSET_INFO * from_cs,const CHARSET_INFO * to_cs,uint32 * offset)232 bool String::needs_conversion(uint32 arg_length,
233 const CHARSET_INFO *from_cs,
234 const CHARSET_INFO *to_cs,
235 uint32 *offset)
236 {
237 *offset= 0;
238 if (!to_cs ||
239 (to_cs == &my_charset_bin) ||
240 (to_cs == from_cs) ||
241 my_charset_same(from_cs, to_cs) ||
242 ((from_cs == &my_charset_bin) &&
243 (!(*offset=(arg_length % to_cs->mbminlen)))))
244 return FALSE;
245 return TRUE;
246 }
247
248
249 /*
250 Checks that the source string can just be copied to the destination string
251 without conversion.
252 Unlike needs_conversion it will require conversion on incoming binary data
253 to ensure the data are verified for vailidity first.
254
255 @param arg_length Length of string to copy.
256 @param from_cs Character set to copy from
257 @param to_cs Character set to copy to
258
259 @return conversion needed
260 */
needs_conversion_on_storage(uint32 arg_length,const CHARSET_INFO * cs_from,const CHARSET_INFO * cs_to)261 bool String::needs_conversion_on_storage(uint32 arg_length,
262 const CHARSET_INFO *cs_from,
263 const CHARSET_INFO *cs_to)
264 {
265 uint32 offset;
266 return (needs_conversion(arg_length, cs_from, cs_to, &offset) ||
267 /* force conversion when storing a binary string */
268 (cs_from == &my_charset_bin &&
269 /* into a non-binary destination */
270 cs_to != &my_charset_bin &&
271 /* and any of the following is true :*/
272 (
273 /* it's a variable length encoding */
274 cs_to->mbminlen != cs_to->mbmaxlen ||
275 /* longer than 2 bytes : neither 1 byte nor ucs2 */
276 cs_to->mbminlen > 2 ||
277 /* and is not a multiple of the char byte size */
278 0 != (arg_length % cs_to->mbmaxlen)
279 )
280 )
281 );
282 }
283
284
285 /*
286 Copy a multi-byte character sets with adding leading zeros.
287
288 SYNOPSIS
289
290 copy_aligned()
291 str String to copy
292 arg_length Length of string. This should NOT be dividable with
293 cs->mbminlen.
294 offset arg_length % cs->mb_minlength
295 cs Character set for 'str'
296
297 NOTES
298 For real multi-byte, ascii incompatible charactser sets,
299 like UCS-2, add leading zeros if we have an incomplete character.
300 Thus,
301 SELECT _ucs2 0xAA
302 will automatically be converted into
303 SELECT _ucs2 0x00AA
304
305 RETURN
306 0 ok
307 1 error
308 */
309
copy_aligned(const char * str,uint32 arg_length,uint32 offset,const CHARSET_INFO * cs)310 bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset,
311 const CHARSET_INFO *cs)
312 {
313 /* How many bytes are in incomplete character */
314 offset= cs->mbminlen - offset; /* How many zeros we should prepend */
315 DBUG_ASSERT(offset && offset != cs->mbminlen);
316
317 uint32 aligned_length= arg_length + offset;
318 if (alloc(aligned_length))
319 return TRUE;
320
321 /*
322 Note, this is only safe for big-endian UCS-2.
323 If we add little-endian UCS-2 sometimes, this code
324 will be more complicated. But it's OK for now.
325 */
326 memset(Ptr, 0, offset);
327 memcpy(Ptr + offset, str, arg_length);
328 Ptr[aligned_length]=0;
329 /* str_length is always >= 0 as arg_length is != 0 */
330 str_length= aligned_length;
331 str_charset= cs;
332 return FALSE;
333 }
334
335
set_or_copy_aligned(const char * str,uint32 arg_length,const CHARSET_INFO * cs)336 bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
337 const CHARSET_INFO *cs)
338 {
339 /* How many bytes are in incomplete character */
340 uint32 offset= (arg_length % cs->mbminlen);
341
342 if (!offset) /* All characters are complete, just copy */
343 {
344 set(str, arg_length, cs);
345 return FALSE;
346 }
347 return copy_aligned(str, arg_length, offset, cs);
348 }
349
350
351 /**
352 Copies the character data into this String, with optional character set
353 conversion.
354
355 @return
356 FALSE ok
357 TRUE Could not allocate result buffer
358
359 */
360
copy(const char * str,uint32 arg_length,const CHARSET_INFO * from_cs,const CHARSET_INFO * to_cs,uint * errors)361 bool String::copy(const char *str, uint32 arg_length,
362 const CHARSET_INFO *from_cs, const CHARSET_INFO *to_cs, uint *errors)
363 {
364 uint32 offset;
365
366 DBUG_ASSERT(!str || str != Ptr);
367
368 if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
369 {
370 *errors= 0;
371 return copy(str, arg_length, to_cs);
372 }
373 if ((from_cs == &my_charset_bin) && offset)
374 {
375 *errors= 0;
376 return copy_aligned(str, arg_length, offset, to_cs);
377 }
378 uint32 new_length= to_cs->mbmaxlen*arg_length;
379 if (alloc(new_length))
380 return TRUE;
381 str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
382 str, arg_length, from_cs, errors);
383 str_charset=to_cs;
384 return FALSE;
385 }
386
387
388 /*
389 Set a string to the value of a latin1-string, keeping the original charset
390
391 SYNOPSIS
392 copy_or_set()
393 str String of a simple charset (latin1)
394 arg_length Length of string
395
396 IMPLEMENTATION
397 If string object is of a simple character set, set it to point to the
398 given string.
399 If not, make a copy and convert it to the new character set.
400
401 RETURN
402 0 ok
403 1 Could not allocate result buffer
404
405 */
406
set_ascii(const char * str,uint32 arg_length)407 bool String::set_ascii(const char *str, uint32 arg_length)
408 {
409 if (str_charset->mbminlen == 1)
410 {
411 set(str, arg_length, str_charset);
412 return 0;
413 }
414 uint dummy_errors;
415 return copy(str, arg_length, &my_charset_latin1, str_charset, &dummy_errors);
416 }
417
418
419 /* This is used by mysql.cc */
420
fill(uint32 max_length,char fill_char)421 bool String::fill(uint32 max_length,char fill_char)
422 {
423 if (str_length > max_length)
424 Ptr[str_length=max_length]=0;
425 else
426 {
427 if (realloc(max_length))
428 return TRUE;
429 memset(Ptr+str_length, fill_char, max_length-str_length);
430 str_length=max_length;
431 }
432 return FALSE;
433 }
434
strip_sp()435 void String::strip_sp()
436 {
437 while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
438 str_length--;
439 }
440
append(const String & s)441 bool String::append(const String &s)
442 {
443 if (s.length())
444 {
445 DBUG_ASSERT(!this->uses_buffer_owned_by(&s));
446 DBUG_ASSERT(!s.uses_buffer_owned_by(this));
447
448 if (realloc(str_length+s.length()))
449 return TRUE;
450 memcpy(Ptr+str_length,s.ptr(),s.length());
451 str_length+=s.length();
452 }
453 return FALSE;
454 }
455
456
457 /*
458 Append an ASCII string to the a string of the current character set
459 */
460
append(const char * s,uint32 arg_length)461 bool String::append(const char *s,uint32 arg_length)
462 {
463 if (!arg_length)
464 return FALSE;
465
466 /*
467 For an ASCII incompatible string, e.g. UCS-2, we need to convert
468 */
469 if (str_charset->mbminlen > 1)
470 {
471 uint32 add_length=arg_length * str_charset->mbmaxlen;
472 uint dummy_errors;
473 if (realloc(str_length+ add_length))
474 return TRUE;
475 str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
476 s, arg_length, &my_charset_latin1,
477 &dummy_errors);
478 return FALSE;
479 }
480
481 /*
482 For an ASCII compatinble string we can just append.
483 */
484 if (realloc(str_length+arg_length))
485 return TRUE;
486 memcpy(Ptr+str_length,s,arg_length);
487 str_length+=arg_length;
488 return FALSE;
489 }
490
491
492 /*
493 Append a 0-terminated ASCII string
494 */
495
append(const char * s)496 bool String::append(const char *s)
497 {
498 return append(s, (uint) strlen(s));
499 }
500
501
502
append_ulonglong(ulonglong val)503 bool String::append_ulonglong(ulonglong val)
504 {
505 if (realloc(str_length+MAX_BIGINT_WIDTH+2))
506 return TRUE;
507 char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10);
508 str_length= end - Ptr;
509 return FALSE;
510 }
511
512 /*
513 Append a string in the given charset to the string
514 with character set recoding
515 */
516
append(const char * s,uint32 arg_length,const CHARSET_INFO * cs)517 bool String::append(const char *s,uint32 arg_length, const CHARSET_INFO *cs)
518 {
519 uint32 offset;
520
521 if (needs_conversion(arg_length, cs, str_charset, &offset))
522 {
523 uint32 add_length;
524 if ((cs == &my_charset_bin) && offset)
525 {
526 DBUG_ASSERT(str_charset->mbminlen > offset);
527 offset= str_charset->mbminlen - offset; // How many characters to pad
528 add_length= arg_length + offset;
529 if (realloc(str_length + add_length))
530 return TRUE;
531 memset(Ptr + str_length, 0, offset);
532 memcpy(Ptr + str_length + offset, s, arg_length);
533 str_length+= add_length;
534 return FALSE;
535 }
536
537 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
538 uint dummy_errors;
539 if (realloc(str_length + add_length))
540 return TRUE;
541 str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
542 s, arg_length, cs, &dummy_errors);
543 }
544 else
545 {
546 if (realloc(str_length + arg_length))
547 return TRUE;
548 memcpy(Ptr + str_length, s, arg_length);
549 str_length+= arg_length;
550 }
551 return FALSE;
552 }
553
append(IO_CACHE * file,uint32 arg_length)554 bool String::append(IO_CACHE* file, uint32 arg_length)
555 {
556 if (realloc(str_length+arg_length))
557 return TRUE;
558 if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
559 {
560 shrink(str_length);
561 return TRUE;
562 }
563 str_length+=arg_length;
564 return FALSE;
565 }
566
567
568 /**
569 Append a parenthesized number to String.
570 Used in various pieces of SHOW related code.
571
572 @param nr Number
573 @param radix Radix, optional parameter, 10 by default.
574 */
append_parenthesized(long nr,int radix)575 bool String::append_parenthesized(long nr, int radix)
576 {
577 char buff[64], *end;
578 buff[0]= '(';
579 end= int10_to_str(nr, buff + 1, radix);
580 *end++ = ')';
581 return append(buff, (uint) (end - buff));
582 }
583
584
append_with_prefill(const char * s,uint32 arg_length,uint32 full_length,char fill_char)585 bool String::append_with_prefill(const char *s,uint32 arg_length,
586 uint32 full_length, char fill_char)
587 {
588 int t_length= arg_length > full_length ? arg_length : full_length;
589
590 if (realloc(str_length + t_length))
591 return TRUE;
592 t_length= full_length - arg_length;
593 if (t_length > 0)
594 {
595 memset(Ptr+str_length, fill_char, t_length);
596 str_length=str_length + t_length;
597 }
598 append(s, arg_length);
599 return FALSE;
600 }
601
numchars() const602 uint32 String::numchars() const
603 {
604 return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
605 }
606
charpos(int i,uint32 offset)607 int String::charpos(int i,uint32 offset)
608 {
609 if (i <= 0)
610 return i;
611 return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
612 }
613
strstr(const String & s,uint32 offset)614 int String::strstr(const String &s,uint32 offset)
615 {
616 if (s.length()+offset <= str_length)
617 {
618 if (!s.length())
619 return ((int) offset); // Empty string is always found
620
621 const char *str = Ptr+offset;
622 const char *search=s.ptr();
623 const char *end=Ptr+str_length-s.length()+1;
624 const char *search_end=s.ptr()+s.length();
625 skip:
626 while (str != end)
627 {
628 if (*str++ == *search)
629 {
630 char *i,*j;
631 i=(char*) str; j=(char*) search+1;
632 while (j != search_end)
633 if (*i++ != *j++) goto skip;
634 return (int) (str-Ptr) -1;
635 }
636 }
637 }
638 return -1;
639 }
640
641 /*
642 ** Search string from end. Offset is offset to the end of string
643 */
644
strrstr(const String & s,uint32 offset)645 int String::strrstr(const String &s,uint32 offset)
646 {
647 if (s.length() <= offset && offset <= str_length)
648 {
649 if (!s.length())
650 return offset; // Empty string is always found
651 const char *str = Ptr+offset-1;
652 const char *search=s.ptr()+s.length()-1;
653
654 const char *end=Ptr+s.length()-2;
655 const char *search_end=s.ptr()-1;
656 skip:
657 while (str != end)
658 {
659 if (*str-- == *search)
660 {
661 char *i,*j;
662 i=(char*) str; j=(char*) search-1;
663 while (j != search_end)
664 if (*i-- != *j--) goto skip;
665 return (int) (i-Ptr) +1;
666 }
667 }
668 }
669 return -1;
670 }
671
672 /*
673 Replace substring with string
674 If wrong parameter or not enough memory, do nothing
675 */
676
replace(uint32 offset,uint32 arg_length,const String & to)677 bool String::replace(uint32 offset,uint32 arg_length,const String &to)
678 {
679 return replace(offset,arg_length,to.ptr(),to.length());
680 }
681
replace(uint32 offset,uint32 arg_length,const char * to,uint32 to_length)682 bool String::replace(uint32 offset,uint32 arg_length,
683 const char *to, uint32 to_length)
684 {
685 long diff = (long) to_length-(long) arg_length;
686 if (offset+arg_length <= str_length)
687 {
688 if (diff < 0)
689 {
690 if (to_length)
691 memcpy(Ptr+offset,to,to_length);
692 bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
693 str_length-offset-arg_length);
694 }
695 else
696 {
697 if (diff)
698 {
699 if (realloc(str_length+(uint32) diff))
700 return TRUE;
701 bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
702 str_length-offset-arg_length);
703 }
704 if (to_length)
705 memcpy(Ptr+offset,to,to_length);
706 }
707 str_length+=(uint32) diff;
708 }
709 return FALSE;
710 }
711
712
713 // added by Holyfoot for "geometry" needs
reserve(uint32 space_needed,uint32 grow_by)714 int String::reserve(uint32 space_needed, uint32 grow_by)
715 {
716 if (Alloced_length < str_length + space_needed)
717 {
718 if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
719 return TRUE;
720 }
721 return FALSE;
722 }
723
qs_append(const char * str,uint32 len)724 void String::qs_append(const char *str, uint32 len)
725 {
726 memcpy(Ptr + str_length, str, len + 1);
727 str_length += len;
728 }
729
qs_append(double d)730 void String::qs_append(double d)
731 {
732 char *buff = Ptr + str_length;
733 str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff,
734 NULL);
735 }
736
qs_append(double * d)737 void String::qs_append(double *d)
738 {
739 double ld;
740 float8get(ld, (char*) d);
741 qs_append(ld);
742 }
743
qs_append(int i)744 void String::qs_append(int i)
745 {
746 char *buff= Ptr + str_length;
747 char *end= int10_to_str(i, buff, -10);
748 str_length+= (int) (end-buff);
749 }
750
qs_append(uint i)751 void String::qs_append(uint i)
752 {
753 char *buff= Ptr + str_length;
754 char *end= int10_to_str(i, buff, 10);
755 str_length+= (int) (end-buff);
756 }
757
758 /*
759 Compare strings according to collation, without end space.
760
761 SYNOPSIS
762 sortcmp()
763 s First string
764 t Second string
765 cs Collation
766
767 NOTE:
768 Normally this is case sensitive comparison
769
770 RETURN
771 < 0 s < t
772 0 s == t
773 > 0 s > t
774 */
775
776
sortcmp(const String * s,const String * t,const CHARSET_INFO * cs)777 int sortcmp(const String *s,const String *t, const CHARSET_INFO *cs)
778 {
779 return cs->coll->strnncollsp(cs,
780 (uchar *) s->ptr(),s->length(),
781 (uchar *) t->ptr(),t->length(), 0);
782 }
783
784
785 /*
786 Compare strings byte by byte. End spaces are also compared.
787
788 SYNOPSIS
789 stringcmp()
790 s First string
791 t Second string
792
793 NOTE:
794 Strings are compared as a stream of uchars
795
796 RETURN
797 < 0 s < t
798 0 s == t
799 > 0 s > t
800 */
801
802
stringcmp(const String * s,const String * t)803 int stringcmp(const String *s,const String *t)
804 {
805 uint32 s_len=s->length(),t_len=t->length(),len=min(s_len,t_len);
806 int cmp= memcmp(s->ptr(), t->ptr(), len);
807 return (cmp) ? cmp : (int) (s_len - t_len);
808 }
809
810 /**
811 Makes a copy of a String's buffer unless it's already heap-allocated.
812
813 If the buffer ('str') of 'from' is on the heap, this function returns
814 'from', possibly re-allocated to be at least from_length bytes long.
815 It is also the case if from==to or to==NULL.
816 Otherwise, this function makes and returns a copy of "from" into "to"; the
817 buffer of "to" is heap-allocated; a pre-condition is that from->str and
818 to->str must point to non-overlapping buffers.
819 The logic behind this complex design, is that a caller, typically a
820 val_str() function, sometimes has an input String ('from') which buffer it
821 wants to modify; but this String's buffer may or not be heap-allocated; if
822 it's not heap-allocated it is possibly in static storage or belongs to an
823 outer context, and thus should not be modified; in that case the caller
824 wants a heap-allocated copy which it can freely modify.
825
826 @param to destination string
827 @param from source string
828 @param from_length destination string will hold at least from_length bytes.
829 */
copy_if_not_alloced(String * to,String * from,uint32 from_length)830 String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
831 {
832 if (from->alloced && from->Alloced_length >= from_length)
833 return from;
834 if ((from->alloced && (from->Alloced_length != 0)) || !to || from == to)
835 {
836 (void) from->realloc(from_length, true);
837 return from;
838 }
839 if (to->realloc(from_length, true))
840 return from; // Actually an error
841
842 // from and to should not be overlapping
843 DBUG_ASSERT(!to->uses_buffer_owned_by(from));
844 DBUG_ASSERT(!from->uses_buffer_owned_by(to));
845
846 if ((to->str_length=min(from->str_length,from_length)))
847 memcpy(to->Ptr,from->Ptr,to->str_length);
848 to->str_charset=from->str_charset;
849 return to;
850 }
851
852
853 /****************************************************************************
854 Help functions
855 ****************************************************************************/
856
857 /**
858 Copy string with HEX-encoding of "bad" characters.
859
860 @details This functions copies the string pointed by "src"
861 to the string pointed by "dst". Not more than "srclen" bytes
862 are read from "src". Any sequences of bytes representing
863 a not-well-formed substring (according to cs) are hex-encoded,
864 and all well-formed substrings (according to cs) are copied as is.
865 Not more than "dstlen" bytes are written to "dst". The number
866 of bytes written to "dst" is returned.
867
868 @param cs character set pointer of the destination string
869 @param[out] dst destination string
870 @param dstlen size of dst
871 @param src source string
872 @param srclen length of src
873
874 @retval result length
875 */
876
877 size_t
my_copy_with_hex_escaping(const CHARSET_INFO * cs,char * dst,size_t dstlen,const char * src,size_t srclen)878 my_copy_with_hex_escaping(const CHARSET_INFO *cs,
879 char *dst, size_t dstlen,
880 const char *src, size_t srclen)
881 {
882 const char *srcend= src + srclen;
883 char *dst0= dst;
884
885 for ( ; src < srcend ; )
886 {
887 size_t chlen;
888 if ((chlen= my_ismbchar(cs, src, srcend)))
889 {
890 if (dstlen < chlen)
891 break; /* purecov: inspected */
892 memcpy(dst, src, chlen);
893 src+= chlen;
894 dst+= chlen;
895 dstlen-= chlen;
896 }
897 else if (*src & 0x80)
898 {
899 if (dstlen < 4)
900 break; /* purecov: inspected */
901 *dst++= '\\';
902 *dst++= 'x';
903 *dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
904 *dst++= _dig_vec_upper[((unsigned char) *src) & 15];
905 src++;
906 dstlen-= 4;
907 }
908 else
909 {
910 if (dstlen < 1)
911 break; /* purecov: inspected */
912 *dst++= *src++;
913 dstlen--;
914 }
915 }
916 return dst - dst0;
917 }
918
919 /*
920 copy a string,
921 with optional character set conversion,
922 with optional left padding (for binary -> UCS2 conversion)
923
924 SYNOPSIS
925 well_formed_copy_nchars()
926 to Store result here
927 to_length Maxinum length of "to" string
928 to_cs Character set of "to" string
929 from Copy from here
930 from_length Length of from string
931 from_cs From character set
932 nchars Copy not more that nchars characters
933 well_formed_error_pos Return position when "from" is not well formed
934 or NULL otherwise.
935 cannot_convert_error_pos Return position where a not convertable
936 character met, or NULL otherwise.
937 from_end_pos Return position where scanning of "from"
938 string stopped.
939 NOTES
940
941 RETURN
942 length of bytes copied to 'to'
943 */
944
945
946 uint32
well_formed_copy_nchars(const CHARSET_INFO * to_cs,char * to,uint to_length,const CHARSET_INFO * from_cs,const char * from,uint from_length,uint nchars,const char ** well_formed_error_pos,const char ** cannot_convert_error_pos,const char ** from_end_pos)947 well_formed_copy_nchars(const CHARSET_INFO *to_cs,
948 char *to, uint to_length,
949 const CHARSET_INFO *from_cs,
950 const char *from, uint from_length,
951 uint nchars,
952 const char **well_formed_error_pos,
953 const char **cannot_convert_error_pos,
954 const char **from_end_pos)
955 {
956 uint res;
957
958 if ((to_cs == &my_charset_bin) ||
959 (from_cs == &my_charset_bin) ||
960 (to_cs == from_cs) ||
961 my_charset_same(from_cs, to_cs))
962 {
963 if (to_length < to_cs->mbminlen || !nchars)
964 {
965 *from_end_pos= from;
966 *cannot_convert_error_pos= NULL;
967 *well_formed_error_pos= NULL;
968 return 0;
969 }
970
971 if (to_cs == &my_charset_bin)
972 {
973 res= min(min(nchars, to_length), from_length);
974 memmove(to, from, res);
975 *from_end_pos= from + res;
976 *well_formed_error_pos= NULL;
977 *cannot_convert_error_pos= NULL;
978 }
979 else
980 {
981 int well_formed_error;
982 uint from_offset;
983
984 if ((from_offset= (from_length % to_cs->mbminlen)) &&
985 (from_cs == &my_charset_bin))
986 {
987 /*
988 Copying from BINARY to UCS2 needs to prepend zeros sometimes:
989 INSERT INTO t1 (ucs2_column) VALUES (0x01);
990 0x01 -> 0x0001
991 */
992 uint pad_length= to_cs->mbminlen - from_offset;
993 memset(to, 0, pad_length);
994 memmove(to + pad_length, from, from_offset);
995 /*
996 In some cases left zero-padding can create an incorrect character.
997 For example:
998 INSERT INTO t1 (utf32_column) VALUES (0x110000);
999 We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
1000 The valid characters range is limited to 0x00000000..0x0010FFFF.
1001
1002 Make sure we didn't pad to an incorrect character.
1003 */
1004 if (to_cs->cset->well_formed_len(to_cs,
1005 to, to + to_cs->mbminlen, 1,
1006 &well_formed_error) !=
1007 to_cs->mbminlen)
1008 {
1009 *from_end_pos= *well_formed_error_pos= from;
1010 *cannot_convert_error_pos= NULL;
1011 return 0;
1012 }
1013 nchars--;
1014 from+= from_offset;
1015 from_length-= from_offset;
1016 to+= to_cs->mbminlen;
1017 to_length-= to_cs->mbminlen;
1018 }
1019
1020 set_if_smaller(from_length, to_length);
1021 res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
1022 nchars, &well_formed_error);
1023 memmove(to, from, res);
1024 *from_end_pos= from + res;
1025 *well_formed_error_pos= well_formed_error ? from + res : NULL;
1026 *cannot_convert_error_pos= NULL;
1027 if (from_offset)
1028 res+= to_cs->mbminlen;
1029 }
1030 }
1031 else
1032 {
1033 int cnvres;
1034 my_wc_t wc;
1035 my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
1036 my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
1037 const uchar *from_end= (const uchar*) from + from_length;
1038 uchar *to_end= (uchar*) to + to_length;
1039 char *to_start= to;
1040 *well_formed_error_pos= NULL;
1041 *cannot_convert_error_pos= NULL;
1042
1043 for ( ; nchars; nchars--)
1044 {
1045 const char *from_prev= from;
1046 if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
1047 from+= cnvres;
1048 else if (cnvres == MY_CS_ILSEQ)
1049 {
1050 if (!*well_formed_error_pos)
1051 *well_formed_error_pos= from;
1052 from++;
1053 wc= '?';
1054 }
1055 else if (cnvres > MY_CS_TOOSMALL)
1056 {
1057 /*
1058 A correct multibyte sequence detected
1059 But it doesn't have Unicode mapping.
1060 */
1061 if (!*cannot_convert_error_pos)
1062 *cannot_convert_error_pos= from;
1063 from+= (-cnvres);
1064 wc= '?';
1065 }
1066 else
1067 break; // Not enough characters
1068
1069 outp:
1070 if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1071 to+= cnvres;
1072 else if (cnvres == MY_CS_ILUNI && wc != '?')
1073 {
1074 if (!*cannot_convert_error_pos)
1075 *cannot_convert_error_pos= from_prev;
1076 wc= '?';
1077 goto outp;
1078 }
1079 else
1080 {
1081 from= from_prev;
1082 break;
1083 }
1084 }
1085 *from_end_pos= from;
1086 res= (uint) (to - to_start);
1087 }
1088 return (uint32) res;
1089 }
1090
1091
1092
1093
print(String * str)1094 void String::print(String *str)
1095 {
1096 char *st= (char*)Ptr, *end= st+str_length;
1097 for (; st < end; st++)
1098 {
1099 uchar c= *st;
1100 switch (c)
1101 {
1102 case '\\':
1103 str->append(STRING_WITH_LEN("\\\\"));
1104 break;
1105 case '\0':
1106 str->append(STRING_WITH_LEN("\\0"));
1107 break;
1108 case '\'':
1109 str->append(STRING_WITH_LEN("\\'"));
1110 break;
1111 case '\n':
1112 str->append(STRING_WITH_LEN("\\n"));
1113 break;
1114 case '\r':
1115 str->append(STRING_WITH_LEN("\\r"));
1116 break;
1117 case '\032': // Ctrl-Z
1118 str->append(STRING_WITH_LEN("\\Z"));
1119 break;
1120 default:
1121 str->append(c);
1122 }
1123 }
1124 }
1125
1126
1127 /*
1128 Exchange state of this object and argument.
1129
1130 SYNOPSIS
1131 String::swap()
1132
1133 RETURN
1134 Target string will contain state of this object and vice versa.
1135 */
1136
swap(String & s)1137 void String::swap(String &s)
1138 {
1139 swap_variables(char *, Ptr, s.Ptr);
1140 swap_variables(uint32, str_length, s.str_length);
1141 swap_variables(uint32, Alloced_length, s.Alloced_length);
1142 swap_variables(bool, alloced, s.alloced);
1143 swap_variables(const CHARSET_INFO *, str_charset, s.str_charset);
1144 }
1145
1146
1147 /**
1148 Convert string to printable ASCII string
1149
1150 @details This function converts input string "from" replacing non-ASCII bytes
1151 with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
1152 the resulting string.
1153 This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages,
1154 e.g. when a string cannot be converted to a result charset.
1155
1156
1157 @param to output buffer
1158 @param to_len size of the output buffer (8 bytes or greater)
1159 @param from input string
1160 @param from_len size of the input string
1161 @param from_cs input charset
1162 @param nbytes maximal number of bytes to convert (from_len if 0)
1163
1164 @return number of bytes in the output string
1165 */
1166
convert_to_printable(char * to,size_t to_len,const char * from,size_t from_len,const CHARSET_INFO * from_cs,size_t nbytes)1167 uint convert_to_printable(char *to, size_t to_len,
1168 const char *from, size_t from_len,
1169 const CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
1170 {
1171 /* needs at least 8 bytes for '\xXX...' and zero byte */
1172 DBUG_ASSERT(to_len >= 8);
1173
1174 char *t= to;
1175 char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
1176 const char *f= from;
1177 const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len);
1178 char *dots= to; // last safe place to append '...'
1179
1180 if (!f || t == t_end)
1181 return 0;
1182
1183 for (; t < t_end && f < f_end; f++)
1184 {
1185 /*
1186 If the source string is ASCII compatible (mbminlen==1)
1187 and the source character is in ASCII printable range (0x20..0x7F),
1188 then display the character as is.
1189
1190 Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
1191 or the source character is not in the printable range,
1192 then print the character using HEX notation.
1193 */
1194 if (((unsigned char) *f) >= 0x20 &&
1195 ((unsigned char) *f) <= 0x7F &&
1196 from_cs->mbminlen == 1)
1197 {
1198 *t++= *f;
1199 }
1200 else
1201 {
1202 if (t_end - t < 4) // \xXX
1203 break;
1204 *t++= '\\';
1205 *t++= 'x';
1206 *t++= _dig_vec_upper[((unsigned char) *f) >> 4];
1207 *t++= _dig_vec_upper[((unsigned char) *f) & 0x0F];
1208 }
1209 if (t_end - t >= 3) // '...'
1210 dots= t;
1211 }
1212 if (f < from + from_len)
1213 memcpy(dots, STRING_WITH_LEN("...\0"));
1214 else
1215 *t= '\0';
1216 return t - to;
1217 }
1218
1219 /**
1220 Check if an input byte sequence is a valid character string of a given charset
1221
1222 @param cs The input character set.
1223 @param str The input byte sequence to validate.
1224 @param length A byte length of the str.
1225 @param [out] valid_length A byte length of a valid prefix of the str.
1226 @param [out] length_error True in the case of a character length error:
1227 some byte[s] in the input is not a valid
1228 prefix for a character, i.e. the byte length
1229 of that invalid character is undefined.
1230
1231 @retval true if the whole input byte sequence is a valid character string.
1232 The length_error output parameter is undefined.
1233
1234 @return
1235 if the whole input byte sequence is a valid character string
1236 then
1237 return false
1238 else
1239 if the length of some character in the input is undefined (MY_CS_ILSEQ)
1240 or the last character is truncated (MY_CS_TOOSMALL)
1241 then
1242 *length_error= true; // fatal error!
1243 else
1244 *length_error= false; // non-fatal error: there is no wide character
1245 // encoding for some input character
1246 return true
1247 */
validate_string(const CHARSET_INFO * cs,const char * str,uint32 length,size_t * valid_length,bool * length_error)1248 bool validate_string(const CHARSET_INFO *cs, const char *str, uint32 length,
1249 size_t *valid_length, bool *length_error)
1250 {
1251 if (cs->mbmaxlen > 1)
1252 {
1253 int well_formed_error;
1254 *valid_length= cs->cset->well_formed_len(cs, str, str + length,
1255 length, &well_formed_error);
1256 *length_error= well_formed_error;
1257 return well_formed_error;
1258 }
1259
1260 /*
1261 well_formed_len() is not functional on single-byte character sets,
1262 so use mb_wc() instead:
1263 */
1264 *length_error= false;
1265
1266 const uchar *from= reinterpret_cast<const uchar *>(str);
1267 const uchar *from_end= from + length;
1268 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1269
1270 while (from < from_end)
1271 {
1272 my_wc_t wc;
1273 int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end);
1274 if (cnvres <= 0)
1275 {
1276 *valid_length= from - reinterpret_cast<const uchar *>(str);
1277 return true;
1278 }
1279 from+= cnvres;
1280 }
1281 *valid_length= length;
1282 return false;
1283 }
1284