1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights
2  * reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23 
24 /* This file is originally from the mysql distribution. Coded by monty */
25 
26 #include <my_global.h>
27 #include <my_sys.h>
28 #include <m_string.h>
29 #include <m_ctype.h>
30 #include <mysql_com.h>
31 
32 #include "sql_string.h"
33 
34 #include <algorithm>
35 
36 using std::min;
37 using std::max;
38 
39 /*****************************************************************************
40 ** String functions
41 *****************************************************************************/
42 
real_alloc(uint32 length)43 bool String::real_alloc(uint32 length)
44 {
45   uint32 arg_length= ALIGN_SIZE(length + 1);
46   DBUG_ASSERT(arg_length > length);
47   if (arg_length <= length)
48     return TRUE;                                 /* Overflow */
49   str_length=0;
50   if (Alloced_length < arg_length)
51   {
52     free();
53     if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME))))
54       return TRUE;
55     Alloced_length=arg_length;
56     alloced=1;
57   }
58   Ptr[0]=0;
59   return FALSE;
60 }
61 
62 
63 /**
64    Allocates a new buffer on the heap for this String.
65 
66    - If the String's internal buffer is privately owned and heap allocated,
67      one of the following is performed.
68 
69      - If the requested length is greater than what fits in the buffer, a new
70        buffer is allocated, data moved and the old buffer freed.
71 
72      - If the requested length is less or equal to what fits in the buffer, a
73        null character is inserted at the appropriate position.
74 
75    - If the String does not keep a private buffer on the heap:
76 
77       - If the requested length is greater than what fits in the buffer, or
78         force_on_heap is true, a new buffer is allocated, data is copied.
79       - If the requested length is less or equal to what fits in the buffer,
80         and force_on_heap is false, a null character is inserted at the
81         appropriate position.
82 
83    For C compatibility, the new string buffer is null terminated.
84 
85    @param alloc_length The requested string size in characters, excluding any
86    null terminator.
87    @param force_on_heap If the caller wants String's 'str' buffer to be on the
88    heap in all cases.
89 
90    @retval false Either the copy operation is complete or, if the size of the
91    new buffer is smaller than the currently allocated buffer (if one exists),
92    no allocation occured.
93 
94    @retval true An error occured when attempting to allocate memory.
95 */
realloc(uint32 alloc_length,bool force_on_heap)96 bool String::realloc(uint32 alloc_length, bool force_on_heap)
97 {
98   uint32 len=ALIGN_SIZE(alloc_length+1);
99   DBUG_ASSERT(len > alloc_length);
100   if (len <= alloc_length)
101     return TRUE;                                 /* Overflow */
102 
103   if (force_on_heap && !alloced)
104   {
105     /* Bytes will be allocated on the heap.*/
106     Alloced_length= 0;
107   }
108 
109   if (Alloced_length < len)
110   {
111     char *new_ptr;
112     if (alloced)
113     {
114       if (!(new_ptr= (char*) my_realloc(Ptr,len,MYF(MY_WME))))
115         return TRUE;				// Signal error
116     }
117     else if ((new_ptr= (char*) my_malloc(len,MYF(MY_WME))))
118     {
119       if (str_length > len - 1)
120         str_length= 0;
121       if (str_length)				// Avoid bugs in memcpy on AIX
122 	memcpy(new_ptr,Ptr,str_length);
123       new_ptr[str_length]=0;
124       alloced=1;
125     }
126     else
127       return TRUE;			// Signal error
128     Ptr= new_ptr;
129     Alloced_length= len;
130   }
131   Ptr[alloc_length]=0;			// This make other funcs shorter
132   return FALSE;
133 }
134 
set_int(longlong num,bool unsigned_flag,const CHARSET_INFO * cs)135 bool String::set_int(longlong num, bool unsigned_flag, const CHARSET_INFO *cs)
136 {
137   uint l=20*cs->mbmaxlen+1;
138   int base= unsigned_flag ? 10 : -10;
139 
140   if (alloc(l))
141     return TRUE;
142   str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num);
143   str_charset=cs;
144   return FALSE;
145 }
146 
set_real(double num,uint decimals,const CHARSET_INFO * cs)147 bool String::set_real(double num,uint decimals, const CHARSET_INFO *cs)
148 {
149   char buff[FLOATING_POINT_BUFFER];
150   uint dummy_errors;
151   size_t len;
152 
153   str_charset=cs;
154   if (decimals >= NOT_FIXED_DEC)
155   {
156     len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
157     return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
158   }
159   len= my_fcvt(num, decimals, buff, NULL);
160   return copy(buff, (uint32) len, &my_charset_latin1, cs,
161               &dummy_errors);
162 }
163 
164 
copy()165 bool String::copy()
166 {
167   if (!alloced)
168   {
169     Alloced_length=0;				// Force realloc
170     return realloc(str_length);
171   }
172   return FALSE;
173 }
174 
175 /**
176    Copies the internal buffer from str. If this String has a private heap
177    allocated buffer where new data does not fit, a new buffer is allocated
178    before copying and the old buffer freed. Character set information is also
179    copied.
180 
181    @param str The string whose internal buffer is to be copied.
182 
183    @retval false Success.
184    @retval true Memory allocation failed.
185 */
copy(const String & str)186 bool String::copy(const String &str)
187 {
188   if (alloc(str.str_length))
189     return TRUE;
190   str_length=str.str_length;
191   bmove(Ptr,str.Ptr,str_length);		// May be overlapping
192   Ptr[str_length]=0;
193   str_charset=str.str_charset;
194   return FALSE;
195 }
196 
copy(const char * str,uint32 arg_length,const CHARSET_INFO * cs)197 bool String::copy(const char *str,uint32 arg_length,
198                   const CHARSET_INFO *cs)
199 {
200   if (alloc(arg_length))
201     return TRUE;
202   if ((str_length=arg_length))
203     memcpy(Ptr,str,arg_length);
204   Ptr[arg_length]=0;
205   str_charset=cs;
206   return FALSE;
207 }
208 
209 
210 /*
211   Checks that the source string can be just copied to the destination string
212   without conversion.
213 
214   SYNPOSIS
215 
216   needs_conversion()
217   arg_length		Length of string to copy.
218   from_cs		Character set to copy from
219   to_cs			Character set to copy to
220   uint32 *offset	Returns number of unaligned characters.
221 
222   RETURN
223    0  No conversion needed
224    1  Either character set conversion or adding leading  zeros
225       (e.g. for UCS-2) must be done
226 
227   NOTE
228   to_cs may be NULL for "no conversion" if the system variable
229   character_set_results is NULL.
230 */
231 
needs_conversion(uint32 arg_length,const CHARSET_INFO * from_cs,const CHARSET_INFO * to_cs,uint32 * offset)232 bool String::needs_conversion(uint32 arg_length,
233 			      const CHARSET_INFO *from_cs,
234 			      const CHARSET_INFO *to_cs,
235 			      uint32 *offset)
236 {
237   *offset= 0;
238   if (!to_cs ||
239       (to_cs == &my_charset_bin) ||
240       (to_cs == from_cs) ||
241       my_charset_same(from_cs, to_cs) ||
242       ((from_cs == &my_charset_bin) &&
243        (!(*offset=(arg_length % to_cs->mbminlen)))))
244     return FALSE;
245   return TRUE;
246 }
247 
248 
249 /*
250   Checks that the source string can just be copied to the destination string
251   without conversion.
252   Unlike needs_conversion it will require conversion on incoming binary data
253   to ensure the data are verified for vailidity first.
254 
255   @param arg_length   Length of string to copy.
256   @param from_cs      Character set to copy from
257   @param to_cs        Character set to copy to
258 
259   @return conversion needed
260 */
needs_conversion_on_storage(uint32 arg_length,const CHARSET_INFO * cs_from,const CHARSET_INFO * cs_to)261 bool String::needs_conversion_on_storage(uint32 arg_length,
262                                          const CHARSET_INFO *cs_from,
263                                          const CHARSET_INFO *cs_to)
264 {
265   uint32 offset;
266   return (needs_conversion(arg_length, cs_from, cs_to, &offset) ||
267           /* force conversion when storing a binary string */
268           (cs_from == &my_charset_bin &&
269           /* into a non-binary destination */
270            cs_to != &my_charset_bin &&
271            /* and any of the following is true :*/
272            (
273             /* it's a variable length encoding */
274             cs_to->mbminlen != cs_to->mbmaxlen ||
275             /* longer than 2 bytes : neither 1 byte nor ucs2 */
276             cs_to->mbminlen > 2 ||
277             /* and is not a multiple of the char byte size */
278             0 != (arg_length % cs_to->mbmaxlen)
279            )
280           )
281          );
282 }
283 
284 
285 /*
286   Copy a multi-byte character sets with adding leading zeros.
287 
288   SYNOPSIS
289 
290   copy_aligned()
291   str			String to copy
292   arg_length		Length of string. This should NOT be dividable with
293 			cs->mbminlen.
294   offset		arg_length % cs->mb_minlength
295   cs			Character set for 'str'
296 
297   NOTES
298     For real multi-byte, ascii incompatible charactser sets,
299     like UCS-2, add leading zeros if we have an incomplete character.
300     Thus,
301       SELECT _ucs2 0xAA
302     will automatically be converted into
303       SELECT _ucs2 0x00AA
304 
305   RETURN
306     0  ok
307     1  error
308 */
309 
copy_aligned(const char * str,uint32 arg_length,uint32 offset,const CHARSET_INFO * cs)310 bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset,
311 			  const CHARSET_INFO *cs)
312 {
313   /* How many bytes are in incomplete character */
314   offset= cs->mbminlen - offset; /* How many zeros we should prepend */
315   DBUG_ASSERT(offset && offset != cs->mbminlen);
316 
317   uint32 aligned_length= arg_length + offset;
318   if (alloc(aligned_length))
319     return TRUE;
320 
321   /*
322     Note, this is only safe for big-endian UCS-2.
323     If we add little-endian UCS-2 sometimes, this code
324     will be more complicated. But it's OK for now.
325   */
326   memset(Ptr, 0, offset);
327   memcpy(Ptr + offset, str, arg_length);
328   Ptr[aligned_length]=0;
329   /* str_length is always >= 0 as arg_length is != 0 */
330   str_length= aligned_length;
331   str_charset= cs;
332   return FALSE;
333 }
334 
335 
set_or_copy_aligned(const char * str,uint32 arg_length,const CHARSET_INFO * cs)336 bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
337 				 const CHARSET_INFO *cs)
338 {
339   /* How many bytes are in incomplete character */
340   uint32 offset= (arg_length % cs->mbminlen);
341 
342   if (!offset) /* All characters are complete, just copy */
343   {
344     set(str, arg_length, cs);
345     return FALSE;
346   }
347   return copy_aligned(str, arg_length, offset, cs);
348 }
349 
350 
351 /**
352    Copies the character data into this String, with optional character set
353    conversion.
354 
355    @return
356    FALSE ok
357    TRUE  Could not allocate result buffer
358 
359 */
360 
copy(const char * str,uint32 arg_length,const CHARSET_INFO * from_cs,const CHARSET_INFO * to_cs,uint * errors)361 bool String::copy(const char *str, uint32 arg_length,
362 		  const CHARSET_INFO *from_cs, const CHARSET_INFO *to_cs, uint *errors)
363 {
364   uint32 offset;
365 
366   DBUG_ASSERT(!str || str != Ptr);
367 
368   if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
369   {
370     *errors= 0;
371     return copy(str, arg_length, to_cs);
372   }
373   if ((from_cs == &my_charset_bin) && offset)
374   {
375     *errors= 0;
376     return copy_aligned(str, arg_length, offset, to_cs);
377   }
378   uint32 new_length= to_cs->mbmaxlen*arg_length;
379   if (alloc(new_length))
380     return TRUE;
381   str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
382                               str, arg_length, from_cs, errors);
383   str_charset=to_cs;
384   return FALSE;
385 }
386 
387 
388 /*
389   Set a string to the value of a latin1-string, keeping the original charset
390 
391   SYNOPSIS
392     copy_or_set()
393     str			String of a simple charset (latin1)
394     arg_length		Length of string
395 
396   IMPLEMENTATION
397     If string object is of a simple character set, set it to point to the
398     given string.
399     If not, make a copy and convert it to the new character set.
400 
401   RETURN
402     0	ok
403     1	Could not allocate result buffer
404 
405 */
406 
set_ascii(const char * str,uint32 arg_length)407 bool String::set_ascii(const char *str, uint32 arg_length)
408 {
409   if (str_charset->mbminlen == 1)
410   {
411     set(str, arg_length, str_charset);
412     return 0;
413   }
414   uint dummy_errors;
415   return copy(str, arg_length, &my_charset_latin1, str_charset, &dummy_errors);
416 }
417 
418 
419 /* This is used by mysql.cc */
420 
fill(uint32 max_length,char fill_char)421 bool String::fill(uint32 max_length,char fill_char)
422 {
423   if (str_length > max_length)
424     Ptr[str_length=max_length]=0;
425   else
426   {
427     if (realloc(max_length))
428       return TRUE;
429     memset(Ptr+str_length, fill_char, max_length-str_length);
430     str_length=max_length;
431   }
432   return FALSE;
433 }
434 
strip_sp()435 void String::strip_sp()
436 {
437    while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
438     str_length--;
439 }
440 
append(const String & s)441 bool String::append(const String &s)
442 {
443   if (s.length())
444   {
445     DBUG_ASSERT(!this->uses_buffer_owned_by(&s));
446     DBUG_ASSERT(!s.uses_buffer_owned_by(this));
447 
448     if (realloc(str_length+s.length()))
449       return TRUE;
450     memcpy(Ptr+str_length,s.ptr(),s.length());
451     str_length+=s.length();
452   }
453   return FALSE;
454 }
455 
456 
457 /*
458   Append an ASCII string to the a string of the current character set
459 */
460 
append(const char * s,uint32 arg_length)461 bool String::append(const char *s,uint32 arg_length)
462 {
463   if (!arg_length)
464     return FALSE;
465 
466   /*
467     For an ASCII incompatible string, e.g. UCS-2, we need to convert
468   */
469   if (str_charset->mbminlen > 1)
470   {
471     uint32 add_length=arg_length * str_charset->mbmaxlen;
472     uint dummy_errors;
473     if (realloc(str_length+ add_length))
474       return TRUE;
475     str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
476 				  s, arg_length, &my_charset_latin1,
477                                   &dummy_errors);
478     return FALSE;
479   }
480 
481   /*
482     For an ASCII compatinble string we can just append.
483   */
484   if (realloc(str_length+arg_length))
485     return TRUE;
486   memcpy(Ptr+str_length,s,arg_length);
487   str_length+=arg_length;
488   return FALSE;
489 }
490 
491 
492 /*
493   Append a 0-terminated ASCII string
494 */
495 
append(const char * s)496 bool String::append(const char *s)
497 {
498   return append(s, (uint) strlen(s));
499 }
500 
501 
502 
append_ulonglong(ulonglong val)503 bool String::append_ulonglong(ulonglong val)
504 {
505   if (realloc(str_length+MAX_BIGINT_WIDTH+2))
506     return TRUE;
507   char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10);
508   str_length= end - Ptr;
509   return FALSE;
510 }
511 
512 /*
513   Append a string in the given charset to the string
514   with character set recoding
515 */
516 
append(const char * s,uint32 arg_length,const CHARSET_INFO * cs)517 bool String::append(const char *s,uint32 arg_length, const CHARSET_INFO *cs)
518 {
519   uint32 offset;
520 
521   if (needs_conversion(arg_length, cs, str_charset, &offset))
522   {
523     uint32 add_length;
524     if ((cs == &my_charset_bin) && offset)
525     {
526       DBUG_ASSERT(str_charset->mbminlen > offset);
527       offset= str_charset->mbminlen - offset; // How many characters to pad
528       add_length= arg_length + offset;
529       if (realloc(str_length + add_length))
530         return TRUE;
531       memset(Ptr + str_length, 0, offset);
532       memcpy(Ptr + str_length + offset, s, arg_length);
533       str_length+= add_length;
534       return FALSE;
535     }
536 
537     add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
538     uint dummy_errors;
539     if (realloc(str_length + add_length))
540       return TRUE;
541     str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
542 				  s, arg_length, cs, &dummy_errors);
543   }
544   else
545   {
546     if (realloc(str_length + arg_length))
547       return TRUE;
548     memcpy(Ptr + str_length, s, arg_length);
549     str_length+= arg_length;
550   }
551   return FALSE;
552 }
553 
append(IO_CACHE * file,uint32 arg_length)554 bool String::append(IO_CACHE* file, uint32 arg_length)
555 {
556   if (realloc(str_length+arg_length))
557     return TRUE;
558   if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
559   {
560     shrink(str_length);
561     return TRUE;
562   }
563   str_length+=arg_length;
564   return FALSE;
565 }
566 
567 
568 /**
569   Append a parenthesized number to String.
570   Used in various pieces of SHOW related code.
571 
572   @param nr     Number
573   @param radix  Radix, optional parameter, 10 by default.
574 */
append_parenthesized(long nr,int radix)575 bool String::append_parenthesized(long nr, int radix)
576 {
577   char buff[64], *end;
578   buff[0]= '(';
579   end= int10_to_str(nr, buff + 1, radix);
580   *end++ = ')';
581   return append(buff, (uint) (end - buff));
582 }
583 
584 
append_with_prefill(const char * s,uint32 arg_length,uint32 full_length,char fill_char)585 bool String::append_with_prefill(const char *s,uint32 arg_length,
586 		 uint32 full_length, char fill_char)
587 {
588   int t_length= arg_length > full_length ? arg_length : full_length;
589 
590   if (realloc(str_length + t_length))
591     return TRUE;
592   t_length= full_length - arg_length;
593   if (t_length > 0)
594   {
595     memset(Ptr+str_length, fill_char, t_length);
596     str_length=str_length + t_length;
597   }
598   append(s, arg_length);
599   return FALSE;
600 }
601 
numchars() const602 uint32 String::numchars() const
603 {
604   return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
605 }
606 
charpos(int i,uint32 offset)607 int String::charpos(int i,uint32 offset)
608 {
609   if (i <= 0)
610     return i;
611   return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
612 }
613 
strstr(const String & s,uint32 offset)614 int String::strstr(const String &s,uint32 offset)
615 {
616   if (s.length()+offset <= str_length)
617   {
618     if (!s.length())
619       return ((int) offset);	// Empty string is always found
620 
621     const char *str = Ptr+offset;
622     const char *search=s.ptr();
623     const char *end=Ptr+str_length-s.length()+1;
624     const char *search_end=s.ptr()+s.length();
625 skip:
626     while (str != end)
627     {
628       if (*str++ == *search)
629       {
630 	char *i,*j;
631 	i=(char*) str; j=(char*) search+1;
632 	while (j != search_end)
633 	  if (*i++ != *j++) goto skip;
634 	return (int) (str-Ptr) -1;
635       }
636     }
637   }
638   return -1;
639 }
640 
641 /*
642 ** Search string from end. Offset is offset to the end of string
643 */
644 
strrstr(const String & s,uint32 offset)645 int String::strrstr(const String &s,uint32 offset)
646 {
647   if (s.length() <= offset && offset <= str_length)
648   {
649     if (!s.length())
650       return offset;				// Empty string is always found
651     const char *str = Ptr+offset-1;
652     const char *search=s.ptr()+s.length()-1;
653 
654     const char *end=Ptr+s.length()-2;
655     const char *search_end=s.ptr()-1;
656 skip:
657     while (str != end)
658     {
659       if (*str-- == *search)
660       {
661 	char *i,*j;
662 	i=(char*) str; j=(char*) search-1;
663 	while (j != search_end)
664 	  if (*i-- != *j--) goto skip;
665 	return (int) (i-Ptr) +1;
666       }
667     }
668   }
669   return -1;
670 }
671 
672 /*
673   Replace substring with string
674   If wrong parameter or not enough memory, do nothing
675 */
676 
replace(uint32 offset,uint32 arg_length,const String & to)677 bool String::replace(uint32 offset,uint32 arg_length,const String &to)
678 {
679   return replace(offset,arg_length,to.ptr(),to.length());
680 }
681 
replace(uint32 offset,uint32 arg_length,const char * to,uint32 to_length)682 bool String::replace(uint32 offset,uint32 arg_length,
683                      const char *to, uint32 to_length)
684 {
685   long diff = (long) to_length-(long) arg_length;
686   if (offset+arg_length <= str_length)
687   {
688     if (diff < 0)
689     {
690       if (to_length)
691 	memcpy(Ptr+offset,to,to_length);
692       bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
693 	    str_length-offset-arg_length);
694     }
695     else
696     {
697       if (diff)
698       {
699 	if (realloc(str_length+(uint32) diff))
700 	  return TRUE;
701 	bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
702 		  str_length-offset-arg_length);
703       }
704       if (to_length)
705 	memcpy(Ptr+offset,to,to_length);
706     }
707     str_length+=(uint32) diff;
708   }
709   return FALSE;
710 }
711 
712 
713 // added by Holyfoot for "geometry" needs
reserve(uint32 space_needed,uint32 grow_by)714 int String::reserve(uint32 space_needed, uint32 grow_by)
715 {
716   if (Alloced_length < str_length + space_needed)
717   {
718     if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
719       return TRUE;
720   }
721   return FALSE;
722 }
723 
qs_append(const char * str,uint32 len)724 void String::qs_append(const char *str, uint32 len)
725 {
726   memcpy(Ptr + str_length, str, len + 1);
727   str_length += len;
728 }
729 
qs_append(double d)730 void String::qs_append(double d)
731 {
732   char *buff = Ptr + str_length;
733   str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff,
734                        NULL);
735 }
736 
qs_append(double * d)737 void String::qs_append(double *d)
738 {
739   double ld;
740   float8get(ld, (char*) d);
741   qs_append(ld);
742 }
743 
qs_append(int i)744 void String::qs_append(int i)
745 {
746   char *buff= Ptr + str_length;
747   char *end= int10_to_str(i, buff, -10);
748   str_length+= (int) (end-buff);
749 }
750 
qs_append(uint i)751 void String::qs_append(uint i)
752 {
753   char *buff= Ptr + str_length;
754   char *end= int10_to_str(i, buff, 10);
755   str_length+= (int) (end-buff);
756 }
757 
758 /*
759   Compare strings according to collation, without end space.
760 
761   SYNOPSIS
762     sortcmp()
763     s		First string
764     t		Second string
765     cs		Collation
766 
767   NOTE:
768     Normally this is case sensitive comparison
769 
770   RETURN
771   < 0	s < t
772   0	s == t
773   > 0	s > t
774 */
775 
776 
sortcmp(const String * s,const String * t,const CHARSET_INFO * cs)777 int sortcmp(const String *s,const String *t, const CHARSET_INFO *cs)
778 {
779  return cs->coll->strnncollsp(cs,
780                               (uchar *) s->ptr(),s->length(),
781                               (uchar *) t->ptr(),t->length(), 0);
782 }
783 
784 
785 /*
786   Compare strings byte by byte. End spaces are also compared.
787 
788   SYNOPSIS
789     stringcmp()
790     s		First string
791     t		Second string
792 
793   NOTE:
794     Strings are compared as a stream of uchars
795 
796   RETURN
797   < 0	s < t
798   0	s == t
799   > 0	s > t
800 */
801 
802 
stringcmp(const String * s,const String * t)803 int stringcmp(const String *s,const String *t)
804 {
805   uint32 s_len=s->length(),t_len=t->length(),len=min(s_len,t_len);
806   int cmp= memcmp(s->ptr(), t->ptr(), len);
807   return (cmp) ? cmp : (int) (s_len - t_len);
808 }
809 
810 /**
811   Makes a copy of a String's buffer unless it's already heap-allocated.
812 
813   If the buffer ('str') of 'from' is on the heap, this function returns
814   'from', possibly re-allocated to be at least from_length bytes long.
815   It is also the case if from==to or to==NULL.
816   Otherwise, this function makes and returns a copy of "from" into "to"; the
817   buffer of "to" is heap-allocated; a pre-condition is that from->str and
818   to->str must point to non-overlapping buffers.
819   The logic behind this complex design, is that a caller, typically a
820   val_str() function, sometimes has an input String ('from') which buffer it
821   wants to modify; but this String's buffer may or not be heap-allocated; if
822   it's not heap-allocated it is possibly in static storage or belongs to an
823   outer context, and thus should not be modified; in that case the caller
824   wants a heap-allocated copy which it can freely modify.
825 
826   @param  to    destination string
827   @param  from  source string
828   @param  from_length  destination string will hold at least from_length bytes.
829 */
copy_if_not_alloced(String * to,String * from,uint32 from_length)830 String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
831 {
832   if (from->alloced && from->Alloced_length >= from_length)
833     return from;
834   if ((from->alloced && (from->Alloced_length != 0)) || !to || from == to)
835   {
836     (void) from->realloc(from_length, true);
837     return from;
838   }
839   if (to->realloc(from_length, true))
840     return from;				// Actually an error
841 
842   // from and to should not be overlapping
843   DBUG_ASSERT(!to->uses_buffer_owned_by(from));
844   DBUG_ASSERT(!from->uses_buffer_owned_by(to));
845 
846   if ((to->str_length=min(from->str_length,from_length)))
847     memcpy(to->Ptr,from->Ptr,to->str_length);
848   to->str_charset=from->str_charset;
849   return to;
850 }
851 
852 
853 /****************************************************************************
854   Help functions
855 ****************************************************************************/
856 
857 /**
858   Copy string with HEX-encoding of "bad" characters.
859 
860   @details This functions copies the string pointed by "src"
861   to the string pointed by "dst". Not more than "srclen" bytes
862   are read from "src". Any sequences of bytes representing
863   a not-well-formed substring (according to cs) are hex-encoded,
864   and all well-formed substrings (according to cs) are copied as is.
865   Not more than "dstlen" bytes are written to "dst". The number
866   of bytes written to "dst" is returned.
867 
868    @param      cs       character set pointer of the destination string
869    @param[out] dst      destination string
870    @param      dstlen   size of dst
871    @param      src      source string
872    @param      srclen   length of src
873 
874    @retval     result length
875 */
876 
877 size_t
my_copy_with_hex_escaping(const CHARSET_INFO * cs,char * dst,size_t dstlen,const char * src,size_t srclen)878 my_copy_with_hex_escaping(const CHARSET_INFO *cs,
879                           char *dst, size_t dstlen,
880                           const char *src, size_t srclen)
881 {
882   const char *srcend= src + srclen;
883   char *dst0= dst;
884 
885   for ( ; src < srcend ; )
886   {
887     size_t chlen;
888     if ((chlen= my_ismbchar(cs, src, srcend)))
889     {
890       if (dstlen < chlen)
891         break; /* purecov: inspected */
892       memcpy(dst, src, chlen);
893       src+= chlen;
894       dst+= chlen;
895       dstlen-= chlen;
896     }
897     else if (*src & 0x80)
898     {
899       if (dstlen < 4)
900         break; /* purecov: inspected */
901       *dst++= '\\';
902       *dst++= 'x';
903       *dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
904       *dst++= _dig_vec_upper[((unsigned char) *src) & 15];
905       src++;
906       dstlen-= 4;
907     }
908     else
909     {
910       if (dstlen < 1)
911         break; /* purecov: inspected */
912       *dst++= *src++;
913       dstlen--;
914     }
915   }
916   return dst - dst0;
917 }
918 
919 /*
920   copy a string,
921   with optional character set conversion,
922   with optional left padding (for binary -> UCS2 conversion)
923 
924   SYNOPSIS
925     well_formed_copy_nchars()
926     to			     Store result here
927     to_length                Maximum length of "to" string
928     to_cs		     Character set of "to" string
929     from		     Copy from here
930     from_length		     Length of from string
931     from_cs		     From character set
932     nchars                   Copy no more than nchars characters
933     well_formed_error_pos    Return position when "from" is not well formed
934                              or NULL otherwise.
935     cannot_convert_error_pos Return position where a non-convertible
936                              character was met, or NULL otherwise.
937     from_end_pos             Return position where scanning of "from"
938                              string stopped.
939   NOTES
940 
941   RETURN
942     length of bytes copied to 'to'
943 */
944 
945 
946 uint32
well_formed_copy_nchars(const CHARSET_INFO * to_cs,char * to,uint to_length,const CHARSET_INFO * from_cs,const char * from,uint from_length,uint nchars,const char ** well_formed_error_pos,const char ** cannot_convert_error_pos,const char ** from_end_pos)947 well_formed_copy_nchars(const CHARSET_INFO *to_cs,
948                         char *to, uint to_length,
949                         const CHARSET_INFO *from_cs,
950                         const char *from, uint from_length,
951                         uint nchars,
952                         const char **well_formed_error_pos,
953                         const char **cannot_convert_error_pos,
954                         const char **from_end_pos)
955 {
956   uint res;
957 
958   if ((to_cs == &my_charset_bin) ||
959       (from_cs == &my_charset_bin) ||
960       (to_cs == from_cs) ||
961       my_charset_same(from_cs, to_cs))
962   {
963     if (to_length < to_cs->mbminlen || !nchars)
964     {
965       *from_end_pos= from;
966       *cannot_convert_error_pos= NULL;
967       *well_formed_error_pos= NULL;
968       return 0;
969     }
970 
971     if (to_cs == &my_charset_bin)
972     {
973       res= min(min(nchars, to_length), from_length);
974       memmove(to, from, res);
975       *from_end_pos= from + res;
976       *well_formed_error_pos= NULL;
977       *cannot_convert_error_pos= NULL;
978     }
979     else
980     {
981       int well_formed_error;
982       uint from_offset;
983 
984       if ((from_offset= (from_length % to_cs->mbminlen)) &&
985           (from_cs == &my_charset_bin))
986       {
987         /*
988           Copying from BINARY to UCS2 needs to prepend zeros sometimes:
989           INSERT INTO t1 (ucs2_column) VALUES (0x01);
990           0x01 -> 0x0001
991         */
992         uint pad_length= to_cs->mbminlen - from_offset;
993         memset(to, 0, pad_length);
994         memmove(to + pad_length, from, from_offset);
995         /*
996           In some cases left zero-padding can create an incorrect character.
997           For example:
998             INSERT INTO t1 (utf32_column) VALUES (0x110000);
999           We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
1000           The valid characters range is limited to 0x00000000..0x0010FFFF.
1001 
1002           Make sure we didn't pad to an incorrect character.
1003         */
1004         if (to_cs->cset->well_formed_len(to_cs,
1005                                          to, to + to_cs->mbminlen, 1,
1006                                          &well_formed_error) !=
1007                                          to_cs->mbminlen)
1008         {
1009           *from_end_pos= *well_formed_error_pos= from;
1010           *cannot_convert_error_pos= NULL;
1011           return 0;
1012         }
1013         nchars--;
1014         from+= from_offset;
1015         from_length-= from_offset;
1016         to+= to_cs->mbminlen;
1017         to_length-= to_cs->mbminlen;
1018       }
1019 
1020       set_if_smaller(from_length, to_length);
1021       res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
1022                                         nchars, &well_formed_error);
1023       memmove(to, from, res);
1024       *from_end_pos= from + res;
1025       *well_formed_error_pos= well_formed_error ? from + res : NULL;
1026       *cannot_convert_error_pos= NULL;
1027       if (from_offset)
1028         res+= to_cs->mbminlen;
1029     }
1030   }
1031   else
1032   {
1033     int cnvres;
1034     my_wc_t wc;
1035     my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
1036     my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
1037     const uchar *from_end= (const uchar*) from + from_length;
1038     uchar *to_end= (uchar*) to + to_length;
1039     char *to_start= to;
1040     *well_formed_error_pos= NULL;
1041     *cannot_convert_error_pos= NULL;
1042 
1043     for ( ; nchars; nchars--)
1044     {
1045       const char *from_prev= from;
1046       if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
1047         from+= cnvres;
1048       else if (cnvres == MY_CS_ILSEQ)
1049       {
1050         if (!*well_formed_error_pos)
1051           *well_formed_error_pos= from;
1052         from++;
1053         wc= '?';
1054       }
1055       else if (cnvres > MY_CS_TOOSMALL)
1056       {
1057         /*
1058           A correct multibyte sequence detected
1059           But it doesn't have Unicode mapping.
1060         */
1061         if (!*cannot_convert_error_pos)
1062           *cannot_convert_error_pos= from;
1063         from+= (-cnvres);
1064         wc= '?';
1065       }
1066       else
1067         break;  // Not enough characters
1068 
1069 outp:
1070       if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1071         to+= cnvres;
1072       else if (cnvres == MY_CS_ILUNI && wc != '?')
1073       {
1074         if (!*cannot_convert_error_pos)
1075           *cannot_convert_error_pos= from_prev;
1076         wc= '?';
1077         goto outp;
1078       }
1079       else
1080       {
1081         from= from_prev;
1082         break;
1083       }
1084     }
1085     *from_end_pos= from;
1086     res= (uint) (to - to_start);
1087   }
1088   return (uint32) res;
1089 }
1090 
1091 
1092 
1093 
print(String * str)1094 void String::print(String *str)
1095 {
1096   char *st= (char*)Ptr, *end= st+str_length;
1097   for (; st < end; st++)
1098   {
1099     uchar c= *st;
1100     switch (c)
1101     {
1102     case '\\':
1103       str->append(STRING_WITH_LEN("\\\\"));
1104       break;
1105     case '\0':
1106       str->append(STRING_WITH_LEN("\\0"));
1107       break;
1108     case '\'':
1109       str->append(STRING_WITH_LEN("\\'"));
1110       break;
1111     case '\n':
1112       str->append(STRING_WITH_LEN("\\n"));
1113       break;
1114     case '\r':
1115       str->append(STRING_WITH_LEN("\\r"));
1116       break;
1117     case '\032': // Ctrl-Z
1118       str->append(STRING_WITH_LEN("\\Z"));
1119       break;
1120     default:
1121       str->append(c);
1122     }
1123   }
1124 }
1125 
1126 
1127 /*
1128   Exchange state of this object and argument.
1129 
1130   SYNOPSIS
1131     String::swap()
1132 
1133   RETURN
1134     Target string will contain state of this object and vice versa.
1135 */
1136 
swap(String & s)1137 void String::swap(String &s)
1138 {
1139   swap_variables(char *, Ptr, s.Ptr);
1140   swap_variables(uint32, str_length, s.str_length);
1141   swap_variables(uint32, Alloced_length, s.Alloced_length);
1142   swap_variables(bool, alloced, s.alloced);
1143   swap_variables(const CHARSET_INFO *, str_charset, s.str_charset);
1144 }
1145 
1146 
1147 /**
1148   Convert string to printable ASCII string
1149 
1150   @details This function converts input string "from" replacing non-ASCII bytes
1151   with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
1152   the resulting string.
1153   This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages,
1154   e.g. when a string cannot be converted to a result charset.
1155 
1156 
1157   @param    to          output buffer
1158   @param    to_len      size of the output buffer (8 bytes or greater)
1159   @param    from        input string
1160   @param    from_len    size of the input string
1161   @param    from_cs     input charset
1162   @param    nbytes      maximal number of bytes to convert (from_len if 0)
1163 
1164   @return   number of bytes in the output string
1165 */
1166 
convert_to_printable(char * to,size_t to_len,const char * from,size_t from_len,const CHARSET_INFO * from_cs,size_t nbytes)1167 uint convert_to_printable(char *to, size_t to_len,
1168                           const char *from, size_t from_len,
1169                           const CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
1170 {
1171   /* needs at least 8 bytes for '\xXX...' and zero byte */
1172   DBUG_ASSERT(to_len >= 8);
1173 
1174   char *t= to;
1175   char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
1176   const char *f= from;
1177   const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len);
1178   char *dots= to; // last safe place to append '...'
1179 
1180   if (!f || t == t_end)
1181     return 0;
1182 
1183   for (; t < t_end && f < f_end; f++)
1184   {
1185     /*
1186       If the source string is ASCII compatible (mbminlen==1)
1187       and the source character is in ASCII printable range (0x20..0x7F),
1188       then display the character as is.
1189 
1190       Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
1191       or the source character is not in the printable range,
1192       then print the character using HEX notation.
1193     */
1194     if (((unsigned char) *f) >= 0x20 &&
1195         ((unsigned char) *f) <= 0x7F &&
1196         from_cs->mbminlen == 1)
1197     {
1198       *t++= *f;
1199     }
1200     else
1201     {
1202       if (t_end - t < 4) // \xXX
1203         break;
1204       *t++= '\\';
1205       *t++= 'x';
1206       *t++= _dig_vec_upper[((unsigned char) *f) >> 4];
1207       *t++= _dig_vec_upper[((unsigned char) *f) & 0x0F];
1208     }
1209     if (t_end - t >= 3) // '...'
1210       dots= t;
1211   }
1212   if (f < from + from_len)
1213     memcpy(dots, STRING_WITH_LEN("...\0"));
1214   else
1215     *t= '\0';
1216   return t - to;
1217 }
1218 
1219 /**
1220   Check if an input byte sequence is a valid character string of a given charset
1221 
1222   @param cs                     The input character set.
1223   @param str                    The input byte sequence to validate.
1224   @param length                 A byte length of the str.
1225   @param [out] valid_length     A byte length of a valid prefix of the str.
1226   @param [out] length_error     True in the case of a character length error:
1227                                 some byte[s] in the input is not a valid
1228                                 prefix for a character, i.e. the byte length
1229                                 of that invalid character is undefined.
1230 
1231   @retval true if the whole input byte sequence is a valid character string.
1232                The length_error output parameter is undefined.
1233 
1234   @return
1235     if the whole input byte sequence is a valid character string
1236     then
1237         return false
1238     else
1239         if the length of some character in the input is undefined (MY_CS_ILSEQ)
1240            or the last character is truncated (MY_CS_TOOSMALL)
1241         then
1242             *length_error= true; // fatal error!
1243         else
1244             *length_error= false; // non-fatal error: there is no wide character
1245                                   // encoding for some input character
1246         return true
1247 */
validate_string(const CHARSET_INFO * cs,const char * str,uint32 length,size_t * valid_length,bool * length_error)1248 bool validate_string(const CHARSET_INFO *cs, const char *str, uint32 length,
1249                      size_t *valid_length, bool *length_error)
1250 {
1251   if (cs->mbmaxlen > 1)
1252   {
1253     int well_formed_error;
1254     *valid_length= cs->cset->well_formed_len(cs, str, str + length,
1255                                              length, &well_formed_error);
1256     *length_error= well_formed_error;
1257     return well_formed_error;
1258   }
1259 
1260   /*
1261     well_formed_len() is not functional on single-byte character sets,
1262     so use mb_wc() instead:
1263   */
1264   *length_error= false;
1265 
1266   const uchar *from= reinterpret_cast<const uchar *>(str);
1267   const uchar *from_end= from + length;
1268   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1269 
1270   while (from < from_end)
1271   {
1272     my_wc_t wc;
1273     int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end);
1274     if (cnvres <= 0)
1275     {
1276       *valid_length= from - reinterpret_cast<const uchar *>(str);
1277       return true;
1278     }
1279     from+= cnvres;
1280   }
1281   *valid_length= length;
1282   return false;
1283 }
1284