1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /* This file is originally from the mysql distribution. Coded by monty */
24 
25 #include <my_global.h>
26 #include <my_sys.h>
27 #include <m_string.h>
28 #include <m_ctype.h>
29 #include <mysql_com.h>
30 
31 #include "sql_string.h"
32 
33 #include <algorithm>
34 #include <limits>
35 
36 using std::min;
37 using std::max;
38 
#ifdef MYSQL_SERVER
/*
  PSI memory key; only defined when this file is compiled with MYSQL_SERVER.
  NOTE(review): presumably the key that STRING_PSI_MEMORY_KEY resolves to in
  server builds -- confirm against sql_string.h.
*/
PSI_memory_key key_memory_String_value;
#endif
42 
43 /*****************************************************************************
44 ** String functions
45 *****************************************************************************/
46 
real_alloc(size_t length)47 bool String::real_alloc(size_t length)
48 {
49   size_t arg_length= ALIGN_SIZE(length + 1);
50   assert(arg_length > length);
51   if (arg_length <= length)
52     return true;                                 /* Overflow */
53   m_length= 0;
54   if (m_alloced_length < arg_length)
55   {
56     mem_free();
57     if (!(m_ptr= static_cast<char*>(my_malloc(STRING_PSI_MEMORY_KEY,
58                                               arg_length, MYF(MY_WME)))))
59       return true;
60     m_alloced_length= static_cast<uint32>(arg_length);
61     m_is_alloced= true;
62   }
63   m_ptr[0]= 0;
64   return false;
65 }
66 
67 
68 /**
69    Allocates a new buffer on the heap for this String.
70 
71    - If the String's internal buffer is privately owned and heap allocated,
72      one of the following is performed.
73 
74      - If the requested length is greater than what fits in the buffer, a new
75        buffer is allocated, data moved and the old buffer freed.
76 
77      - If the requested length is less or equal to what fits in the buffer, a
78        null character is inserted at the appropriate position.
79 
80    - If the String does not keep a private buffer on the heap:
81 
82       - If the requested length is greater than what fits in the buffer, or
83         force_on_heap is true, a new buffer is allocated, data is copied.
84       - If the requested length is less or equal to what fits in the buffer,
85         and force_on_heap is false, a null character is inserted at the
86         appropriate position.
87 
88    For C compatibility, the new string buffer is null terminated.
89 
90    @param alloc_length The requested string size in characters, excluding any
91    null terminator.
92    @param force_on_heap If the caller wants String's 'str' buffer to be on the
93    heap in all cases.
94 
   @retval false Either the copy operation is complete or, if the size of the
   new buffer is smaller than the currently allocated buffer (if one exists),
   no allocation occurred.

   @retval true An error occurred when attempting to allocate memory or memory
   allocation length exceeded allowed limit (4GB) for String Class.
101 */
mem_realloc(size_t alloc_length,bool force_on_heap)102 bool String::mem_realloc(size_t alloc_length, bool force_on_heap)
103 {
104   size_t len= ALIGN_SIZE(alloc_length + 1);
105   assert(len > alloc_length);
106   if (len <= alloc_length)
107     return true;                                 /* Overflow */
108 
109   if (force_on_heap && !m_is_alloced)
110   {
111     /*
112       Caller wants bytes on the heap, and the currently available bytes are
113       not; they are thus irrelevant:
114       */
115     m_alloced_length= 0;
116   }
117 
118   if (m_alloced_length < len)     // Available bytes are not enough
119   {
120     // Signal an error if len exceeds uint32 max on 64-bit word platform.
121 #if defined(__WORDSIZE) && (__WORDSIZE == 64)
122     if (len > std::numeric_limits<uint32>::max())
123       return true;
124 #endif
125     char *new_ptr;
126     if (m_is_alloced)
127     {
128       if (!(new_ptr= static_cast<char*>(my_realloc(STRING_PSI_MEMORY_KEY,
129                                                    m_ptr, len, MYF(MY_WME)))))
130         return true;				// Signal error
131     }
132     else if ((new_ptr= static_cast<char*>(my_malloc(STRING_PSI_MEMORY_KEY,
133                                                     len, MYF(MY_WME)))))
134     {
135       if (m_length > len - 1)
136         m_length= 0;
137       memcpy(new_ptr, m_ptr, m_length);
138       new_ptr[m_length]= 0;
139       m_is_alloced= true;
140     }
141     else
142       return true;			// Signal error
143     m_ptr= new_ptr;
144     m_alloced_length= static_cast<uint32>(len);
145   }
146   m_ptr[alloc_length]= 0;			// This make other funcs shorter
147   return false;
148 }
149 
150 /*
151   Helper function for @see mem_realloc_exp.
152  */
next_realloc_exp_size(size_t sz)153 inline size_t String::next_realloc_exp_size(size_t sz)
154 {
155   const size_t len= ALIGN_SIZE(sz + 1);
156   const size_t ret=
157     (m_is_alloced && m_alloced_length < len) ? sz + (m_length / 4) : sz;
158   return ret;
159 }
160 
161 /**
162   This function is used by the various append() member functions, to ensure
163   that append() has amortized constant cost. Once we have started to allocate
164   buffer on the heap, we increase the buffer size exponentially, rather
165   than linearly.
166 
167   @param alloc_length The requested string size in characters, excluding any
168                       null terminator.
169 
  @retval false Either the copy operation is complete or, if the size of the
  new buffer is smaller than the currently allocated buffer (if one exists),
  no allocation occurred.

  @retval true An error occurred when attempting to allocate memory.
175 
176   @see mem_realloc.
177  */
mem_realloc_exp(size_t alloc_length)178 bool String::mem_realloc_exp(size_t alloc_length)
179 {
180   if (mem_realloc(next_realloc_exp_size(alloc_length)))
181     return true;
182   m_ptr[alloc_length]= '\0';
183   return false;
184 }
185 
186 
set_int(longlong num,bool unsigned_flag,const CHARSET_INFO * cs)187 bool String::set_int(longlong num, bool unsigned_flag, const CHARSET_INFO *cs)
188 {
189   uint l= 20 * cs->mbmaxlen + 1;
190   int base= unsigned_flag ? 10 : -10;
191 
192   if (alloc(l))
193     return true;
194   m_length=(uint32) (cs->cset->longlong10_to_str)(cs, m_ptr, l, base, num);
195   m_charset= cs;
196   return false;
197 }
198 
set_real(double num,uint decimals,const CHARSET_INFO * cs)199 bool String::set_real(double num,uint decimals, const CHARSET_INFO *cs)
200 {
201   char buff[FLOATING_POINT_BUFFER];
202   uint dummy_errors;
203   size_t len;
204 
205   m_charset=cs;
206   if (decimals >= NOT_FIXED_DEC)
207   {
208     len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, static_cast<int>(sizeof(buff)) - 1,
209                  buff, NULL);
210     return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
211   }
212   len= my_fcvt(num, decimals, buff, NULL);
213   return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
214 }
215 
216 
copy()217 bool String::copy()
218 {
219   if (!m_is_alloced)
220   {
221     m_alloced_length= 0;				// Force realloc
222     return mem_realloc(m_length);
223   }
224   return false;
225 }
226 
227 /**
228    Copies the internal buffer from str. If this String has a private heap
229    allocated buffer where new data does not fit, a new buffer is allocated
230    before copying and the old buffer freed. Character set information is also
231    copied.
232 
233    If str is the same as this and str doesn't own its buffer, a
234    new buffer is allocated and it's owned by str.
235 
236    @param str The string whose internal buffer is to be copied.
237 
238    @retval false Success.
239    @retval true Memory allocation failed.
240 */
copy(const String & str)241 bool String::copy(const String &str)
242 {
243   /*
244     If &str == this and it owns the buffer, this operation is a no-op, so skip
245     the meaningless copy. Otherwise if we do, we will read freed memory at
246     the memmove call below.
247   */
248   if (&str == this && str.is_alloced())
249     return false;
250 
251   /*
252     If a String s doesn't own its buffer, here we should allocate
253     a new buffer owned by s and copy the contents there. But alloc()
254     will change this->m_ptr and this->m_length, and if this == &str, this
255     will also change str->m_ptr and str->m_length, so we need to save
256     these values first.
257   */
258   const size_t str_length= str.m_length;
259   const char *str_ptr= str.m_ptr;
260   if (alloc(str.m_length))
261     return true;
262   m_length= str_length;
263   memmove(m_ptr, str_ptr, m_length);		// May be overlapping
264   m_ptr[m_length]= 0;
265   m_charset= str.m_charset;
266   return false;
267 }
268 
copy(const char * str,size_t arg_length,const CHARSET_INFO * cs)269 bool String::copy(const char *str, size_t arg_length, const CHARSET_INFO *cs)
270 {
271   if (alloc(arg_length))
272     return true;
273   if ((m_length= arg_length))
274     memcpy(m_ptr, str, arg_length);
275   m_ptr[arg_length]= 0;
276   m_charset= cs;
277   return false;
278 }
279 
280 
281 /*
282   Checks that the source string can be just copied to the destination string
283   without conversion.
284 
  SYNOPSIS
286 
287   needs_conversion()
288   arg_length		Length of string to copy.
289   from_cs		Character set to copy from
290   to_cs			Character set to copy to
291   uint32 *offset	Returns number of unaligned characters.
292 
293   RETURN
294    0  No conversion needed
295    1  Either character set conversion or adding leading  zeros
296       (e.g. for UCS-2) must be done
297 
298   NOTE
299   to_cs may be NULL for "no conversion" if the system variable
300   character_set_results is NULL.
301 */
302 
needs_conversion(size_t arg_length,const CHARSET_INFO * from_cs,const CHARSET_INFO * to_cs,size_t * offset)303 bool String::needs_conversion(size_t arg_length,
304 			      const CHARSET_INFO *from_cs,
305 			      const CHARSET_INFO *to_cs,
306 			      size_t *offset)
307 {
308   *offset= 0;
309   if (!to_cs ||
310       (to_cs == &my_charset_bin) ||
311       (to_cs == from_cs) ||
312       my_charset_same(from_cs, to_cs) ||
313       ((from_cs == &my_charset_bin) &&
314        (!(*offset=(arg_length % to_cs->mbminlen)))))
315     return false;
316   return true;
317 }
318 
319 
320 /*
321   Checks that the source string can just be copied to the destination string
322   without conversion.
  Unlike needs_conversion it will require conversion on incoming binary data
  to ensure the data are verified for validity first.
325 
326   @param arg_length   Length of string to copy.
327   @param from_cs      Character set to copy from
328   @param to_cs        Character set to copy to
329 
330   @return conversion needed
331 */
needs_conversion_on_storage(size_t arg_length,const CHARSET_INFO * cs_from,const CHARSET_INFO * cs_to)332 bool String::needs_conversion_on_storage(size_t arg_length,
333                                          const CHARSET_INFO *cs_from,
334                                          const CHARSET_INFO *cs_to)
335 {
336   size_t offset;
337   return (needs_conversion(arg_length, cs_from, cs_to, &offset) ||
338           /* force conversion when storing a binary string */
339           (cs_from == &my_charset_bin &&
340           /* into a non-binary destination */
341            cs_to != &my_charset_bin &&
342            /* and any of the following is true :*/
343            (
344             /* it's a variable length encoding */
345             cs_to->mbminlen != cs_to->mbmaxlen ||
346             /* longer than 2 bytes : neither 1 byte nor ucs2 */
347             cs_to->mbminlen > 2 ||
348             /* and is not a multiple of the char byte size */
349             0 != (arg_length % cs_to->mbmaxlen)
350            )
351           )
352          );
353 }
354 
355 
356 /*
357   Copy a multi-byte character sets with adding leading zeros.
358 
359   SYNOPSIS
360 
361   copy_aligned()
362   str			String to copy
  arg_length		Length of string. This should NOT be divisible by
			cs->mbminlen.
365   offset		arg_length % cs->mb_minlength
366   cs			Character set for 'str'
367 
368   NOTES
    For real multi-byte, ASCII-incompatible character sets,
    like UCS-2, add leading zeros if we have an incomplete character.
371     Thus,
372       SELECT _ucs2 0xAA
373     will automatically be converted into
374       SELECT _ucs2 0x00AA
375 
376   RETURN
377     0  ok
378     1  error
379 */
380 
copy_aligned(const char * str,size_t arg_length,size_t offset,const CHARSET_INFO * cs)381 bool String::copy_aligned(const char *str, size_t arg_length, size_t offset,
382 			  const CHARSET_INFO *cs)
383 {
384   /* How many bytes are in incomplete character */
385   offset= cs->mbminlen - offset; /* How many zeros we should prepend */
386   assert(offset && offset != cs->mbminlen);
387 
388   size_t aligned_length= arg_length + offset;
389   if (alloc(aligned_length))
390     return true;
391 
392   /*
393     Note, this is only safe for big-endian UCS-2.
394     If we add little-endian UCS-2 sometimes, this code
395     will be more complicated. But it's OK for now.
396   */
397   memset(m_ptr, 0, offset);
398   memcpy(m_ptr + offset, str, arg_length);
399   m_ptr[aligned_length]= 0;
400   /* m_length is always >= 0 as arg_length is != 0 */
401   m_length= aligned_length;
402   m_charset= cs;
403   return false;
404 }
405 
406 
set_or_copy_aligned(const char * str,size_t arg_length,const CHARSET_INFO * cs)407 bool String::set_or_copy_aligned(const char *str, size_t arg_length,
408 				 const CHARSET_INFO *cs)
409 {
410   /* How many bytes are in incomplete character */
411   size_t offset= (arg_length % cs->mbminlen);
412 
413   if (!offset) /* All characters are complete, just copy */
414   {
415     set(str, arg_length, cs);
416     return false;
417   }
418   return copy_aligned(str, arg_length, offset, cs);
419 }
420 
421 
422 /**
423    Copies the character data into this String, with optional character set
424    conversion.
425 
426    @return
427    false ok
428    true  Could not allocate result buffer
429 
430 */
431 
copy(const char * str,size_t arg_length,const CHARSET_INFO * from_cs,const CHARSET_INFO * to_cs,uint * errors)432 bool String::copy(const char *str, size_t arg_length,
433 		  const CHARSET_INFO *from_cs, const CHARSET_INFO *to_cs, uint *errors)
434 {
435   size_t offset;
436 
437   assert(!str || str != m_ptr);
438 
439   if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
440   {
441     *errors= 0;
442     return copy(str, arg_length, to_cs);
443   }
444   if ((from_cs == &my_charset_bin) && offset)
445   {
446     *errors= 0;
447     return copy_aligned(str, arg_length, offset, to_cs);
448   }
449   size_t new_length= to_cs->mbmaxlen*arg_length;
450   if (alloc(new_length))
451     return true;
452   m_length= copy_and_convert(m_ptr, new_length, to_cs,
453                              str, arg_length, from_cs, errors);
454   m_charset= to_cs;
455   return false;
456 }
457 
458 
459 /*
460   Set a string to the value of a latin1-string, keeping the original charset
461 
462   SYNOPSIS
463     copy_or_set()
464     str			String of a simple charset (latin1)
465     arg_length		Length of string
466 
467   IMPLEMENTATION
468     If string object is of a simple character set, set it to point to the
469     given string.
470     If not, make a copy and convert it to the new character set.
471 
472   RETURN
473     0	ok
474     1	Could not allocate result buffer
475 
476 */
477 
set_ascii(const char * str,size_t arg_length)478 bool String::set_ascii(const char *str, size_t arg_length)
479 {
480   if (m_charset->mbminlen == 1)
481   {
482     set(str, arg_length, m_charset);
483     return 0;
484   }
485   uint dummy_errors;
486   return copy(str, arg_length, &my_charset_latin1, m_charset, &dummy_errors);
487 }
488 
489 
490 /* This is used by mysql.cc */
491 
fill(size_t max_length,char fill_char)492 bool String::fill(size_t max_length,char fill_char)
493 {
494   if (m_length > max_length)
495     m_ptr[m_length= max_length]= 0;
496   else
497   {
498     if (mem_realloc(max_length))
499       return true;
500     memset(m_ptr + m_length, fill_char, max_length - m_length);
501     m_length= max_length;
502   }
503   return false;
504 }
505 
strip_sp()506 void String::strip_sp()
507 {
508    while (m_length && my_isspace(m_charset, m_ptr[m_length - 1]))
509     m_length--;
510 }
511 
append(const String & s)512 bool String::append(const String &s)
513 {
514   if (s.length())
515   {
516     assert(!this->uses_buffer_owned_by(&s));
517     assert(!s.uses_buffer_owned_by(this));
518 
519     if (mem_realloc_exp((m_length + s.length())))
520       return true;
521     memcpy(m_ptr + m_length,s.ptr(), s.length());
522     m_length+=s.length();
523   }
524   return false;
525 }
526 
527 
528 /*
  Append an ASCII string to a string of the current character set
530 */
531 
append(const char * s,size_t arg_length)532 bool String::append(const char *s, size_t arg_length)
533 {
534   if (!arg_length)
535     return false;
536 
537   /*
538     For an ASCII incompatible string, e.g. UCS-2, we need to convert
539   */
540   if (m_charset->mbminlen > 1)
541   {
542     size_t add_length= arg_length * m_charset->mbmaxlen;
543     uint dummy_errors;
544     if (mem_realloc(m_length + add_length))
545       return true;
546     m_length+= copy_and_convert(m_ptr + m_length, add_length, m_charset,
547                                 s, arg_length, &my_charset_latin1,
548                                 &dummy_errors);
549     return false;
550   }
551 
552   /*
553     For an ASCII compatinble string we can just append.
554   */
555   if (mem_realloc_exp(m_length + arg_length))
556     return true;
557   memcpy(m_ptr + m_length, s, arg_length);
558   m_length+= arg_length;
559   return false;
560 }
561 
562 
563 /*
564   Append a 0-terminated ASCII string
565 */
566 
append(const char * s)567 bool String::append(const char *s)
568 {
569   return append(s, (uint) strlen(s));
570 }
571 
572 
573 /**
574   Append an unsigned longlong to the string.
575 */
append_ulonglong(ulonglong val)576 bool String::append_ulonglong(ulonglong val)
577 {
578   if (mem_realloc_exp(m_length + MAX_BIGINT_WIDTH + 2))
579     return true;
580   char *end= longlong10_to_str(val, m_ptr + m_length, 10);
581   m_length= end - m_ptr;
582   return false;
583 }
584 
585 
586 /**
587   Append a signed longlong to the string.
588 */
append_longlong(longlong val)589 bool String::append_longlong(longlong val)
590 {
591   if (mem_realloc_exp(m_length + MAX_BIGINT_WIDTH + 2))
592     return true;                              /* purecov: inspected */
593   char *end= longlong10_to_str(val, m_ptr + m_length, -10);
594   m_length= end - m_ptr;
595   return false;
596 }
597 
598 
599 /*
600   Append a string in the given charset to the string
601   with character set recoding
602 */
603 
append(const char * s,size_t arg_length,const CHARSET_INFO * cs)604 bool String::append(const char *s, size_t arg_length, const CHARSET_INFO *cs)
605 {
606   size_t offset;
607 
608   if (needs_conversion(arg_length, cs, m_charset, &offset))
609   {
610     size_t add_length;
611     if ((cs == &my_charset_bin) && offset)
612     {
613       assert(m_charset->mbminlen > offset);
614       offset= m_charset->mbminlen - offset; // How many characters to pad
615       add_length= arg_length + offset;
616       if (mem_realloc_exp(m_length + add_length))
617         return true;
618       memset(m_ptr + m_length, 0, offset);
619       memcpy(m_ptr + m_length + offset, s, arg_length);
620       m_length+= add_length;
621       return false;
622     }
623 
624     add_length= arg_length / cs->mbminlen * m_charset->mbmaxlen;
625     uint dummy_errors;
626     if (mem_realloc_exp(m_length + add_length))
627       return true;
628     m_length+= copy_and_convert(m_ptr + m_length, add_length, m_charset,
629                                 s, arg_length, cs, &dummy_errors);
630   }
631   else
632   {
633     if (mem_realloc_exp(m_length + arg_length))
634       return true;
635     memcpy(m_ptr + m_length, s, arg_length);
636     m_length+= arg_length;
637   }
638   return false;
639 }
640 
append(IO_CACHE * file,size_t arg_length)641 bool String::append(IO_CACHE* file, size_t arg_length)
642 {
643   if (mem_realloc(m_length + arg_length))
644     return true;
645   if (my_b_read(file, reinterpret_cast<uchar*>(m_ptr) + m_length, arg_length))
646   {
647     shrink(m_length);
648     return true;
649   }
650   m_length+= arg_length;
651   return false;
652 }
653 
654 
655 /**
656   Append a parenthesized number to String.
657   Used in various pieces of SHOW related code.
658 
659   @param nr     Number
660   @param radix  Radix, optional parameter, 10 by default.
661 */
append_parenthesized(long nr,int radix)662 bool String::append_parenthesized(long nr, int radix)
663 {
664   char buff[64], *end;
665   buff[0]= '(';
666   end= int10_to_str(nr, buff + 1, radix);
667   *end++ = ')';
668   return append(buff, (uint) (end - buff));
669 }
670 
671 
append_with_prefill(const char * s,size_t arg_length,size_t full_length,char fill_char)672 bool String::append_with_prefill(const char *s, size_t arg_length,
673                                  size_t full_length, char fill_char)
674 {
675   size_t t_length= arg_length > full_length ? arg_length : full_length;
676 
677   if (mem_realloc(m_length + t_length))
678     return true;
679   if (full_length > arg_length)
680   {
681     t_length= full_length - arg_length;
682     memset(m_ptr + m_length, fill_char, t_length);
683     m_length= m_length + t_length;
684   }
685   append(s, arg_length);
686   return false;
687 }
688 
numchars() const689 size_t String::numchars() const
690 {
691   return m_charset->cset->numchars(m_charset, m_ptr, m_ptr + m_length);
692 }
693 
charpos(size_t i,size_t offset)694 size_t String::charpos(size_t i, size_t offset)
695 {
696   if (i <= 0)
697     return i;
698   return m_charset->cset->charpos(m_charset, m_ptr + offset, m_ptr + m_length, i);
699 }
700 
strstr(const String & s,size_t offset)701 int String::strstr(const String &s, size_t offset)
702 {
703   if (s.length()+offset <= m_length)
704   {
705     if (!s.length())
706       return ((int) offset);	// Empty string is always found
707 
708     const char *str= m_ptr + offset;
709     const char *search= s.ptr();
710     const char *end= m_ptr + m_length - s.length() + 1;
711     const char *search_end= s.ptr() + s.length();
712 skip:
713     while (str != end)
714     {
715       if (*str++ == *search)
716       {
717         const char *i= str;
718         const char *j= search + 1;
719         while (j != search_end)
720           if (*i++ != *j++) goto skip;
721         return (int) (str - m_ptr) -1;
722       }
723     }
724   }
725   return -1;
726 }
727 
728 /*
729 ** Search string from end. Offset is offset to the end of string
730 */
731 
strrstr(const String & s,size_t offset)732 int String::strrstr(const String &s, size_t offset)
733 {
734   if (s.length() <= offset && offset <= m_length)
735   {
736     if (!s.length())
737       return static_cast<int>(offset); // Empty string is always found
738     const char *str= m_ptr + offset - 1;
739     const char *search= s.ptr() + s.length() - 1;
740 
741     const char *end= m_ptr + s.length() - 2;
742     const char *search_end= s.ptr() - 1;
743 skip:
744     while (str != end)
745     {
746       if (*str-- == *search)
747       {
748         const char *i= str;
749         const char *j= search - 1;
750         while (j != search_end)
751           if (*i-- != *j--) goto skip;
752         return (int) (i - m_ptr) +1;
753       }
754     }
755   }
756   return -1;
757 }
758 
substr(int offset,int count)759 String String::substr(int offset, int count)
760 {
761   int original_count = this->numchars();
762   if (offset > original_count)
763   {
764     offset= original_count;
765   }
766   if (offset + count > original_count)
767   {
768     count= original_count - offset;
769   }
770   size_t bytes_offset= this->charpos(offset);
771 
772   return String(this->m_ptr + bytes_offset,
773     this->charpos(offset + count) - bytes_offset, this->m_charset);
774 }
775 
776 /*
777   Replace substring with string
778   If wrong parameter or not enough memory, do nothing
779 */
780 
replace(size_t offset,size_t arg_length,const String & to)781 bool String::replace(size_t offset, size_t arg_length,const String &to)
782 {
783   return replace(offset, arg_length, to.ptr(), to.length());
784 }
785 
replace(size_t offset,size_t arg_length,const char * to,size_t to_length)786 bool String::replace(size_t offset, size_t arg_length,
787                      const char *to, size_t to_length)
788 {
789   long diff = static_cast<long>(to_length) - static_cast<long>(arg_length);
790   if (offset+arg_length <= m_length)
791   {
792     if (diff < 0)
793     {
794       if (to_length)
795         memcpy(m_ptr + offset, to, to_length);
796       memmove(m_ptr + offset + to_length,
797               m_ptr + offset + arg_length,
798               m_length - offset - arg_length);
799     }
800     else
801     {
802       if (diff)
803       {
804         if (mem_realloc(m_length + diff))
805           return true;
806         memmove(m_ptr + offset + to_length,
807                 m_ptr + offset + arg_length,
808                 m_length - offset - arg_length);
809       }
810       if (to_length)
811         memcpy(m_ptr + offset, to, to_length);
812     }
813     m_length+= diff;
814   }
815   return false;
816 }
817 
818 
819 // added by Holyfoot for "geometry" needs
reserve(size_t space_needed,size_t grow_by)820 int String::reserve(size_t space_needed, size_t grow_by)
821 {
822   if (m_alloced_length < m_length + space_needed)
823   {
824     if (mem_realloc(m_alloced_length + max(space_needed, grow_by) - 1))
825       return true;
826   }
827   return false;
828 }
829 
qs_append(const char * str,size_t len)830 void String::qs_append(const char *str, size_t len)
831 {
832   memcpy(m_ptr + m_length, str, len + 1);
833   m_length += len;
834 }
835 
qs_append(double d,size_t len)836 void String::qs_append(double d, size_t len)
837 {
838   char *buff = m_ptr + m_length;
839   m_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, len, buff, NULL);
840 }
841 
qs_append(int i)842 void String::qs_append(int i)
843 {
844   char *buff= m_ptr + m_length;
845   char *end= int10_to_str(i, buff, -10);
846   m_length+= (int) (end-buff);
847 }
848 
qs_append(uint i)849 void String::qs_append(uint i)
850 {
851   char *buff= m_ptr + m_length;
852   char *end= int10_to_str(i, buff, 10);
853   m_length+= (int) (end-buff);
854 }
855 
856 /*
857   Compare strings according to collation, without end space.
858 
859   SYNOPSIS
860     sortcmp()
861     s		First string
862     t		Second string
863     cs		Collation
864 
865   NOTE:
866     Normally this is case sensitive comparison
867 
868   RETURN
869   < 0	s < t
870   0	s == t
871   > 0	s > t
872 */
873 
874 
sortcmp(const String * s,const String * t,const CHARSET_INFO * cs)875 int sortcmp(const String *s,const String *t, const CHARSET_INFO *cs)
876 {
877  return cs->coll->strnncollsp(cs,
878                               (uchar *) s->ptr(),s->length(),
879                               (uchar *) t->ptr(),t->length(), 0);
880 }
881 
882 
883 /*
884   Compare strings byte by byte. End spaces are also compared.
885 
886   SYNOPSIS
887     stringcmp()
888     s		First string
889     t		Second string
890 
891   NOTE:
892     Strings are compared as a stream of uchars
893 
894   RETURN
895   < 0	s < t
896   0	s == t
897   > 0	s > t
898 */
899 
900 
stringcmp(const String * s,const String * t)901 int stringcmp(const String *s,const String *t)
902 {
903   size_t s_len= s->length();
904   size_t t_len= t->length();
905   size_t len= min(s_len, t_len);
906   int cmp= memcmp(s->ptr(), t->ptr(), len);
907   return (cmp) ? cmp : static_cast<int>(s_len) - static_cast<int>(t_len);
908 }
909 
910 /**
911   Makes a copy of a String's buffer unless it's already heap-allocated.
912 
913   If the buffer ('str') of 'from' is on the heap, this function returns
914   'from', possibly re-allocated to be at least from_length bytes long.
915   It is also the case if from==to or to==NULL.
916   Otherwise, this function makes and returns a copy of "from" into "to"; the
917   buffer of "to" is heap-allocated; a pre-condition is that from->str and
918   to->str must point to non-overlapping buffers.
919   The logic behind this complex design, is that a caller, typically a
920   val_str() function, sometimes has an input String ('from') which buffer it
921   wants to modify; but this String's buffer may or not be heap-allocated; if
922   it's not heap-allocated it is possibly in static storage or belongs to an
923   outer context, and thus should not be modified; in that case the caller
924   wants a heap-allocated copy which it can freely modify.
925 
926   @param  to    destination string
927   @param  from  source string
928   @param  from_length  destination string will hold at least from_length bytes.
929  */
930 
copy_if_not_alloced(String * to,String * from,size_t from_length)931 String *copy_if_not_alloced(String *to,String *from, size_t from_length)
932 {
933   if (from->m_is_alloced && from->m_alloced_length >= from_length)
934     return from;
935   if ((from->m_is_alloced && (from->m_alloced_length != 0)) || !to || from == to)
936   {
937     (void) from->mem_realloc(from_length,
938                             true /* force heap allocation */);
939     return from;
940   }
941   if (to->mem_realloc(from_length, true))
942     return from;				// Actually an error
943 
944   // from and to should not be overlapping
945   assert(!to->uses_buffer_owned_by(from));
946   assert(!from->uses_buffer_owned_by(to));
947 
948   if ((to->m_length= min(from->m_length, from_length)))
949     memcpy(to->m_ptr, from->m_ptr, to->m_length);
950   to->m_charset=from->m_charset;
951   return to;
952 }
953 
954 
955 /****************************************************************************
956   Help functions
957 ****************************************************************************/
958 
959 /*
960   copy a string,
961   with optional character set conversion,
962   with optional left padding (for binary -> UCS2 conversion)
963 
964   SYNOPSIS
965     well_formed_copy_nchars()
966     to			     Store result here
    to_length                Maximum length of "to" string
968     to_cs		     Character set of "to" string
969     from		     Copy from here
970     from_length		     Length of from string
971     from_cs		     From character set
    nchars                   Copy not more than nchars characters
973     well_formed_error_pos    Return position when "from" is not well formed
974                              or NULL otherwise.
975     cannot_convert_error_pos Return position where a not convertable
976                              character met, or NULL otherwise.
977     from_end_pos             Return position where scanning of "from"
978                              string stopped.
979   NOTES
980 
981   RETURN
982     length of bytes copied to 'to'
983 */
984 
985 
well_formed_copy_nchars(const CHARSET_INFO * to_cs,char * to,size_t to_length,const CHARSET_INFO * from_cs,const char * from,size_t from_length,size_t nchars,const char ** well_formed_error_pos,const char ** cannot_convert_error_pos,const char ** from_end_pos)986 size_t well_formed_copy_nchars(const CHARSET_INFO *to_cs,
987                                char *to, size_t to_length,
988                                const CHARSET_INFO *from_cs,
989                                const char *from, size_t from_length,
990                                size_t nchars,
991                                const char **well_formed_error_pos,
992                                const char **cannot_convert_error_pos,
993                                const char **from_end_pos)
994 {
995   size_t res;
996 
997   if ((to_cs == &my_charset_bin) ||
998       (from_cs == &my_charset_bin) ||
999       (to_cs == from_cs) ||
1000       my_charset_same(from_cs, to_cs))
1001   {
1002     if (to_length < to_cs->mbminlen || !nchars)
1003     {
1004       *from_end_pos= from;
1005       *cannot_convert_error_pos= NULL;
1006       *well_formed_error_pos= NULL;
1007       return 0;
1008     }
1009 
1010     if (to_cs == &my_charset_bin)
1011     {
1012       res= min(min(nchars, to_length), from_length);
1013       memmove(to, from, res);
1014       *from_end_pos= from + res;
1015       *well_formed_error_pos= NULL;
1016       *cannot_convert_error_pos= NULL;
1017     }
1018     else
1019     {
1020       int well_formed_error;
1021       uint from_offset;
1022 
1023       if ((from_offset= (from_length % to_cs->mbminlen)) &&
1024           (from_cs == &my_charset_bin))
1025       {
1026         /*
1027           Copying from BINARY to UCS2 needs to prepend zeros sometimes:
1028           INSERT INTO t1 (ucs2_column) VALUES (0x01);
1029           0x01 -> 0x0001
1030         */
1031         uint pad_length= to_cs->mbminlen - from_offset;
1032         memset(to, 0, pad_length);
1033         memmove(to + pad_length, from, from_offset);
1034         /*
1035           In some cases left zero-padding can create an incorrect character.
1036           For example:
1037             INSERT INTO t1 (utf32_column) VALUES (0x110000);
1038           We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
1039           The valid characters range is limited to 0x00000000..0x0010FFFF.
1040 
1041           Make sure we didn't pad to an incorrect character.
1042         */
1043         if (to_cs->cset->well_formed_len(to_cs,
1044                                          to, to + to_cs->mbminlen, 1,
1045                                          &well_formed_error) !=
1046                                          to_cs->mbminlen)
1047         {
1048           *from_end_pos= *well_formed_error_pos= from;
1049           *cannot_convert_error_pos= NULL;
1050           return 0;
1051         }
1052         nchars--;
1053         from+= from_offset;
1054         from_length-= from_offset;
1055         to+= to_cs->mbminlen;
1056         to_length-= to_cs->mbminlen;
1057       }
1058 
1059       set_if_smaller(from_length, to_length);
1060       res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
1061                                         nchars, &well_formed_error);
1062       memmove(to, from, res);
1063       *from_end_pos= from + res;
1064       *well_formed_error_pos= well_formed_error ? from + res : NULL;
1065       *cannot_convert_error_pos= NULL;
1066       if (from_offset)
1067         res+= to_cs->mbminlen;
1068     }
1069   }
1070   else
1071   {
1072     int cnvres;
1073     my_wc_t wc;
1074     my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
1075     my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
1076     const uchar *from_end= (const uchar*) from + from_length;
1077     uchar *to_end= (uchar*) to + to_length;
1078     char *to_start= to;
1079     *well_formed_error_pos= NULL;
1080     *cannot_convert_error_pos= NULL;
1081 
1082     for ( ; nchars; nchars--)
1083     {
1084       const char *from_prev= from;
1085       if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
1086         from+= cnvres;
1087       else if (cnvres == MY_CS_ILSEQ)
1088       {
1089         if (!*well_formed_error_pos)
1090           *well_formed_error_pos= from;
1091         from++;
1092         wc= '?';
1093       }
1094       else if (cnvres > MY_CS_TOOSMALL)
1095       {
1096         /*
1097           A correct multibyte sequence detected
1098           But it doesn't have Unicode mapping.
1099         */
1100         if (!*cannot_convert_error_pos)
1101           *cannot_convert_error_pos= from;
1102         from+= (-cnvres);
1103         wc= '?';
1104       }
1105       else
1106         break;  // Not enough characters
1107 
1108 outp:
1109       if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1110         to+= cnvres;
1111       else if (cnvres == MY_CS_ILUNI && wc != '?')
1112       {
1113         if (!*cannot_convert_error_pos)
1114           *cannot_convert_error_pos= from_prev;
1115         wc= '?';
1116         goto outp;
1117       }
1118       else
1119       {
1120         from= from_prev;
1121         break;
1122       }
1123     }
1124     *from_end_pos= from;
1125     res= to - to_start;
1126   }
1127   return res;
1128 }
1129 
1130 
1131 
1132 
print(String * str)1133 void String::print(String *str)
1134 {
1135   char *st= m_ptr;
1136   char *end= st + m_length;
1137   for (; st < end; st++)
1138   {
1139     uchar c= *st;
1140     switch (c)
1141     {
1142     case '\\':
1143       str->append(STRING_WITH_LEN("\\\\"));
1144       break;
1145     case '\0':
1146       str->append(STRING_WITH_LEN("\\0"));
1147       break;
1148     case '\'':
1149       str->append(STRING_WITH_LEN("\\'"));
1150       break;
1151     case '\n':
1152       str->append(STRING_WITH_LEN("\\n"));
1153       break;
1154     case '\r':
1155       str->append(STRING_WITH_LEN("\\r"));
1156       break;
1157     case '\032': // Ctrl-Z
1158       str->append(STRING_WITH_LEN("\\Z"));
1159       break;
1160     default:
1161       str->append(c);
1162     }
1163   }
1164 }
1165 
1166 
1167 /*
1168   Exchange state of this object and argument.
1169 
1170   SYNOPSIS
1171     String::swap()
1172 
1173   RETURN
1174     Target string will contain state of this object and vice versa.
1175 */
1176 
swap(String & s)1177 void String::swap(String &s)
1178 {
1179   swap_variables(char *, m_ptr, s.m_ptr);
1180   swap_variables(size_t, m_length, s.m_length);
1181   swap_variables(uint32, m_alloced_length, s.m_alloced_length);
1182   swap_variables(bool, m_is_alloced, s.m_is_alloced);
1183   swap_variables(const CHARSET_INFO *, m_charset, s.m_charset);
1184 }
1185 
1186 
1187 /**
1188   Convert string to printable ASCII string
1189 
1190   @details This function converts input string "from" replacing non-ASCII bytes
1191   with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
1192   the resulting string.
1193   This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages,
1194   e.g. when a string cannot be converted to a result charset.
1195 
1196 
1197   @param    to          output buffer
1198   @param    to_len      size of the output buffer (8 bytes or greater)
1199   @param    from        input string
1200   @param    from_len    size of the input string
1201   @param    from_cs     input charset
1202   @param    nbytes      maximal number of bytes to convert (from_len if 0)
1203 
1204   @return   number of bytes in the output string
1205 */
1206 
convert_to_printable(char * to,size_t to_len,const char * from,size_t from_len,const CHARSET_INFO * from_cs,size_t nbytes)1207 size_t convert_to_printable(char *to, size_t to_len,
1208                             const char *from, size_t from_len,
1209                             const CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
1210 {
1211   /* needs at least 8 bytes for '\xXX...' and zero byte */
1212   assert(to_len >= 8);
1213 
1214   char *t= to;
1215   char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
1216   const char *f= from;
1217   const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len);
1218   char *dots= to; // last safe place to append '...'
1219 
1220   if (!f || t == t_end)
1221     return 0;
1222 
1223   for (; t < t_end && f < f_end; f++)
1224   {
1225     /*
1226       If the source string is ASCII compatible (mbminlen==1)
1227       and the source character is in ASCII printable range (0x20..0x7F),
1228       then display the character as is.
1229 
1230       Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
1231       or the source character is not in the printable range,
1232       then print the character using HEX notation.
1233     */
1234     if (((unsigned char) *f) >= 0x20 &&
1235         ((unsigned char) *f) <= 0x7F &&
1236         from_cs->mbminlen == 1)
1237     {
1238       *t++= *f;
1239     }
1240     else
1241     {
1242       if (t_end - t < 4) // \xXX
1243         break;
1244       *t++= '\\';
1245       *t++= 'x';
1246       *t++= _dig_vec_upper[((unsigned char) *f) >> 4];
1247       *t++= _dig_vec_upper[((unsigned char) *f) & 0x0F];
1248     }
1249     if (t_end - t >= 3) // '...'
1250       dots= t;
1251   }
1252   if (f < from + from_len)
1253     memcpy(dots, STRING_WITH_LEN("...\0"));
1254   else
1255     *t= '\0';
1256   return t - to;
1257 }
1258 
1259 
1260 /**
1261   Convert a buffer to printable HEX encoded string
1262   For eg: ABCDEF1234
1263 
1264 
1265   @param    to          output buffer
1266   @param    to_len      size of the output buffer (from_len*2 + 1 or greater)
1267   @param    from        input buffer
1268   @param    from_len    size of the input buffer
1269 
1270   @return   number of bytes in the output string
1271 */
bin_to_hex_str(char * to,size_t to_len,char * from,size_t from_len)1272 size_t bin_to_hex_str(char *to, size_t to_len, char *from, size_t from_len)
1273 {
1274   char *out;
1275   char *in;
1276   size_t i;
1277 
1278   if (to_len < ((from_len * 2) + 1))
1279     return 0 ;
1280 
1281   out= to;
1282   in= from;
1283 
1284   for (i=0; i < from_len; i++, in++)
1285   {
1286     *out++=_dig_vec_upper[((unsigned char) *in) >> 4];
1287     *out++=_dig_vec_upper[((unsigned char) *in) & 0xF];
1288   }
1289 
1290   *out= '\0';
1291 
1292   return out - to;
1293 }
1294 
1295 /**
1296   Check if an input byte sequence is a valid character string of a given charset
1297 
1298   @param cs                     The input character set.
1299   @param str                    The input byte sequence to validate.
1300   @param length                 A byte length of the str.
1301   @param [out] valid_length     A byte length of a valid prefix of the str.
1302   @param [out] length_error     True in the case of a character length error:
1303                                 some byte[s] in the input is not a valid
1304                                 prefix for a character, i.e. the byte length
1305                                 of that invalid character is undefined.
1306 
1307   @retval true if the whole input byte sequence is a valid character string.
1308                The length_error output parameter is undefined.
1309 
1310   @return
1311     if the whole input byte sequence is a valid character string
1312     then
1313         return false
1314     else
1315         if the length of some character in the input is undefined (MY_CS_ILSEQ)
1316            or the last character is truncated (MY_CS_TOOSMALL)
1317         then
1318             *length_error= true; // fatal error!
1319         else
1320             *length_error= false; // non-fatal error: there is no wide character
1321                                   // encoding for some input character
1322         return true
1323 */
validate_string(const CHARSET_INFO * cs,const char * str,uint32 length,size_t * valid_length,bool * length_error)1324 bool validate_string(const CHARSET_INFO *cs, const char *str, uint32 length,
1325                      size_t *valid_length, bool *length_error)
1326 {
1327   if (cs->mbmaxlen > 1)
1328   {
1329     int well_formed_error;
1330     *valid_length= cs->cset->well_formed_len(cs, str, str + length,
1331                                              length, &well_formed_error);
1332     *length_error= well_formed_error;
1333     return well_formed_error;
1334   }
1335 
1336   /*
1337     well_formed_len() is not functional on single-byte character sets,
1338     so use mb_wc() instead:
1339   */
1340   *length_error= false;
1341 
1342   const uchar *from= reinterpret_cast<const uchar *>(str);
1343   const uchar *from_end= from + length;
1344   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1345 
1346   while (from < from_end)
1347   {
1348     my_wc_t wc;
1349     int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end);
1350     if (cnvres <= 0)
1351     {
1352       *valid_length= from - reinterpret_cast<const uchar *>(str);
1353       return true;
1354     }
1355     from+= cnvres;
1356   }
1357   *valid_length= length;
1358   return false;
1359 }
1360