1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
2    Copyright (c) 2016, 2020, MariaDB
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16 
17 /* This file is originally from the mysql distribution. Coded by monty */
18 
19 #ifdef USE_PRAGMA_IMPLEMENTATION
20 #pragma implementation				// gcc: Class implementation
21 #endif
22 
23 #include "mariadb.h"
24 #include <m_string.h>
25 #include <m_ctype.h>
26 #include <mysql_com.h>
27 
28 #include "sql_string.h"
29 
30 /*****************************************************************************
31 ** String functions
32 *****************************************************************************/
33 
real_alloc(size_t length)34 bool String::real_alloc(size_t length)
35 {
36   size_t arg_length= ALIGN_SIZE(length + 1);
37   DBUG_ASSERT(arg_length > length);
38   if (arg_length <= length)
39     return TRUE;                                 /* Overflow */
40   str_length=0;
41   if (Alloced_length < arg_length)
42   {
43     free();
44     if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME |
45                                                (thread_specific ?
46                                                 MY_THREAD_SPECIFIC : 0)))))
47       return TRUE;
48     DBUG_ASSERT(length < UINT_MAX32);
49     Alloced_length=(uint32) arg_length;
50     alloced=1;
51   }
52   Ptr[0]=0;
53   return FALSE;
54 }
55 
56 
57 /**
58    Allocates a new buffer on the heap for this String.
59 
60    - If the String's internal buffer is privately owned and heap allocated,
61      one of the following is performed.
62 
63      - If the requested length is greater than what fits in the buffer, a new
64        buffer is allocated, data moved and the old buffer freed.
65 
66      - If the requested length is less or equal to what fits in the buffer, a
67        null character is inserted at the appropriate position.
68 
69    - If the String does not keep a private buffer on the heap, such a buffer
     will be allocated and the string copied according to its length, as found
71      in String::length().
72 
73    For C compatibility, the new string buffer is null terminated.
74 
75    @param alloc_length The requested string size in characters, excluding any
76    null terminator.
77 
78    @retval false Either the copy operation is complete or, if the size of the
79    new buffer is smaller than the currently allocated buffer (if one exists),
80    no allocation occurred.
81 
82    @retval true An error occurred when attempting to allocate memory.
83 */
realloc_raw(size_t alloc_length)84 bool String::realloc_raw(size_t alloc_length)
85 {
86   if (Alloced_length <= alloc_length)
87   {
88     char *new_ptr;
89     uint32 len= ALIGN_SIZE(alloc_length+1);
90     DBUG_ASSERT(len > alloc_length);
91     if (len <= alloc_length)
92       return TRUE;                                 /* Overflow */
93     if (alloced)
94     {
95       if (!(new_ptr= (char*) my_realloc(Ptr,len,
96                                         MYF(MY_WME |
97                                             (thread_specific ?
98                                              MY_THREAD_SPECIFIC : 0)))))
99         return TRUE;				// Signal error
100     }
101     else if ((new_ptr= (char*) my_malloc(len,
102                                          MYF(MY_WME |
103                                              (thread_specific ?
104                                               MY_THREAD_SPECIFIC : 0)))))
105     {
106       if (str_length > len - 1)
107         str_length= 0;
108       if (str_length)				// Avoid bugs in memcpy on AIX
109 	memcpy(new_ptr,Ptr,str_length);
110       new_ptr[str_length]=0;
111       alloced=1;
112     }
113     else
114       return TRUE;			// Signal error
115     Ptr= new_ptr;
116     DBUG_ASSERT(len < UINT_MAX32);
117     Alloced_length=  (uint32)len;
118   }
119   return FALSE;
120 }
121 
set_int(longlong num,bool unsigned_flag,CHARSET_INFO * cs)122 bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs)
123 {
124   uint l=20*cs->mbmaxlen+1;
125   int base= unsigned_flag ? 10 : -10;
126 
127   if (alloc(l))
128     return TRUE;
129   str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num);
130   str_charset=cs;
131   return FALSE;
132 }
133 
134 
135 // Convert a number into its HEX representation
set_hex(ulonglong num)136 bool String::set_hex(ulonglong num)
137 {
138   char *n_end;
139   if (alloc(65) || !(n_end= longlong2str(num, Ptr, 16)))
140     return true;
141   length((uint32) (n_end - Ptr));
142   set_charset(&my_charset_latin1);
143   return false;
144 }
145 
146 
147 /**
148   Append a hex representation of the byte "value" into "to".
149   Note:
150     "to" is incremented for the caller by two bytes. It's passed by reference!
    So it resembles a macro, hence capital letters in the name.
152 */
APPEND_HEX(char * & to,uchar value)153 static inline void APPEND_HEX(char *&to, uchar value)
154 {
155   *to++= _dig_vec_upper[((uchar) value) >> 4];
156   *to++= _dig_vec_upper[((uchar) value) & 0x0F];
157 }
158 
159 
qs_append_hex(const char * str,uint32 len)160 void String::qs_append_hex(const char *str, uint32 len)
161 {
162   const char *str_end= str + len;
163   for (char *to= Ptr + str_length ; str < str_end; str++)
164     APPEND_HEX(to, (uchar) *str);
165   str_length+= len * 2;
166 }
167 
168 
169 // Convert a string to its HEX representation
set_hex(const char * str,uint32 len)170 bool String::set_hex(const char *str, uint32 len)
171 {
172   /*
173     Safety: cut the source string if "len" is too large.
174     Note, alloc() can allocate some more space than requested, due to:
175     - ALIGN_SIZE
176     - one extra byte for a null terminator
177     So cut the source string to 0x7FFFFFF0 rather than 0x7FFFFFFE.
178   */
179   set_if_smaller(len, 0x7FFFFFF0);
180   if (alloc(len * 2))
181     return true;
182   length(0);
183   qs_append_hex(str, len);
184   set_charset(&my_charset_latin1);
185   return false;
186 }
187 
188 
set_real(double num,uint decimals,CHARSET_INFO * cs)189 bool String::set_real(double num,uint decimals, CHARSET_INFO *cs)
190 {
191   char buff[FLOATING_POINT_BUFFER];
192   uint dummy_errors;
193   size_t len;
194 
195   str_charset=cs;
196   if (decimals >= FLOATING_POINT_DECIMALS)
197   {
198     len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
199     return copy(buff, (uint)len, &my_charset_latin1, cs, &dummy_errors);
200   }
201   len= my_fcvt(num, decimals, buff, NULL);
202   return copy(buff, (uint32) len, &my_charset_latin1, cs,
203               &dummy_errors);
204 }
205 
206 
copy()207 bool String::copy()
208 {
209   if (!alloced)
210   {
211     Alloced_length=0;				// Force realloc
212     return realloc(str_length);
213   }
214   return FALSE;
215 }
216 
217 /**
218    Copies the internal buffer from str. If this String has a private heap
219    allocated buffer where new data does not fit, a new buffer is allocated
220    before copying and the old buffer freed. Character set information is also
221    copied.
222 
223    @param str The string whose internal buffer is to be copied.
224 
225    @retval false Success.
226    @retval true Memory allocation failed.
227 */
copy(const String & str)228 bool String::copy(const String &str)
229 {
230   if (alloc(str.str_length))
231     return TRUE;
232   if ((str_length=str.str_length))
233     bmove(Ptr,str.Ptr,str_length);		// May be overlapping
234   Ptr[str_length]=0;
235   str_charset=str.str_charset;
236   return FALSE;
237 }
238 
copy(const char * str,size_t arg_length,CHARSET_INFO * cs)239 bool String::copy(const char *str,size_t arg_length, CHARSET_INFO *cs)
240 {
241   DBUG_ASSERT(arg_length < UINT_MAX32);
242   if (alloc(arg_length))
243     return TRUE;
244   if (Ptr == str && arg_length == uint32(str_length))
245   {
246     /*
247       This can happen in some cases. This code is here mainly to avoid
248       warnings from valgrind, but can also be an indication of error.
249     */
250     DBUG_PRINT("warning", ("Copying string on itself: %p  %zu",
251                            str, arg_length));
252   }
253   else if ((str_length=uint32(arg_length)))
254     memcpy(Ptr,str,arg_length);
255   Ptr[arg_length]=0;
256   str_charset=cs;
257   return FALSE;
258 }
259 
260 /*
261   Copy string, where strings may overlap.
262   Same as String::copy, but use memmove instead of memcpy to avoid warnings
263   from valgrind
264 */
265 
copy_or_move(const char * str,size_t arg_length,CHARSET_INFO * cs)266 bool String::copy_or_move(const char *str,size_t arg_length, CHARSET_INFO *cs)
267 {
268   DBUG_ASSERT(arg_length < UINT_MAX32);
269   if (alloc(arg_length))
270     return TRUE;
271   if ((str_length=uint32(arg_length)))
272     memmove(Ptr,str,arg_length);
273   Ptr[arg_length]=0;
274   str_charset=cs;
275   return FALSE;
276 }
277 
278 
279 /*
280   Checks that the source string can be just copied to the destination string
281   without conversion.
282 
  SYNOPSIS
284 
285   needs_conversion()
286   arg_length		Length of string to copy.
287   from_cs		Character set to copy from
288   to_cs			Character set to copy to
289   uint32 *offset	Returns number of unaligned characters.
290 
291   RETURN
292    0  No conversion needed
293    1  Either character set conversion or adding leading  zeros
294       (e.g. for UCS-2) must be done
295 
296   NOTE
297   to_cs may be NULL for "no conversion" if the system variable
298   character_set_results is NULL.
299 */
300 
needs_conversion(size_t arg_length,CHARSET_INFO * from_cs,CHARSET_INFO * to_cs,uint32 * offset)301 bool String::needs_conversion(size_t arg_length,
302 			      CHARSET_INFO *from_cs,
303 			      CHARSET_INFO *to_cs,
304 			      uint32 *offset)
305 {
306   *offset= 0;
307   if (!to_cs ||
308       (to_cs == &my_charset_bin) ||
309       (to_cs == from_cs) ||
310       my_charset_same(from_cs, to_cs) ||
311       ((from_cs == &my_charset_bin) &&
312        (!(*offset=(uint32)(arg_length % to_cs->mbminlen)))))
313     return FALSE;
314   return TRUE;
315 }
316 
317 
318 /*
319   Checks that the source string can just be copied to the destination string
320   without conversion.
321   Unlike needs_conversion it will require conversion on incoming binary data
  to ensure the data are verified for validity first.
323 
324   @param arg_length   Length of string to copy.
325   @param from_cs      Character set to copy from
326   @param to_cs        Character set to copy to
327 
328   @return conversion needed
329 */
needs_conversion_on_storage(size_t arg_length,CHARSET_INFO * cs_from,CHARSET_INFO * cs_to)330 bool String::needs_conversion_on_storage(size_t arg_length,
331                                          CHARSET_INFO *cs_from,
332                                          CHARSET_INFO *cs_to)
333 {
334   uint32 offset;
335   return (needs_conversion(arg_length, cs_from, cs_to, &offset) ||
336           /* force conversion when storing a binary string */
337           (cs_from == &my_charset_bin &&
338           /* into a non-binary destination */
339            cs_to != &my_charset_bin &&
340            /* and any of the following is true :*/
341            (
342             /* it's a variable length encoding */
343             cs_to->mbminlen != cs_to->mbmaxlen ||
344             /* longer than 2 bytes : neither 1 byte nor ucs2 */
345             cs_to->mbminlen > 2 ||
346             /* and is not a multiple of the char byte size */
347             0 != (arg_length % cs_to->mbmaxlen)
348            )
349           )
350          );
351 }
352 
353 
354 /*
  Copy a string in a multi-byte character set, adding leading zeros.
356 
357   SYNOPSIS
358 
359   copy_aligned()
360   str			String to copy
  arg_length		Length of string. This should NOT be divisible by
			cs->mbminlen.
  offset		arg_length % cs->mbminlen
364   cs			Character set for 'str'
365 
366   NOTES
    For real multi-byte, ascii incompatible character sets,
368     like UCS-2, add leading zeros if we have an incomplete character.
369     Thus,
370       SELECT _ucs2 0xAA
371     will automatically be converted into
372       SELECT _ucs2 0x00AA
373 
374   RETURN
375     0  ok
376     1  error
377 */
378 
copy_aligned(const char * str,size_t arg_length,size_t offset,CHARSET_INFO * cs)379 bool String::copy_aligned(const char *str, size_t arg_length, size_t offset,
380 			  CHARSET_INFO *cs)
381 {
382   /* How many bytes are in incomplete character */
383   offset= cs->mbminlen - offset; /* How many zeros we should prepend */
384   DBUG_ASSERT(offset && offset != cs->mbminlen);
385 
386   size_t aligned_length= arg_length + offset;
387   if (alloc(aligned_length))
388     return TRUE;
389 
390   /*
391     Note, this is only safe for big-endian UCS-2.
392     If we add little-endian UCS-2 sometimes, this code
393     will be more complicated. But it's OK for now.
394   */
395   bzero((char*) Ptr, offset);
396   memcpy(Ptr + offset, str, arg_length);
397   Ptr[aligned_length]=0;
398   /* str_length is always >= 0 as arg_length is != 0 */
399   str_length= (uint32)aligned_length;
400   str_charset= cs;
401   return FALSE;
402 }
403 
404 
set_or_copy_aligned(const char * str,size_t arg_length,CHARSET_INFO * cs)405 bool String::set_or_copy_aligned(const char *str, size_t arg_length,
406 				 CHARSET_INFO *cs)
407 {
408   /* How many bytes are in incomplete character */
409   size_t offset= (arg_length % cs->mbminlen);
410 
411   if (!offset)
412   {
413     /* All characters are complete, just use given string */
414     set(str, arg_length, cs);
415     return FALSE;
416   }
417   return copy_aligned(str, arg_length, offset, cs);
418 }
419 
420 
421 /**
422    Copies the character data into this String, with optional character set
423    conversion.
424 
425    @return
426    FALSE ok
427    TRUE  Could not allocate result buffer
428 
429 */
430 
copy(const char * str,size_t arg_length,CHARSET_INFO * from_cs,CHARSET_INFO * to_cs,uint * errors)431 bool String::copy(const char *str, size_t arg_length,
432 		  CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
433 {
434   uint32 offset;
435 
436   DBUG_ASSERT(!str || str != Ptr || !alloced);
437 
438   if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
439   {
440     *errors= 0;
441     return copy(str, arg_length, to_cs);
442   }
443   if ((from_cs == &my_charset_bin) && offset)
444   {
445     *errors= 0;
446     return copy_aligned(str, arg_length, offset, to_cs);
447   }
448   size_t new_length= to_cs->mbmaxlen*arg_length;
449   if (alloc(new_length))
450     return TRUE;
451   str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
452                               str, arg_length, from_cs, errors);
453   str_charset=to_cs;
454   return FALSE;
455 }
456 
457 
458 /*
459   Set a string to the value of a latin1-string, keeping the original charset
460 
461   SYNOPSIS
    set_ascii()
463     str			String of a simple charset (latin1)
464     arg_length		Length of string
465 
466   IMPLEMENTATION
467     If string object is of a simple character set, set it to point to the
468     given string.
469     If not, make a copy and convert it to the new character set.
470 
471   RETURN
472     0	ok
473     1	Could not allocate result buffer
474 
475 */
476 
set_ascii(const char * str,size_t arg_length)477 bool String::set_ascii(const char *str, size_t arg_length)
478 {
479   if (str_charset->mbminlen == 1)
480   {
481     set(str, arg_length, str_charset);
482     return 0;
483   }
484   uint dummy_errors;
485   return copy(str, (uint32)arg_length, &my_charset_latin1, str_charset, &dummy_errors);
486 }
487 
488 
489 /* This is used by mysql.cc */
490 
fill(uint32 max_length,char fill_char)491 bool String::fill(uint32 max_length,char fill_char)
492 {
493   if (str_length > max_length)
494     Ptr[str_length=max_length]=0;
495   else
496   {
497     if (realloc(max_length))
498       return TRUE;
499     bfill(Ptr+str_length,max_length-str_length,fill_char);
500     str_length=max_length;
501   }
502   return FALSE;
503 }
504 
strip_sp()505 void String::strip_sp()
506 {
507    while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
508     str_length--;
509 }
510 
append(const String & s)511 bool String::append(const String &s)
512 {
513   if (s.length())
514   {
515     if (realloc_with_extra_if_needed(str_length+s.length()))
516       return TRUE;
517     memcpy(Ptr+str_length,s.ptr(),s.length());
518     str_length+=s.length();
519   }
520   return FALSE;
521 }
522 
523 
524 /*
  Append an ASCII string to a string of the current character set
526 */
527 
append(const char * s,size_t size)528 bool String::append(const char *s,size_t size)
529 {
530   DBUG_ASSERT(size <= UINT_MAX32);
531   uint32 arg_length= (uint32) size;
532   if (!arg_length)
533     return FALSE;
534 
535   /*
536     For an ASCII incompatible string, e.g. UCS-2, we need to convert
537   */
538   if (str_charset->mbminlen > 1)
539   {
540     uint32 add_length=arg_length * str_charset->mbmaxlen;
541     uint dummy_errors;
542     if (realloc_with_extra_if_needed(str_length+ add_length))
543       return TRUE;
544     str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
545 				  s, arg_length, &my_charset_latin1,
546                                   &dummy_errors);
547     return FALSE;
548   }
549 
550   /*
551     For an ASCII compatinble string we can just append.
552   */
553   if (realloc_with_extra_if_needed(str_length+arg_length))
554     return TRUE;
555   memcpy(Ptr+str_length,s,arg_length);
556   str_length+=arg_length;
557   return FALSE;
558 }
559 
560 
561 /*
562   Append a 0-terminated ASCII string
563 */
564 
append(const char * s)565 bool String::append(const char *s)
566 {
567   return append(s, (uint) strlen(s));
568 }
569 
append_longlong(longlong val)570 bool String::append_longlong(longlong val)
571 {
572   if (realloc(str_length+MAX_BIGINT_WIDTH+2))
573     return TRUE;
574   char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, -10);
575   str_length= (uint32)(end - Ptr);
576   return FALSE;
577 }
578 
579 
append_ulonglong(ulonglong val)580 bool String::append_ulonglong(ulonglong val)
581 {
582   if (realloc(str_length+MAX_BIGINT_WIDTH+2))
583     return TRUE;
584   char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10);
585   str_length= (uint32) (end - Ptr);
586   return FALSE;
587 }
588 
589 /*
590   Append a string in the given charset to the string
591   with character set recoding
592 */
593 
append(const char * s,size_t arg_length,CHARSET_INFO * cs)594 bool String::append(const char *s, size_t arg_length, CHARSET_INFO *cs)
595 {
596   if (!arg_length)
597     return false;
598 
599   uint32 offset;
600 
601   if (needs_conversion((uint32)arg_length, cs, str_charset, &offset))
602   {
603     size_t add_length;
604     if ((cs == &my_charset_bin) && offset)
605     {
606       DBUG_ASSERT(str_charset->mbminlen > offset);
607       offset= str_charset->mbminlen - offset; // How many characters to pad
608       add_length= arg_length + offset;
609       if (realloc(str_length + add_length))
610         return TRUE;
611       bzero((char*) Ptr + str_length, offset);
612       memcpy(Ptr + str_length + offset, s, arg_length);
613       str_length+= (uint32)add_length;
614       return FALSE;
615     }
616 
617     add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
618     uint dummy_errors;
619     if (realloc_with_extra_if_needed(str_length + add_length))
620       return TRUE;
621     str_length+= copy_and_convert(Ptr+str_length, (uint32)add_length, str_charset,
622 				  s, (uint32)arg_length, cs, &dummy_errors);
623   }
624   else
625   {
626     if (realloc_with_extra_if_needed(str_length + arg_length))
627       return TRUE;
628     memcpy(Ptr + str_length, s, arg_length);
629     str_length+= (uint32)arg_length;
630   }
631   return FALSE;
632 }
633 
append(IO_CACHE * file,uint32 arg_length)634 bool String::append(IO_CACHE* file, uint32 arg_length)
635 {
636   if (realloc_with_extra_if_needed(str_length+arg_length))
637     return TRUE;
638   if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
639   {
640     shrink(str_length ? str_length : 1);
641     return TRUE;
642   }
643   str_length+=arg_length;
644   return FALSE;
645 }
646 
647 
648 /**
649   Append a parenthesized number to String.
650   Used in various pieces of SHOW related code.
651 
652   @param nr     Number
653   @param radix  Radix, optional parameter, 10 by default.
654 */
append_parenthesized(long nr,int radix)655 bool String::append_parenthesized(long nr, int radix)
656 {
657   char buff[64], *end;
658   buff[0]= '(';
659   end= int10_to_str(nr, buff + 1, radix);
660   *end++ = ')';
661   return append(buff, (uint) (end - buff));
662 }
663 
664 
append_with_prefill(const char * s,uint32 arg_length,uint32 full_length,char fill_char)665 bool String::append_with_prefill(const char *s,uint32 arg_length,
666 		 uint32 full_length, char fill_char)
667 {
668   int t_length= arg_length > full_length ? arg_length : full_length;
669 
670   if (realloc_with_extra_if_needed(str_length + t_length))
671     return TRUE;
672   t_length= full_length - arg_length;
673   if (t_length > 0)
674   {
675     bfill(Ptr+str_length, t_length, fill_char);
676     str_length=str_length + t_length;
677   }
678   append(s, arg_length);
679   return FALSE;
680 }
681 
numchars() const682 uint32 String::numchars() const
683 {
684   return (uint32) str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
685 }
686 
charpos(longlong i,uint32 offset)687 int String::charpos(longlong i,uint32 offset)
688 {
689   if (i <= 0)
690     return (int)i;
691   return (int)str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,(size_t)i);
692 }
693 
strstr(const String & s,uint32 offset)694 int String::strstr(const String &s,uint32 offset)
695 {
696   if (s.length()+offset <= str_length)
697   {
698     if (!s.length())
699       return ((int) offset);	// Empty string is always found
700 
701     const char *str = Ptr+offset;
702     const char *search=s.ptr();
703     const char *end=Ptr+str_length-s.length()+1;
704     const char *search_end=s.ptr()+s.length();
705 skip:
706     while (str != end)
707     {
708       if (*str++ == *search)
709       {
710 	char *i,*j;
711 	i=(char*) str; j=(char*) search+1;
712 	while (j != search_end)
713 	  if (*i++ != *j++) goto skip;
714 	return (int) (str-Ptr) -1;
715       }
716     }
717   }
718   return -1;
719 }
720 
721 /*
722 ** Search string from end. Offset is offset to the end of string
723 */
724 
strrstr(const String & s,uint32 offset)725 int String::strrstr(const String &s,uint32 offset)
726 {
727   if (s.length() <= offset && offset <= str_length)
728   {
729     if (!s.length())
730       return offset;				// Empty string is always found
731     const char *str = Ptr+offset-1;
732     const char *search=s.ptr()+s.length()-1;
733 
734     const char *end=Ptr+s.length()-2;
735     const char *search_end=s.ptr()-1;
736 skip:
737     while (str != end)
738     {
739       if (*str-- == *search)
740       {
741 	char *i,*j;
742 	i=(char*) str; j=(char*) search-1;
743 	while (j != search_end)
744 	  if (*i-- != *j--) goto skip;
745 	return (int) (i-Ptr) +1;
746       }
747     }
748   }
749   return -1;
750 }
751 
752 /*
753   Replace substring with string
754   If wrong parameter or not enough memory, do nothing
755 */
756 
replace(uint32 offset,uint32 arg_length,const String & to)757 bool String::replace(uint32 offset,uint32 arg_length,const String &to)
758 {
759   return replace(offset,arg_length,to.ptr(),to.length());
760 }
761 
replace(uint32 offset,uint32 arg_length,const char * to,uint32 to_length)762 bool String::replace(uint32 offset,uint32 arg_length,
763                      const char *to, uint32 to_length)
764 {
765   long diff = (long) to_length-(long) arg_length;
766   if (offset+arg_length <= str_length)
767   {
768     if (diff < 0)
769     {
770       if (to_length)
771 	memcpy(Ptr+offset,to,to_length);
772       bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
773 	    str_length-offset-arg_length);
774     }
775     else
776     {
777       if (diff)
778       {
779 	if (realloc_with_extra_if_needed(str_length+(uint32) diff))
780 	  return TRUE;
781 	bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
782 		  str_length-offset-arg_length);
783       }
784       if (to_length)
785 	memcpy(Ptr+offset,to,to_length);
786     }
787     str_length+=(uint32) diff;
788   }
789   return FALSE;
790 }
791 
792 
793 // added by Holyfoot for "geometry" needs
reserve(size_t space_needed,size_t grow_by)794 int String::reserve(size_t space_needed, size_t grow_by)
795 {
796   if (Alloced_length < str_length + space_needed)
797   {
798     if (realloc(Alloced_length + MY_MAX(space_needed, grow_by) - 1))
799       return TRUE;
800   }
801   return FALSE;
802 }
803 
qs_append(const char * str,size_t len)804 void String::qs_append(const char *str, size_t len)
805 {
806   memcpy(Ptr + str_length, str, len + 1);
807   str_length += (uint32)len;
808 }
809 
qs_append(double d)810 void String::qs_append(double d)
811 {
812   char *buff = Ptr + str_length;
813   str_length+= (uint32) my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff,
814                        NULL);
815 }
816 
qs_append(double * d)817 void String::qs_append(double *d)
818 {
819   double ld;
820   float8get(ld, (char*) d);
821   qs_append(ld);
822 }
823 
qs_append(int i)824 void String::qs_append(int i)
825 {
826   char *buff= Ptr + str_length;
827   char *end= int10_to_str(i, buff, -10);
828   str_length+= (int) (end-buff);
829 }
830 
qs_append(ulonglong i)831 void String::qs_append(ulonglong i)
832 {
833   char *buff= Ptr + str_length;
834   char *end= longlong10_to_str(i, buff, 10);
835   str_length+= (int) (end-buff);
836 }
837 
838 /*
839   Compare strings according to collation, without end space.
840 
841   SYNOPSIS
842     sortcmp()
843     s		First string
844     t		Second string
845     cs		Collation
846 
847   NOTE:
848     Normally this is case sensitive comparison
849 
850   RETURN
851   < 0	s < t
852   0	s == t
853   > 0	s > t
854 */
855 
856 
sortcmp(const String * s,const String * t,CHARSET_INFO * cs)857 int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
858 {
859  return cs->coll->strnncollsp(cs,
860                               (uchar *) s->ptr(),s->length(),
861                               (uchar *) t->ptr(),t->length());
862 }
863 
864 
865 /*
866   Compare strings byte by byte. End spaces are also compared.
867 
868   SYNOPSIS
869     stringcmp()
870     s		First string
871     t		Second string
872 
873   NOTE:
874     Strings are compared as a stream of uchars
875 
876   RETURN
877   < 0	s < t
878   0	s == t
879   > 0	s > t
880 */
881 
882 
stringcmp(const String * s,const String * t)883 int stringcmp(const String *s,const String *t)
884 {
885   uint32 s_len=s->length(),t_len=t->length(),len=MY_MIN(s_len,t_len);
886   int cmp= len ? memcmp(s->ptr(), t->ptr(), len) : 0;
887   return (cmp) ? cmp : (int) (s_len - t_len);
888 }
889 
890 
891 /**
892   Return a string which has the same value with "from" and
893   which is safe to modify, trying to avoid unnecessary allocation
894   and copying when possible.
895 
896   @param to           Buffer. Must not be a constant string.
897   @param from         Some existing value. We'll try to reuse it.
898                       Can be a constant or a variable string.
899   @param from_length  The total size that will be possibly needed.
900                       Note, can be 0.
901 
902   Note, in some cases "from" and "to" can point to the same object.
903 
904   If "from" is a variable string and its allocated memory is enough
905   to store "from_length" bytes, then "from" is returned as is.
906 
907   If "from" is a variable string and its allocated memory is not enough
908   to store "from_length" bytes, then "from" is reallocated and returned.
909 
910   Otherwise (if "from" is a constant string, or looks like a constant string),
911   then "to" is reallocated to fit "from_length" bytes, the value is copied
912   from "from" to "to", then "to" is returned.
913 */
copy_if_not_alloced(String * to,String * from,uint32 from_length)914 String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
915 {
916   DBUG_ASSERT(to);
917   /*
918     If "from" is a constant string, e.g.:
919        SELECT INSERT('', <pos>, <length>, <replacement>);
920     we should not return it. See MDEV-9332.
921 
922     The code below detects different string types:
923 
924     a. All constant strings have Alloced_length==0 and alloced==false.
925        They point to a static memory array, or a mem_root memory,
926        and should stay untouched until the end of their life cycle.
927        Not safe to reuse.
928 
929     b. Some variable string have Alloced_length==0 and alloced==false initially,
930        they are not bound to any char array and allocate space on the first use
931        (and become #d). A typical example of such String is Item::str_value.
932        This type of string could be reused, but there is no a way to distinguish
933        them from the true constant strings (#a).
934        Not safe to reuse.
935 
936     c. Some variable strings have Alloced_length>0 and alloced==false.
       They point to a fixed size writable char array (typically on stack)
938        initially but can later allocate more space on the heap when the
939        fixed size array is too small (these strings become #d after allocation).
940        Safe to reuse.
941 
942     d. Some variable strings have Alloced_length>0 and alloced==true.
943        They already store data on the heap.
944        Safe to reuse.
945 
946     e. Some strings can have Alloced_length==0 and alloced==true.
947        This type of strings allocate space on the heap, but then are marked
948        as constant strings using String::mark_as_const().
949        A typical example - the result of a character set conversion
950        of a constant string.
951        Not safe to reuse.
952   */
953   if (from->Alloced_length > 0) // "from" is  #c or #d (not a constant)
954   {
955     if (from->Alloced_length >= from_length)
956       return from; // #c or #d (large enough to store from_length bytes)
957 
958     if (from->alloced)
959     {
960       (void) from->realloc(from_length);
961       return from; // #d (reallocated to fit from_length bytes)
962     }
963     /*
      "from" is of type #c. It currently points to a writable char array
965       (typically on stack), but is too small for "from_length" bytes.
966       We need to reallocate either "from" or "to".
967 
968       "from" typically points to a temporary buffer inside Item_xxx::val_str(),
969       or to Item::str_value, and thus is "less permanent" than "to".
970 
      Reallocating "to" may give more benefits:
972       - "to" can point to a "more permanent" storage and can be reused
973         for multiple rows, e.g. str_buffer in Protocol::send_result_set_row(),
974         which is passed to val_str() for all string type rows.
975       - "from" can stay pointing to its original fixed size stack char array,
976         and thus reduce the total amount of my_alloc/my_free.
977     */
978   }
979 
980   if (from == to)
981   {
982     /*
983       Possible string types:
984       #a  not possible (constants should not be passed as "to")
985       #b  possible     (a fresh variable with no associated char buffer)
986       #c  possible     (a variable with a char buffer,
987                         in case it's smaller than fixed_length)
988       #d  not possible (handled earlier)
989       #e  not possible (constants should not be passed as "to")
990 
991       If a string of types #a or #e appears here, that means the caller made
992       something wrong. Otherwise, it's safe to reallocate and return "to".
993 
994       Note, as we can't distinguish between #a and #b for sure,
995       so we can't assert "not #a", but we can at least assert "not #e".
996     */
997     DBUG_ASSERT(!from->alloced || from->Alloced_length > 0); // Not #e
998 
999     (void) from->realloc(from_length);
1000     return from;
1001   }
1002   if (from->uses_buffer_owned_by(to))
1003   {
1004     DBUG_ASSERT(!from->alloced);
1005     DBUG_ASSERT(to->alloced);
1006     /*
1007       "from" is a constant string pointing to a fragment of alloced string "to":
1008         to=  xxxFFFyyy
1009       - FFF is the part of "to" pointed by "from"
1010       - xxx is the part of "to" before "from"
1011       - yyy is the part of "to" after "from"
1012     */
1013     uint32 xxx_length= (uint32) (from->ptr() - to->ptr());
1014     uint32 yyy_length= (uint32) (to->end() - from->end());
1015     DBUG_ASSERT(to->length() >= yyy_length);
1016     to->length(to->length() - yyy_length); // Remove the "yyy" part
1017     DBUG_ASSERT(to->length() >= xxx_length);
1018     to->replace(0, xxx_length, "", 0);     // Remove the "xxx" part
1019     to->realloc(from_length);
1020     to->str_charset= from->str_charset;
1021     return to;
1022   }
1023   if (to->realloc(from_length))
1024     return from;				// Actually an error
1025   if ((to->str_length=MY_MIN(from->str_length,from_length)))
1026     memcpy(to->Ptr,from->Ptr,to->str_length);
1027   to->str_charset=from->str_charset;
1028   return to; // "from" was of types #a, #b, #e, or small #c.
1029 }
1030 
1031 
1032 /****************************************************************************
1033   Help functions
1034 ****************************************************************************/
1035 
1036 /**
1037   Copy string with HEX-encoding of "bad" characters.
1038 
1039   @details This functions copies the string pointed by "src"
1040   to the string pointed by "dst". Not more than "srclen" bytes
1041   are read from "src". Any sequences of bytes representing
1042   a not-well-formed substring (according to cs) are hex-encoded,
1043   and all well-formed substrings (according to cs) are copied as is.
1044   Not more than "dstlen" bytes are written to "dst". The number
1045   of bytes written to "dst" is returned.
1046 
1047    @param      cs       character set pointer of the destination string
1048    @param[out] dst      destination string
1049    @param      dstlen   size of dst
1050    @param      src      source string
1051    @param      srclen   length of src
1052 
1053    @retval     result length
1054 */
1055 
1056 size_t
my_copy_with_hex_escaping(CHARSET_INFO * cs,char * dst,size_t dstlen,const char * src,size_t srclen)1057 my_copy_with_hex_escaping(CHARSET_INFO *cs,
1058                           char *dst, size_t dstlen,
1059                           const char *src, size_t srclen)
1060 {
1061   const char *srcend= src + srclen;
1062   char *dst0= dst;
1063 
1064   for ( ; src < srcend ; )
1065   {
1066     size_t chlen;
1067     if ((chlen= my_ismbchar(cs, src, srcend)))
1068     {
1069       if (dstlen < chlen)
1070         break; /* purecov: inspected */
1071       memcpy(dst, src, chlen);
1072       src+= chlen;
1073       dst+= chlen;
1074       dstlen-= chlen;
1075     }
1076     else if (*src & 0x80)
1077     {
1078       if (dstlen < 4)
1079         break; /* purecov: inspected */
1080       *dst++= '\\';
1081       *dst++= 'x';
1082       APPEND_HEX(dst, (uchar) *src);
1083       src++;
1084       dstlen-= 4;
1085     }
1086     else
1087     {
1088       if (dstlen < 1)
1089         break; /* purecov: inspected */
1090       *dst++= *src++;
1091       dstlen--;
1092     }
1093   }
1094   return dst - dst0;
1095 }
1096 
1097 
1098 /*
1099   Copy a string,
1100   with optional character set conversion,
1101   with optional left padding (for binary -> UCS2 conversion)
1102 
1103   Bad input bytes are replaced to '?'.
1104 
1105   The string that is written to "to" is always well-formed.
1106 
1107   @param to                  The destination string
1108   @param to_length           Space available in "to"
1109   @param to_cs               Character set of the "to" string
1110   @param from                The source string
1111   @param from_length         Length of the "from" string
1112   @param from_cs             Character set of the "from" string
1113   @param nchars              Copy not more than "nchars" characters
1114 
1115   The members as set as follows:
1116   m_well_formed_error_pos    To the position when "from" is not well formed
1117                              or NULL otherwise.
1118   m_cannot_convert_error_pos To the position where a not convertable
1119                              character met, or NULL otherwise.
1120   m_source_end_pos           To the position where scanning of the "from"
1121                              string stopped.
1122 
1123   @returns                   number of bytes that were written to 'to'
1124 */
1125 uint
well_formed_copy(CHARSET_INFO * to_cs,char * to,size_t to_length,CHARSET_INFO * from_cs,const char * from,size_t from_length,size_t nchars)1126 String_copier::well_formed_copy(CHARSET_INFO *to_cs,
1127                                 char *to, size_t to_length,
1128                                 CHARSET_INFO *from_cs,
1129                                 const char *from, size_t from_length, size_t nchars)
1130 {
1131   if ((to_cs == &my_charset_bin) ||
1132       (from_cs == &my_charset_bin) ||
1133       (to_cs == from_cs) ||
1134       my_charset_same(from_cs, to_cs))
1135   {
1136     m_cannot_convert_error_pos= NULL;
1137     return (uint) to_cs->cset->copy_fix(to_cs, to, to_length, from, from_length,
1138                                  nchars, this);
1139   }
1140   return (uint) my_convert_fix(to_cs, to, to_length, from_cs, from, from_length,
1141                         nchars, this, this);
1142 }
1143 
1144 
1145 
1146 /*
1147   Append characters to a single-quoted string '...', escaping special
1148   characters with backslashes as necessary.
1149   Does not add the enclosing quotes, this is left up to caller.
1150 */
1151 #define APPEND(X)   if (append(X)) return 1; else break
append_for_single_quote(const char * st,size_t len)1152 bool String::append_for_single_quote(const char *st, size_t len)
1153 {
1154   const char *end= st+len;
1155   for (; st < end; st++)
1156   {
1157     uchar c= *st;
1158     switch (c)
1159     {
1160     case '\\':   APPEND(STRING_WITH_LEN("\\\\"));
1161     case '\0':   APPEND(STRING_WITH_LEN("\\0"));
1162     case '\'':   APPEND(STRING_WITH_LEN("\\'"));
1163     case '\n':   APPEND(STRING_WITH_LEN("\\n"));
1164     case '\r':   APPEND(STRING_WITH_LEN("\\r"));
1165     case '\032': APPEND(STRING_WITH_LEN("\\Z"));
1166     default:     APPEND(c);
1167     }
1168   }
1169   return 0;
1170 }
1171 
print(String * str) const1172 void String::print(String *str) const
1173 {
1174   str->append_for_single_quote(Ptr, str_length);
1175 }
1176 
1177 
print_with_conversion(String * print,CHARSET_INFO * cs) const1178 void String::print_with_conversion(String *print, CHARSET_INFO *cs) const
1179 {
1180   StringBuffer<256> tmp(cs);
1181   uint errors= 0;
1182   tmp.copy(this, cs, &errors);
1183   tmp.print(print);
1184 }
1185 
1186 
1187 /*
1188   Exchange state of this object and argument.
1189 
1190   SYNOPSIS
1191     String::swap()
1192 
1193   RETURN
1194     Target string will contain state of this object and vice versa.
1195 */
1196 
swap(String & s)1197 void String::swap(String &s)
1198 {
1199   swap_variables(char *, Ptr, s.Ptr);
1200   swap_variables(uint32, str_length, s.str_length);
1201   swap_variables(uint32, Alloced_length, s.Alloced_length);
1202   swap_variables(bool, alloced, s.alloced);
1203   swap_variables(CHARSET_INFO*, str_charset, s.str_charset);
1204 }
1205 
1206 
1207 /**
1208   Convert string to printable ASCII string
1209 
1210   @details This function converts input string "from" replacing non-ASCII bytes
1211   with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
1212   the resulting string.
1213   This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages,
1214   e.g. when a string cannot be converted to a result charset.
1215 
1216 
1217   @param    to          output buffer
1218   @param    to_len      size of the output buffer (8 bytes or greater)
1219   @param    from        input string
1220   @param    from_len    size of the input string
1221   @param    from_cs     input charset
1222   @param    nbytes      maximal number of bytes to convert (from_len if 0)
1223 
1224   @return   number of bytes in the output string
1225 */
1226 
convert_to_printable(char * to,size_t to_len,const char * from,size_t from_len,CHARSET_INFO * from_cs,size_t nbytes)1227 uint convert_to_printable(char *to, size_t to_len,
1228                           const char *from, size_t from_len,
1229                           CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
1230 {
1231   /* needs at least 8 bytes for '\xXX...' and zero byte */
1232   DBUG_ASSERT(to_len >= 8);
1233 
1234   char *t= to;
1235   char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
1236   const char *f= from;
1237   const char *f_end= from + (nbytes ? MY_MIN(from_len, nbytes) : from_len);
1238   char *dots= to; // last safe place to append '...'
1239 
1240   if (!f || t == t_end)
1241     return 0;
1242 
1243   for (; t < t_end && f < f_end; f++)
1244   {
1245     /*
1246       If the source string is ASCII compatible (mbminlen==1)
1247       and the source character is in ASCII printable range (0x20..0x7F),
1248       then display the character as is.
1249 
1250       Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
1251       or the source character is not in the printable range,
1252       then print the character using HEX notation.
1253     */
1254     if (((unsigned char) *f) >= 0x20 &&
1255         ((unsigned char) *f) <= 0x7F &&
1256         from_cs->mbminlen == 1)
1257     {
1258       *t++= *f;
1259     }
1260     else
1261     {
1262       if (t_end - t < 4) // \xXX
1263         break;
1264       *t++= '\\';
1265       *t++= 'x';
1266       APPEND_HEX(t, *f);
1267     }
1268     if (t_end - t >= 3) // '...'
1269       dots= t;
1270   }
1271   if (f < from + from_len)
1272     memcpy(dots, STRING_WITH_LEN("...\0"));
1273   else
1274     *t= '\0';
1275   return (uint) (t - to);
1276 }
1277