1 #ifndef SQL_STRING_INCLUDED
2 #define SQL_STRING_INCLUDED
3 
4 /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights
5  * reserved.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License, version 2.0,
9    as published by the Free Software Foundation.
10 
11    This program is also distributed with certain software (including
12    but not limited to OpenSSL) that is licensed under separate terms,
13    as designated in a particular file or component or in included license
14    documentation.  The authors of MySQL hereby grant you an additional
15    permission to link the program and your derivative works with the
16    separately licensed software that they have included with MySQL.
17 
18    Without limiting anything contained in the foregoing, this file,
19    which is part of C Driver for MySQL (Connector/C), is also subject to the
20    Universal FOSS Exception, version 1.0, a copy of which can be found at
21    http://oss.oracle.com/licenses/universal-foss-exception.
22 
23    This program is distributed in the hope that it will be useful,
24    but WITHOUT ANY WARRANTY; without even the implied warranty of
25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26    GNU General Public License, version 2.0, for more details.
27 
28    You should have received a copy of the GNU General Public License
29    along with this program; if not, write to the Free Software
30    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
31 
32 /* This file is originally from the mysql distribution. Coded by monty */
33 
34 #include "m_ctype.h"                            /* my_charset_bin */
35 #include "my_sys.h"              /* alloc_root, my_free, my_realloc */
36 #include "m_string.h"                           /* TRASH */
37 
38 
39 /**
40   A wrapper class for null-terminated constant strings.
41   Constructors make sure that the position of the '\0' terminating byte
42   in m_str is always in sync with m_length.
43 
44   This class must stay as small as possible as we often
45   pass it and its descendants (such as Name_string) into functions
46   using call-by-value evaluation.
47 
  Don't add new members or virtual methods into this class!
49 */
50 class Simple_cstring
51 {
52 private:
53   const char *m_str;
54   size_t m_length;
55 protected:
56   /**
57     Initialize from a C string whose length is already known.
58   */
set(const char * str_arg,size_t length_arg)59   void set(const char *str_arg, size_t length_arg)
60   {
61     // NULL is allowed only with length==0
62     DBUG_ASSERT(str_arg || length_arg == 0);
63     // For non-NULL, make sure length_arg is in sync with '\0' terminator.
64     DBUG_ASSERT(!str_arg || str_arg[length_arg] == '\0');
65     m_str= str_arg;
66     m_length= length_arg;
67   }
68 public:
Simple_cstring()69   Simple_cstring()
70   {
71     set(NULL, 0);
72   }
Simple_cstring(const char * str_arg,size_t length_arg)73   Simple_cstring(const char *str_arg, size_t length_arg)
74   {
75     set(str_arg, length_arg);
76   }
Simple_cstring(const LEX_STRING arg)77   Simple_cstring(const LEX_STRING arg)
78   {
79     set(arg.str, arg.length);
80   }
reset()81   void reset()
82   {
83     set(NULL, 0);
84   }
85   /**
86     Set to a null-terminated string.
87   */
set(const char * str)88   void set(const char *str)
89   {
90     set(str, str ? strlen(str) : 0);
91   }
92   /**
93     Return string buffer.
94   */
ptr()95   const char *ptr() const { return m_str; }
96   /**
97     Check if m_ptr is set.
98   */
is_set()99   bool is_set() const { return m_str != NULL; }
100   /**
101     Return name length.
102   */
length()103   size_t length() const { return m_length; }
104   /**
105     Compare to another Simple_cstring.
106   */
eq_bin(const Simple_cstring other)107   bool eq_bin(const Simple_cstring other) const
108   {
109     return m_length == other.m_length &&
110            memcmp(m_str, other.m_str, m_length) == 0;
111   }
112   /**
113     Copy to the given buffer
114   */
strcpy(char * buff)115   void strcpy(char *buff) const
116   {
117     memcpy(buff, m_str, m_length);
118     buff[m_length]= '\0';
119   }
120 };
121 
122 
123 class String;
124 typedef struct charset_info_st CHARSET_INFO;
125 typedef struct st_io_cache IO_CACHE;
126 typedef struct st_mem_root MEM_ROOT;
127 
128 int sortcmp(const String *a,const String *b, const CHARSET_INFO *cs);
129 String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
copy_and_convert(char * to,uint32 to_length,const CHARSET_INFO * to_cs,const char * from,uint32 from_length,const CHARSET_INFO * from_cs,uint * errors)130 inline uint32 copy_and_convert(char *to, uint32 to_length,
131                                const CHARSET_INFO *to_cs,
132                                const char *from, uint32 from_length,
133                                const CHARSET_INFO *from_cs, uint *errors)
134 {
135   return my_convert(to, to_length, to_cs, from, from_length, from_cs, errors);
136 }
137 uint32 well_formed_copy_nchars(const CHARSET_INFO *to_cs,
138                                char *to, uint to_length,
139                                const CHARSET_INFO *from_cs,
140                                const char *from, uint from_length,
141                                uint nchars,
142                                const char **well_formed_error_pos,
143                                const char **cannot_convert_error_pos,
144                                const char **from_end_pos);
145 size_t my_copy_with_hex_escaping(const CHARSET_INFO *cs,
146                                  char *dst, size_t dstlen,
147                                  const char *src, size_t srclen);
148 uint convert_to_printable(char *to, size_t to_len,
149                           const char *from, size_t from_len,
150                           const CHARSET_INFO *from_cs, size_t nbytes= 0);
151 
152 class String
153 {
154   char *Ptr;
155   uint32 str_length,Alloced_length;
156   bool alloced;
157   const CHARSET_INFO *str_charset;
158 public:
String()159   String()
160   {
161     Ptr=0; str_length=Alloced_length=0; alloced=0;
162     str_charset= &my_charset_bin;
163   }
String(uint32 length_arg)164   String(uint32 length_arg)
165   {
166     alloced=0; Alloced_length=0; (void) real_alloc(length_arg);
167     str_charset= &my_charset_bin;
168   }
String(const char * str,const CHARSET_INFO * cs)169   String(const char *str, const CHARSET_INFO *cs)
170   {
171     Ptr=(char*) str; str_length=(uint) strlen(str); Alloced_length=0; alloced=0;
172     str_charset=cs;
173   }
String(const char * str,uint32 len,const CHARSET_INFO * cs)174   String(const char *str,uint32 len, const CHARSET_INFO *cs)
175   {
176     Ptr=(char*) str; str_length=len; Alloced_length=0; alloced=0;
177     str_charset=cs;
178   }
String(char * str,uint32 len,const CHARSET_INFO * cs)179   String(char *str,uint32 len, const CHARSET_INFO *cs)
180   {
181     Ptr=(char*) str; Alloced_length=str_length=len; alloced=0;
182     str_charset=cs;
183   }
String(const String & str)184   String(const String &str)
185   {
186     Ptr=str.Ptr ; str_length=str.str_length ;
187     Alloced_length=str.Alloced_length; alloced=0;
188     str_charset=str.str_charset;
189   }
new(size_t size,MEM_ROOT * mem_root)190   static void *operator new(size_t size, MEM_ROOT *mem_root) throw ()
191   { return (void*) alloc_root(mem_root, (uint) size); }
delete(void * ptr_arg,size_t size)192   static void operator delete(void *ptr_arg, size_t size)
193   {
194     (void) ptr_arg;
195     (void) size;
196     TRASH(ptr_arg, size);
197   }
delete(void *,MEM_ROOT *)198   static void operator delete(void *, MEM_ROOT *)
199   { /* never called */ }
~String()200   ~String() { free(); }
201 
set_charset(const CHARSET_INFO * charset_arg)202   inline void set_charset(const CHARSET_INFO *charset_arg)
203   { str_charset= charset_arg; }
charset()204   inline const CHARSET_INFO *charset() const { return str_charset; }
length()205   inline uint32 length() const { return str_length;}
alloced_length()206   inline uint32 alloced_length() const { return Alloced_length;}
207   inline char& operator [] (uint32 i) const { return Ptr[i]; }
length(uint32 len)208   inline void length(uint32 len) { str_length=len ; }
is_empty()209   inline bool is_empty() const { return (str_length == 0); }
mark_as_const()210   inline void mark_as_const() { Alloced_length= 0;}
ptr()211   inline const char *ptr() const { return Ptr; }
c_ptr()212   inline char *c_ptr()
213   {
214     DBUG_ASSERT(!alloced || !Ptr || !Alloced_length ||
215                 (Alloced_length >= (str_length + 1)));
216 
217     if (!Ptr || Ptr[str_length])		/* Should be safe */
218       (void) realloc(str_length);
219     return Ptr;
220   }
c_ptr_quick()221   inline char *c_ptr_quick()
222   {
223     if (Ptr && str_length < Alloced_length)
224       Ptr[str_length]=0;
225     return Ptr;
226   }
c_ptr_safe()227   inline char *c_ptr_safe()
228   {
229     if (Ptr && str_length < Alloced_length)
230       Ptr[str_length]=0;
231     else
232       (void) realloc(str_length);
233     return Ptr;
234   }
lex_string()235   LEX_STRING lex_string() const
236   {
237     LEX_STRING lex_string = { (char*) ptr(), length() };
238     return lex_string;
239   }
240 
set(String & str,uint32 offset,uint32 arg_length)241   void set(String &str,uint32 offset,uint32 arg_length)
242   {
243     DBUG_ASSERT(&str != this);
244     free();
245     Ptr=(char*) str.ptr()+offset; str_length=arg_length; alloced=0;
246     if (str.Alloced_length)
247       Alloced_length=str.Alloced_length-offset;
248     else
249       Alloced_length=0;
250     str_charset=str.str_charset;
251   }
252 
253 
254   /**
255      Points the internal buffer to the supplied one. The old buffer is freed.
256      @param str Pointer to the new buffer.
257      @param arg_length Length of the new buffer in characters, excluding any
258             null character.
259      @param cs Character set to use for interpreting string data.
260      @note The new buffer will not be null terminated.
261   */
set(char * str,uint32 arg_length,const CHARSET_INFO * cs)262   inline void set(char *str,uint32 arg_length, const CHARSET_INFO *cs)
263   {
264     free();
265     Ptr=(char*) str; str_length=Alloced_length=arg_length ; alloced=0;
266     str_charset=cs;
267   }
set(const char * str,uint32 arg_length,const CHARSET_INFO * cs)268   inline void set(const char *str,uint32 arg_length, const CHARSET_INFO *cs)
269   {
270     free();
271     Ptr=(char*) str; str_length=arg_length; Alloced_length=0 ; alloced=0;
272     str_charset=cs;
273   }
274   bool set_ascii(const char *str, uint32 arg_length);
set_quick(char * str,uint32 arg_length,const CHARSET_INFO * cs)275   inline void set_quick(char *str,uint32 arg_length, const CHARSET_INFO *cs)
276   {
277     if (!alloced)
278     {
279       Ptr=(char*) str; str_length=Alloced_length=arg_length;
280     }
281     str_charset=cs;
282   }
283   bool set_int(longlong num, bool unsigned_flag, const CHARSET_INFO *cs);
set(longlong num,const CHARSET_INFO * cs)284   bool set(longlong num, const CHARSET_INFO *cs)
285   { return set_int(num, false, cs); }
set(ulonglong num,const CHARSET_INFO * cs)286   bool set(ulonglong num, const CHARSET_INFO *cs)
287   { return set_int((longlong)num, true, cs); }
288   bool set_real(double num,uint decimals, const CHARSET_INFO *cs);
289 
290   /*
291     PMG 2004.11.12
292     This is a method that works the same as perl's "chop". It simply
293     drops the last character of a string. This is useful in the case
294     of the federated storage handler where I'm building a unknown
295     number, list of values and fields to be used in a sql insert
296     statement to be run on the remote server, and have a comma after each.
297     When the list is complete, I "chop" off the trailing comma
298 
299     ex.
300       String stringobj;
301       stringobj.append("VALUES ('foo', 'fi', 'fo',");
302       stringobj.chop();
303       stringobj.append(")");
304 
305     In this case, the value of string was:
306 
307     VALUES ('foo', 'fi', 'fo',
308     VALUES ('foo', 'fi', 'fo'
309     VALUES ('foo', 'fi', 'fo')
310 
311   */
chop()312   inline void chop()
313   {
314     str_length--;
315     Ptr[str_length]= '\0';
316     DBUG_ASSERT(strlen(Ptr) == str_length);
317   }
318 
free()319   inline void free()
320   {
321     if (alloced)
322     {
323       alloced=0;
324       Alloced_length=0;
325       my_free(Ptr);
326       Ptr=0;
327       str_length=0;				/* Safety */
328     }
329   }
alloc(uint32 arg_length)330   inline bool alloc(uint32 arg_length)
331   {
332     if (arg_length < Alloced_length)
333       return 0;
334     return real_alloc(arg_length);
335   }
336   bool real_alloc(uint32 arg_length);			// Empties old string
337   bool realloc(uint32 arg_length, bool force_on_heap= false);
338 
339   // Shrink the buffer, but only if it is allocated on the heap.
shrink(uint32 arg_length)340   inline void shrink(uint32 arg_length)
341   {
342     if (!is_alloced())
343       return;
344     if (arg_length < Alloced_length)
345     {
346       char *new_ptr;
347       if (!(new_ptr=(char*) my_realloc(Ptr,arg_length,MYF(0))))
348       {
349 	Alloced_length = 0;
350 	real_alloc(arg_length);
351       }
352       else
353       {
354 	Ptr=new_ptr;
355 	Alloced_length=arg_length;
356       }
357     }
358   }
is_alloced()359   bool is_alloced() const { return alloced; }
360   inline String& operator = (const String &s)
361   {
362     if (&s != this)
363     {
364       /*
365         It is forbidden to do assignments like
366         some_string = substring_of_that_string
367        */
368       DBUG_ASSERT(!s.uses_buffer_owned_by(this));
369       free();
370       Ptr=s.Ptr ; str_length=s.str_length ; Alloced_length=s.Alloced_length;
371       str_charset=s.str_charset;
372       alloced=0;
373     }
374     return *this;
375   }
376   /**
377     Takeover the buffer owned by another string.
378     "this" becames the owner of the buffer and
379     is further responsible to free it.
380     The string "s" is detouched from the buffer (cleared).
381 
382     @param s - a String object to steal buffer from.
383   */
takeover(String & s)384   inline void takeover(String &s)
385   {
386     DBUG_ASSERT(this != &s);
387     // Make sure buffers of the two Strings do not overlap
388     DBUG_ASSERT(!s.uses_buffer_owned_by(this));
389     free();
390     Ptr= s.Ptr;
391     str_length= s.str_length;
392     Alloced_length= s.Alloced_length;
393     alloced= s.alloced;
394     str_charset= s.str_charset;
395     s.Ptr= NULL;
396     s.Alloced_length= 0;
397     s.str_length= 0;
398     s.alloced= 0;
399   }
400 
401   bool copy();					// Alloc string if not alloced
402   bool copy(const String &s);			// Allocate new string
403   // Allocate new string
404   bool copy(const char *s,uint32 arg_length, const CHARSET_INFO *cs);
405   static bool needs_conversion(uint32 arg_length,
406   			       const CHARSET_INFO *cs_from, const CHARSET_INFO *cs_to,
407 			       uint32 *offset);
408   static bool needs_conversion_on_storage(uint32 arg_length,
409                                           const CHARSET_INFO *cs_from,
410                                           const CHARSET_INFO *cs_to);
411   bool copy_aligned(const char *s, uint32 arg_length, uint32 offset,
412 		    const CHARSET_INFO *cs);
413   bool set_or_copy_aligned(const char *s, uint32 arg_length,
414                            const CHARSET_INFO *cs);
415   bool copy(const char*s,uint32 arg_length, const CHARSET_INFO *csfrom,
416 	    const CHARSET_INFO *csto, uint *errors);
417   bool append(const String &s);
418   bool append(const char *s);
append(LEX_STRING * ls)419   bool append(LEX_STRING *ls)
420   {
421     return append(ls->str, (uint32) ls->length);
422   }
append(Simple_cstring str)423   bool append(Simple_cstring str)
424   {
425     return append(str.ptr(), static_cast<uint>(str.length()));
426   }
427   bool append(const char *s, uint32 arg_length);
428   bool append(const char *s, uint32 arg_length, const CHARSET_INFO *cs);
429   bool append_ulonglong(ulonglong val);
430   bool append(IO_CACHE* file, uint32 arg_length);
431   bool append_with_prefill(const char *s, uint32 arg_length,
432 			   uint32 full_length, char fill_char);
433   bool append_parenthesized(long nr, int radix= 10);
434   int strstr(const String &search,uint32 offset=0); // Returns offset to substring or -1
435   int strrstr(const String &search,uint32 offset=0); // Returns offset to substring or -1
436   bool replace(uint32 offset,uint32 arg_length,const char *to,uint32 length);
437   bool replace(uint32 offset,uint32 arg_length,const String &to);
append(char chr)438   inline bool append(char chr)
439   {
440     if (str_length < Alloced_length)
441     {
442       Ptr[str_length++]=chr;
443     }
444     else
445     {
446       if (realloc(str_length+1))
447 	return 1;
448       Ptr[str_length++]=chr;
449     }
450     return 0;
451   }
452   bool fill(uint32 max_length,char fill);
453   void strip_sp();
454   friend int sortcmp(const String *a,const String *b, const CHARSET_INFO *cs);
455   friend int stringcmp(const String *a,const String *b);
456   friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
457   uint32 numchars() const;
458   int charpos(int i,uint32 offset=0);
459 
reserve(uint32 space_needed)460   int reserve(uint32 space_needed)
461   {
462     return realloc(str_length + space_needed);
463   }
464   int reserve(uint32 space_needed, uint32 grow_by);
465   /*
466     The following append operations do NOT check alloced memory
467     q_*** methods writes values of parameters itself
468     qs_*** methods writes string representation of value
469   */
q_append(const char c)470   void q_append(const char c)
471   {
472     Ptr[str_length++] = c;
473   }
q_append(const uint32 n)474   void q_append(const uint32 n)
475   {
476     int4store(Ptr + str_length, n);
477     str_length += 4;
478   }
q_append(double d)479   void q_append(double d)
480   {
481     float8store(Ptr + str_length, d);
482     str_length += 8;
483   }
q_append(double * d)484   void q_append(double *d)
485   {
486     float8store(Ptr + str_length, *d);
487     str_length += 8;
488   }
q_append(const char * data,uint32 data_len)489   void q_append(const char *data, uint32 data_len)
490   {
491     memcpy(Ptr + str_length, data, data_len);
492     str_length += data_len;
493   }
494 
write_at_position(int position,uint32 value)495   void write_at_position(int position, uint32 value)
496   {
497     int4store(Ptr + position,value);
498   }
499 
500   void qs_append(const char *str, uint32 len);
501   void qs_append(double d);
502   void qs_append(double *d);
qs_append(const char c)503   inline void qs_append(const char c)
504   {
505      Ptr[str_length]= c;
506      str_length++;
507   }
508   void qs_append(int i);
509   void qs_append(uint i);
510 
511   /* Inline (general) functions used by the protocol functions */
512 
prep_append(uint32 arg_length,uint32 step_alloc)513   inline char *prep_append(uint32 arg_length, uint32 step_alloc)
514   {
515     uint32 new_length= arg_length + str_length;
516     if (new_length > Alloced_length)
517     {
518       if (realloc(new_length + step_alloc))
519         return 0;
520     }
521     uint32 old_length= str_length;
522     str_length+= arg_length;
523     return Ptr+ old_length;			/* Area to use */
524   }
525 
append(const char * s,uint32 arg_length,uint32 step_alloc)526   inline bool append(const char *s, uint32 arg_length, uint32 step_alloc)
527   {
528     uint32 new_length= arg_length + str_length;
529     if (new_length > Alloced_length && realloc(new_length + step_alloc))
530       return TRUE;
531     memcpy(Ptr+str_length, s, arg_length);
532     str_length+= arg_length;
533     return FALSE;
534   }
535   void print(String *print);
536 
537   /* Swap two string objects. Efficient way to exchange data without memcpy. */
538   void swap(String &s);
539 
uses_buffer_owned_by(const String * s)540   inline bool uses_buffer_owned_by(const String *s) const
541   {
542     return (s->alloced && Ptr >= s->Ptr && Ptr < s->Ptr + s->str_length);
543   }
is_ascii()544   bool is_ascii() const
545   {
546     if (length() == 0)
547       return TRUE;
548     if (charset()->mbminlen > 1)
549       return FALSE;
550     for (const char *c= ptr(), *end= c + length(); c < end; c++)
551     {
552       if (!my_isascii(*c))
553         return FALSE;
554     }
555     return TRUE;
556   }
557   /**
558     Make a zero-terminated copy of our value,allocated in the specified MEM_ROOT
559 
560     @param root         MEM_ROOT to allocate the result
561 
562     @return allocated string or NULL
563   */
dup(MEM_ROOT * root)564   char *dup(MEM_ROOT *root) const
565   {
566     if (str_length > 0 && Ptr[str_length - 1] == 0)
567       return static_cast<char *>(memdup_root(root, Ptr, str_length));
568 
569     char *ret= static_cast<char*>(alloc_root(root, str_length + 1));
570     if (ret != NULL)
571     {
572       memcpy(ret, Ptr, str_length);
573       ret[str_length]= 0;
574     }
575     return ret;
576   }
577 };
578 
579 
580 /**
581   String class wrapper with a preallocated buffer of size buff_sz
582 
  This class allows replacing sequences of:
584      char buff[12345];
585      String str(buff, sizeof(buff));
586      str.length(0);
587   with a simple equivalent declaration:
588      StringBuffer<12345> str;
589 */
590 
591 template<size_t buff_sz>
592 class StringBuffer : public String
593 {
594   char buff[buff_sz];
595 
596 public:
StringBuffer()597   StringBuffer() : String(buff, buff_sz, &my_charset_bin) { length(0); }
StringBuffer(const CHARSET_INFO * cs)598   explicit StringBuffer(const CHARSET_INFO *cs) : String(buff, buff_sz, cs)
599   {
600     length(0);
601   }
StringBuffer(const char * str,size_t length,const CHARSET_INFO * cs)602   StringBuffer(const char *str, size_t length, const CHARSET_INFO *cs)
603     : String(buff, buff_sz, cs)
604   {
605     set(str, length, cs);
606   }
607 };
608 
609 
check_if_only_end_space(const CHARSET_INFO * cs,char * str,char * end)610 static inline bool check_if_only_end_space(const CHARSET_INFO *cs, char *str,
611                                            char *end)
612 {
613   return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
614 }
615 
616 bool
617 validate_string(const CHARSET_INFO *cs, const char *str, uint32 length,
618                 size_t *valid_length, bool *length_error);
619 #endif /* SQL_STRING_INCLUDED */
620