1 /* Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /* Some useful string utility functions used by the MySQL server */
24 
25 #include "strfunc.h"
26 #include "sql_class.h"
27 #include "typelib.h"                            // TYPELIB
28 #include "m_ctype.h"                            // my_charset_latin1
29 #include "mysqld.h"                             // system_charset_info
30 
31 /*
32   Return bitmap for strings used in a set
33 
  SYNOPSIS
  find_set()
  lib			Strings in set
  str			Strings of set-strings separated by ','
  length		Length of str
  cs			Character set of str. If NULL, latin1 is used to strip
			end space and the strings are looked up with find_type()
  err_pos		If error, set to point to start of wrong set string
  err_len		If error, set to the length of wrong set string
  set_warning		Set to 1 if some string in set couldn't be used

  NOTE
    We delete all end space from str before comparison

  RETURN
    bitmap of all set members found in str.
    set_warning is set to 1 if there were any strings that couldn't be used
48 */
49 
50 static const char field_separator=',';
51 
find_set(TYPELIB * lib,const char * str,size_t length,const CHARSET_INFO * cs,char ** err_pos,uint * err_len,bool * set_warning)52 ulonglong find_set(TYPELIB *lib, const char *str, size_t length,
53                    const CHARSET_INFO *cs,
54                    char **err_pos, uint *err_len, bool *set_warning)
55 {
56   const CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
57   const char *end= str + strip->cset->lengthsp(strip, str, length);
58   ulonglong found= 0;
59   *err_pos= 0;                  // No error yet
60   *err_len= 0;
61   if (str != end)
62   {
63     const char *start= str;
64     for (;;)
65     {
66       const char *pos= start;
67       uint var_len;
68       int mblen= 1;
69 
70       if (cs && cs->mbminlen > 1)
71       {
72         for ( ; pos < end; pos+= mblen)
73         {
74           my_wc_t wc;
75           if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
76                                                (const uchar *) end)) < 1)
77             mblen= 1; // Not to hang on a wrong multibyte sequence
78           if (wc == (my_wc_t) field_separator)
79             break;
80         }
81       }
82       else
83         for (; pos != end && *pos != field_separator; pos++) ;
84       var_len= (uint) (pos - start);
85       uint find= cs ? find_type2(lib, start, var_len, cs) :
86                       find_type(lib, start, var_len, (bool) 0);
87       if (!find && *err_len == 0) // report the first error with length > 0
88       {
89         *err_pos= (char*) start;
90         *err_len= var_len;
91         *set_warning= 1;
92       }
93       else
94         found|= 1ULL << (find - 1);
95       if (pos >= end)
96         break;
97       start= pos + mblen;
98     }
99   }
100   return found;
101 }
102 
103 /*
104   Function to find a string in a TYPELIB
105   (similar to find_type() of mysys/typelib.c)
106 
107   SYNOPSIS
108    find_type()
109    lib			TYPELIB (struct of pointer to values + count)
110    find			String to find
111    length		Length of string to find
112    part_match		Allow part matching of value
113 
114  RETURN
115   0 error
116   > 0 position in TYPELIB->type_names +1
117 */
118 
find_type(const TYPELIB * lib,const char * find,size_t length,bool part_match)119 uint find_type(const TYPELIB *lib, const char *find, size_t length,
120                bool part_match)
121 {
122   uint found_count=0, found_pos=0;
123   const char *end= find+length;
124   const char *i;
125   const char *j;
126   for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
127   {
128     for (i=find ; i != end &&
129 	   my_toupper(system_charset_info,*i) ==
130 	   my_toupper(system_charset_info,*j) ; i++, j++) ;
131     if (i == end)
132     {
133       if (! *j)
134 	return(pos);
135       found_count++;
136       found_pos= pos;
137     }
138   }
139   return(found_count == 1 && part_match ? found_pos : 0);
140 }
141 
142 
143 /*
144   Find a string in a list of strings according to collation
145 
146   SYNOPSIS
147    find_type2()
   typelib		TYPELIB (struct of pointer to values + lengths + count)
149    x			String to find
150    length               String length
151    cs			Character set + collation to use for comparison
152 
153   NOTES
154 
155   RETURN
156     0	No matching value
157     >0  Offset+1 in typelib for matched string
158 */
159 
find_type2(const TYPELIB * typelib,const char * x,size_t length,const CHARSET_INFO * cs)160 uint find_type2(const TYPELIB *typelib, const char *x, size_t length,
161                 const CHARSET_INFO *cs)
162 {
163   int pos;
164   const char *j;
165   DBUG_ENTER("find_type2");
166   DBUG_PRINT("enter",("x: '%.*s'  lib: 0x%p",
167                       static_cast<int>(length), x, typelib));
168 
169   if (!typelib->count)
170   {
171     DBUG_PRINT("exit",("no count"));
172     DBUG_RETURN(0);
173   }
174 
175   for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
176   {
177     if (!my_strnncoll(cs, (const uchar*) x, length,
178                           (const uchar*) j, typelib->type_lengths[pos]))
179       DBUG_RETURN(pos+1);
180   }
181   DBUG_PRINT("exit",("Couldn't find type"));
182   DBUG_RETURN(0);
183 } /* find_type */
184 
185 
186 /*
187   Un-hex all elements in a typelib
188 
189   SYNOPSIS
190    unhex_type2()
191    interval       TYPELIB (struct of pointer to values + lengths + count)
192 
193   NOTES
194 
195   RETURN
196     N/A
197 */
198 
unhex_type2(TYPELIB * interval)199 void unhex_type2(TYPELIB *interval)
200 {
201   for (uint pos= 0; pos < interval->count; pos++)
202   {
203     char *from, *to;
204     for (from= to= (char*) interval->type_names[pos]; *from; )
205     {
206       /*
207         Note, hexchar_to_int(*from++) doesn't work
208         one some compilers, e.g. IRIX. Looks like a compiler
209         bug in inline functions in combination with arguments
210         that have a side effect. So, let's use from[0] and from[1]
211         and increment 'from' by two later.
212       */
213 
214       *to++= (char) (hexchar_to_int(from[0]) << 4) +
215                      hexchar_to_int(from[1]);
216       from+= 2;
217     }
218     interval->type_lengths[pos] /= 2;
219   }
220 }
221 
222 
223 /*
224   Check if the first word in a string is one of the ones in TYPELIB
225 
  SYNOPSIS
    check_word()
    lib		TYPELIB
    val		String to check
    end		End of input
    end_of_word	Set to point just after the word, but only if a word was found

  RETURN
    0	 No matching value
    > 0  lib->type_names[#-1] matched
	 end_of_word will point to separator character/end in 'val'
237 */
238 
check_word(TYPELIB * lib,const char * val,const char * end,const char ** end_of_word)239 uint check_word(TYPELIB *lib, const char *val, const char *end,
240 		const char **end_of_word)
241 {
242   int res;
243   const char *ptr;
244 
245   /* Fiend end of word */
246   for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
247     ;
248   if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
249     *end_of_word= ptr;
250   return res;
251 }
252 
253 
254 /*
255   Converts a string between character sets
256 
  SYNOPSIS
    strconvert()
    from_cs       source character set
    from          source, a null terminated string
    to_cs         destination character set
    to            destination buffer
    to_length     destination buffer length
    errors        set to the number of characters that could not be
                  converted and were replaced with '?'

  NOTES
    'to' is always terminated with a '\0' character.
    If there is not enough space to convert the whole string,
    only a prefix is converted, and terminated with '\0'.

  RETURN VALUES
    result string length
271 */
272 
273 
strconvert(CHARSET_INFO * from_cs,const char * from,CHARSET_INFO * to_cs,char * to,size_t to_length,uint * errors)274 size_t strconvert(CHARSET_INFO *from_cs, const char *from,
275                   CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors)
276 {
277   int cnvres;
278   my_wc_t wc;
279   char *to_start= to;
280   uchar *to_end= (uchar*) to + to_length - 1;
281   my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
282   my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
283   uint error_count= 0;
284 
285   while (1)
286   {
287     /*
288       Using 'from + 10' is safe:
289       - it is enough to scan a single character in any character set.
290       - if remaining string is shorter than 10, then mb_wc will return
291         with error because of unexpected '\0' character.
292     */
293     if ((cnvres= (*mb_wc)(from_cs, &wc,
294                           (uchar*) from, (uchar*) from + 10)) > 0)
295     {
296       if (!wc)
297         break;
298       from+= cnvres;
299     }
300     else if (cnvres == MY_CS_ILSEQ)
301     {
302       error_count++;
303       from++;
304       wc= '?';
305     }
306     else
307       break; // Impossible char.
308 
309 outp:
310 
311     if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
312       to+= cnvres;
313     else if (cnvres == MY_CS_ILUNI && wc != '?')
314     {
315       error_count++;
316       wc= '?';
317       goto outp;
318     }
319     else
320       break;
321   }
322   *to= '\0';
323   *errors= error_count;
324   return static_cast<size_t>(to - to_start);
325 
326 }
327 
328 
329 /*
330   Searches for a LEX_STRING in an LEX_STRING array.
331 
  SYNOPSIS
    find_string_in_array()
      haystack  The array
      needle    The string to search for
      cs        Character set whose collation is used for the comparison
336 
337   NOTE
338     The last LEX_STRING in the array should have str member set to NULL
339 
340   RETURN VALUES
341     -1   Not found
342     >=0  Ordinal position
343 */
344 
find_string_in_array(LEX_STRING * const haystack,LEX_STRING * const needle,CHARSET_INFO * const cs)345 int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
346                          CHARSET_INFO * const cs)
347 {
348   const LEX_STRING *pos;
349   for (pos= haystack; pos->str; pos++)
350     if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
351                                (uchar *) needle->str, needle->length, 0))
352     {
353       return static_cast<int>(pos - haystack);
354     }
355   return -1;
356 }
357 
358 
set_to_string(THD * thd,LEX_STRING * result,ulonglong set,const char * lib[])359 char *set_to_string(THD *thd, LEX_STRING *result, ulonglong set,
360                     const char *lib[])
361 {
362   char buff[STRING_BUFFER_USUAL_SIZE*8];
363   String tmp(buff, sizeof(buff), &my_charset_latin1);
364   LEX_STRING unused;
365 
366   if (!result)
367     result= &unused;
368 
369   tmp.length(0);
370 
371   for (uint i= 0; set; i++, set >>= 1)
372     if (set & 1) {
373       tmp.append(lib[i]);
374       tmp.append(',');
375     }
376 
377   if (tmp.length())
378   {
379     result->str=    thd->strmake(tmp.ptr(), tmp.length()-1);
380     result->length= tmp.length()-1;
381   }
382   else
383   {
384     result->str= const_cast<char*>("");
385     result->length= 0;
386   }
387   return result->str;
388 }
389 
flagset_to_string(THD * thd,LEX_STRING * result,ulonglong set,const char * lib[])390 char *flagset_to_string(THD *thd, LEX_STRING *result, ulonglong set,
391                         const char *lib[])
392 {
393   char buff[STRING_BUFFER_USUAL_SIZE*8];
394   String tmp(buff, sizeof(buff), &my_charset_latin1);
395   LEX_STRING unused;
396 
397   if (!result) result= &unused;
398 
399   tmp.length(0);
400 
401   // note that the last element is always "default", and it's ignored below
402   for (uint i= 0; lib[i+1]; i++, set >>= 1)
403   {
404     tmp.append(lib[i]);
405     tmp.append(set & 1 ? "=on," : "=off,");
406   }
407 
408   result->str=    thd->strmake(tmp.ptr(), tmp.length()-1);
409   result->length= tmp.length()-1;
410 
411   return result->str;
412 }
413 
414