1 /* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /* Some useful string utility functions used by the MySQL server */
24 
25 #include "sql_priv.h"
26 #include "unireg.h"
27 #include "strfunc.h"
28 #include "sql_class.h"
29 #include "typelib.h"                            // TYPELIB
30 #include "m_ctype.h"                            // my_charset_latin1
31 #include "mysqld.h"                             // system_charset_info
32 
33 /*
34   Return bitmap for strings used in a set
35 
36   SYNOPSIS
37   find_set()
38   lib			Strings in set
39   str			Strings of set-strings separated by ','
40   err_pos		If error, set to point to start of wrong set string
41   err_len		If error, set to the length of wrong set string
42   set_warning		Set to 1 if some string in set couldn't be used
43 
44   NOTE
45     We delete all end space from str before comparison
46 
47   RETURN
48     bitmap of all sets found in x.
49     set_warning is set to 1 if there was any sets that couldn't be set
50 */
51 
52 static const char field_separator=',';
53 
find_set(TYPELIB * lib,const char * str,uint length,const CHARSET_INFO * cs,char ** err_pos,uint * err_len,bool * set_warning)54 ulonglong find_set(TYPELIB *lib, const char *str, uint length,
55                    const CHARSET_INFO *cs,
56                    char **err_pos, uint *err_len, bool *set_warning)
57 {
58   const CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
59   const char *end= str + strip->cset->lengthsp(strip, str, length);
60   ulonglong found= 0;
61   *err_pos= 0;                  // No error yet
62   *err_len= 0;
63   if (str != end)
64   {
65     const char *start= str;
66     for (;;)
67     {
68       const char *pos= start;
69       uint var_len;
70       int mblen= 1;
71 
72       if (cs && cs->mbminlen > 1)
73       {
74         for ( ; pos < end; pos+= mblen)
75         {
76           my_wc_t wc;
77           if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
78                                                (const uchar *) end)) < 1)
79             mblen= 1; // Not to hang on a wrong multibyte sequence
80           if (wc == (my_wc_t) field_separator)
81             break;
82         }
83       }
84       else
85         for (; pos != end && *pos != field_separator; pos++) ;
86       var_len= (uint) (pos - start);
87       uint find= cs ? find_type2(lib, start, var_len, cs) :
88                       find_type(lib, start, var_len, (bool) 0);
89       if (!find && *err_len == 0) // report the first error with length > 0
90       {
91         *err_pos= (char*) start;
92         *err_len= var_len;
93         *set_warning= 1;
94       }
95       else
96         found|= 1ULL << (find - 1);
97       if (pos >= end)
98         break;
99       start= pos + mblen;
100     }
101   }
102   return found;
103 }
104 
105 /*
106   Function to find a string in a TYPELIB
107   (similar to find_type() of mysys/typelib.c)
108 
109   SYNOPSIS
110    find_type()
111    lib			TYPELIB (struct of pointer to values + count)
112    find			String to find
113    length		Length of string to find
114    part_match		Allow part matching of value
115 
116  RETURN
117   0 error
118   > 0 position in TYPELIB->type_names +1
119 */
120 
find_type(const TYPELIB * lib,const char * find,uint length,bool part_match)121 uint find_type(const TYPELIB *lib, const char *find, uint length,
122                bool part_match)
123 {
124   uint found_count=0, found_pos=0;
125   const char *end= find+length;
126   const char *i;
127   const char *j;
128   for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
129   {
130     for (i=find ; i != end &&
131 	   my_toupper(system_charset_info,*i) ==
132 	   my_toupper(system_charset_info,*j) ; i++, j++) ;
133     if (i == end)
134     {
135       if (! *j)
136 	return(pos);
137       found_count++;
138       found_pos= pos;
139     }
140   }
141   return(found_count == 1 && part_match ? found_pos : 0);
142 }
143 
144 
145 /*
146   Find a string in a list of strings according to collation
147 
148   SYNOPSIS
149    find_type2()
150    lib			TYPELIB (struct of pointer to values + count)
151    x			String to find
152    length               String length
153    cs			Character set + collation to use for comparison
154 
155   NOTES
156 
157   RETURN
158     0	No matching value
159     >0  Offset+1 in typelib for matched string
160 */
161 
find_type2(const TYPELIB * typelib,const char * x,uint length,const CHARSET_INFO * cs)162 uint find_type2(const TYPELIB *typelib, const char *x, uint length,
163                 const CHARSET_INFO *cs)
164 {
165   int pos;
166   const char *j;
167   DBUG_ENTER("find_type2");
168   DBUG_PRINT("enter",("x: '%.*s'  lib: 0x%lx", length, x, (long) typelib));
169 
170   if (!typelib->count)
171   {
172     DBUG_PRINT("exit",("no count"));
173     DBUG_RETURN(0);
174   }
175 
176   for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
177   {
178     if (!my_strnncoll(cs, (const uchar*) x, length,
179                           (const uchar*) j, typelib->type_lengths[pos]))
180       DBUG_RETURN(pos+1);
181   }
182   DBUG_PRINT("exit",("Couldn't find type"));
183   DBUG_RETURN(0);
184 } /* find_type */
185 
186 
187 /*
188   Un-hex all elements in a typelib
189 
190   SYNOPSIS
191    unhex_type2()
192    interval       TYPELIB (struct of pointer to values + lengths + count)
193 
194   NOTES
195 
196   RETURN
197     N/A
198 */
199 
unhex_type2(TYPELIB * interval)200 void unhex_type2(TYPELIB *interval)
201 {
202   for (uint pos= 0; pos < interval->count; pos++)
203   {
204     char *from, *to;
205     for (from= to= (char*) interval->type_names[pos]; *from; )
206     {
207       /*
208         Note, hexchar_to_int(*from++) doesn't work
209         one some compilers, e.g. IRIX. Looks like a compiler
210         bug in inline functions in combination with arguments
211         that have a side effect. So, let's use from[0] and from[1]
212         and increment 'from' by two later.
213       */
214 
215       *to++= (char) (hexchar_to_int(from[0]) << 4) +
216                      hexchar_to_int(from[1]);
217       from+= 2;
218     }
219     interval->type_lengths[pos] /= 2;
220   }
221 }
222 
223 
224 /*
225   Check if the first word in a string is one of the ones in TYPELIB
226 
227   SYNOPSIS
228     check_word()
229     lib		TYPELIB
230     val		String to check
231     end		End of input
232     end_of_word	Store value of last used byte here if we found word
233 
234   RETURN
235     0	 No matching value
236     > 1  lib->type_names[#-1] matched
237 	 end_of_word will point to separator character/end in 'val'
238 */
239 
check_word(TYPELIB * lib,const char * val,const char * end,const char ** end_of_word)240 uint check_word(TYPELIB *lib, const char *val, const char *end,
241 		const char **end_of_word)
242 {
243   int res;
244   const char *ptr;
245 
246   /* Fiend end of word */
247   for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
248     ;
249   if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
250     *end_of_word= ptr;
251   return res;
252 }
253 
254 
255 /*
256   Converts a string between character sets
257 
258   SYNOPSIS
259     strconvert()
260     from_cs       source character set
261     from          source, a null terminated string
262     to            destination buffer
263     to_length     destination buffer length
264 
265   NOTES
266     'to' is always terminated with a '\0' character.
267     If there is no enough space to convert whole string,
268     only prefix is converted, and terminated with '\0'.
269 
270   RETURN VALUES
271     result string length
272 */
273 
274 
strconvert(CHARSET_INFO * from_cs,const char * from,CHARSET_INFO * to_cs,char * to,uint to_length,uint * errors)275 uint strconvert(CHARSET_INFO *from_cs, const char *from,
276                 CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
277 {
278   int cnvres;
279   my_wc_t wc;
280   char *to_start= to;
281   uchar *to_end= (uchar*) to + to_length - 1;
282   my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
283   my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
284   uint error_count= 0;
285 
286   while (1)
287   {
288     /*
289       Using 'from + 10' is safe:
290       - it is enough to scan a single character in any character set.
291       - if remaining string is shorter than 10, then mb_wc will return
292         with error because of unexpected '\0' character.
293     */
294     if ((cnvres= (*mb_wc)(from_cs, &wc,
295                           (uchar*) from, (uchar*) from + 10)) > 0)
296     {
297       if (!wc)
298         break;
299       from+= cnvres;
300     }
301     else if (cnvres == MY_CS_ILSEQ)
302     {
303       error_count++;
304       from++;
305       wc= '?';
306     }
307     else
308       break; // Impossible char.
309 
310 outp:
311 
312     if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
313       to+= cnvres;
314     else if (cnvres == MY_CS_ILUNI && wc != '?')
315     {
316       error_count++;
317       wc= '?';
318       goto outp;
319     }
320     else
321       break;
322   }
323   *to= '\0';
324   *errors= error_count;
325   return (uint32) (to - to_start);
326 
327 }
328 
329 
330 /*
331   Searches for a LEX_STRING in an LEX_STRING array.
332 
333   SYNOPSIS
334     find_string_in_array()
335       heap    The array
336       needle  The string to search for
337 
338   NOTE
339     The last LEX_STRING in the array should have str member set to NULL
340 
341   RETURN VALUES
342     -1   Not found
343     >=0  Ordinal position
344 */
345 
find_string_in_array(LEX_STRING * const haystack,LEX_STRING * const needle,CHARSET_INFO * const cs)346 int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
347                          CHARSET_INFO * const cs)
348 {
349   const LEX_STRING *pos;
350   for (pos= haystack; pos->str; pos++)
351     if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
352                                (uchar *) needle->str, needle->length, 0))
353     {
354       return (pos - haystack);
355     }
356   return -1;
357 }
358 
359 
set_to_string(THD * thd,LEX_STRING * result,ulonglong set,const char * lib[])360 char *set_to_string(THD *thd, LEX_STRING *result, ulonglong set,
361                     const char *lib[])
362 {
363   char buff[STRING_BUFFER_USUAL_SIZE*8];
364   String tmp(buff, sizeof(buff), &my_charset_latin1);
365   LEX_STRING unused;
366 
367   if (!result)
368     result= &unused;
369 
370   tmp.length(0);
371 
372   for (uint i= 0; set; i++, set >>= 1)
373     if (set & 1) {
374       tmp.append(lib[i]);
375       tmp.append(',');
376     }
377 
378   if (tmp.length())
379   {
380     result->str=    thd->strmake(tmp.ptr(), tmp.length()-1);
381     result->length= tmp.length()-1;
382   }
383   else
384   {
385     result->str= const_cast<char*>("");
386     result->length= 0;
387   }
388   return result->str;
389 }
390 
flagset_to_string(THD * thd,LEX_STRING * result,ulonglong set,const char * lib[])391 char *flagset_to_string(THD *thd, LEX_STRING *result, ulonglong set,
392                         const char *lib[])
393 {
394   char buff[STRING_BUFFER_USUAL_SIZE*8];
395   String tmp(buff, sizeof(buff), &my_charset_latin1);
396   LEX_STRING unused;
397 
398   if (!result) result= &unused;
399 
400   tmp.length(0);
401 
402   // note that the last element is always "default", and it's ignored below
403   for (uint i= 0; lib[i+1]; i++, set >>= 1)
404   {
405     tmp.append(lib[i]);
406     tmp.append(set & 1 ? "=on," : "=off,");
407   }
408 
409   result->str=    thd->strmake(tmp.ptr(), tmp.length()-1);
410   result->length= tmp.length()-1;
411 
412   return result->str;
413 }
414 
415