1 /* Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /* Some useful string utility functions used by the MySQL server */
24
25 #include "strfunc.h"
26 #include "sql_class.h"
27 #include "typelib.h" // TYPELIB
28 #include "m_ctype.h" // my_charset_latin1
29 #include "mysqld.h" // system_charset_info
30
/*
  Return bitmap for strings used in a set

  SYNOPSIS
    find_set()
    lib          Strings in set
    str          Strings of set-strings separated by ','
    length       Length of str in bytes
    cs           Character set of str. If NULL, elements are looked up with
                 find_type() and trailing spaces are stripped using latin1
    err_pos      If error, set to point to start of wrong set string
    err_len      If error, set to the length of wrong set string
    set_warning  Set to 1 if some string in set couldn't be used

  NOTE
    We delete all end space from str before comparison

  RETURN
    bitmap of all sets found in str.
    set_warning is set to 1 if there were any strings that couldn't be matched
*/
49
50 static const char field_separator=',';
51
find_set(TYPELIB * lib,const char * str,size_t length,const CHARSET_INFO * cs,char ** err_pos,uint * err_len,bool * set_warning)52 ulonglong find_set(TYPELIB *lib, const char *str, size_t length,
53 const CHARSET_INFO *cs,
54 char **err_pos, uint *err_len, bool *set_warning)
55 {
56 const CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
57 const char *end= str + strip->cset->lengthsp(strip, str, length);
58 ulonglong found= 0;
59 *err_pos= 0; // No error yet
60 *err_len= 0;
61 if (str != end)
62 {
63 const char *start= str;
64 for (;;)
65 {
66 const char *pos= start;
67 uint var_len;
68 int mblen= 1;
69
70 if (cs && cs->mbminlen > 1)
71 {
72 for ( ; pos < end; pos+= mblen)
73 {
74 my_wc_t wc;
75 if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
76 (const uchar *) end)) < 1)
77 mblen= 1; // Not to hang on a wrong multibyte sequence
78 if (wc == (my_wc_t) field_separator)
79 break;
80 }
81 }
82 else
83 for (; pos != end && *pos != field_separator; pos++) ;
84 var_len= (uint) (pos - start);
85 uint find= cs ? find_type2(lib, start, var_len, cs) :
86 find_type(lib, start, var_len, (bool) 0);
87 if (!find && *err_len == 0) // report the first error with length > 0
88 {
89 *err_pos= (char*) start;
90 *err_len= var_len;
91 *set_warning= 1;
92 }
93 else
94 found|= 1ULL << (find - 1);
95 if (pos >= end)
96 break;
97 start= pos + mblen;
98 }
99 }
100 return found;
101 }
102
103 /*
104 Function to find a string in a TYPELIB
105 (similar to find_type() of mysys/typelib.c)
106
107 SYNOPSIS
108 find_type()
109 lib TYPELIB (struct of pointer to values + count)
110 find String to find
111 length Length of string to find
112 part_match Allow part matching of value
113
114 RETURN
115 0 error
116 > 0 position in TYPELIB->type_names +1
117 */
118
find_type(const TYPELIB * lib,const char * find,size_t length,bool part_match)119 uint find_type(const TYPELIB *lib, const char *find, size_t length,
120 bool part_match)
121 {
122 uint found_count=0, found_pos=0;
123 const char *end= find+length;
124 const char *i;
125 const char *j;
126 for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
127 {
128 for (i=find ; i != end &&
129 my_toupper(system_charset_info,*i) ==
130 my_toupper(system_charset_info,*j) ; i++, j++) ;
131 if (i == end)
132 {
133 if (! *j)
134 return(pos);
135 found_count++;
136 found_pos= pos;
137 }
138 }
139 return(found_count == 1 && part_match ? found_pos : 0);
140 }
141
142
143 /*
144 Find a string in a list of strings according to collation
145
146 SYNOPSIS
147 find_type2()
148 lib TYPELIB (struct of pointer to values + count)
149 x String to find
150 length String length
151 cs Character set + collation to use for comparison
152
153 NOTES
154
155 RETURN
156 0 No matching value
157 >0 Offset+1 in typelib for matched string
158 */
159
find_type2(const TYPELIB * typelib,const char * x,size_t length,const CHARSET_INFO * cs)160 uint find_type2(const TYPELIB *typelib, const char *x, size_t length,
161 const CHARSET_INFO *cs)
162 {
163 int pos;
164 const char *j;
165 DBUG_ENTER("find_type2");
166 DBUG_PRINT("enter",("x: '%.*s' lib: 0x%p",
167 static_cast<int>(length), x, typelib));
168
169 if (!typelib->count)
170 {
171 DBUG_PRINT("exit",("no count"));
172 DBUG_RETURN(0);
173 }
174
175 for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
176 {
177 if (!my_strnncoll(cs, (const uchar*) x, length,
178 (const uchar*) j, typelib->type_lengths[pos]))
179 DBUG_RETURN(pos+1);
180 }
181 DBUG_PRINT("exit",("Couldn't find type"));
182 DBUG_RETURN(0);
183 } /* find_type */
184
185
186 /*
187 Un-hex all elements in a typelib
188
189 SYNOPSIS
190 unhex_type2()
191 interval TYPELIB (struct of pointer to values + lengths + count)
192
193 NOTES
194
195 RETURN
196 N/A
197 */
198
unhex_type2(TYPELIB * interval)199 void unhex_type2(TYPELIB *interval)
200 {
201 for (uint pos= 0; pos < interval->count; pos++)
202 {
203 char *from, *to;
204 for (from= to= (char*) interval->type_names[pos]; *from; )
205 {
206 /*
207 Note, hexchar_to_int(*from++) doesn't work
208 one some compilers, e.g. IRIX. Looks like a compiler
209 bug in inline functions in combination with arguments
210 that have a side effect. So, let's use from[0] and from[1]
211 and increment 'from' by two later.
212 */
213
214 *to++= (char) (hexchar_to_int(from[0]) << 4) +
215 hexchar_to_int(from[1]);
216 from+= 2;
217 }
218 interval->type_lengths[pos] /= 2;
219 }
220 }
221
222
/*
  Check if the first word in a string is one of the ones in TYPELIB

  SYNOPSIS
    check_word()
    lib          TYPELIB
    val          String to check
    end          End of input
    end_of_word  If a word was found, set to point to the first byte
                 after the word

  RETURN
    0    No matching value
    > 0  lib->type_names[#-1] matched
         end_of_word will point to separator character/end in 'val'
*/
238
check_word(TYPELIB * lib,const char * val,const char * end,const char ** end_of_word)239 uint check_word(TYPELIB *lib, const char *val, const char *end,
240 const char **end_of_word)
241 {
242 int res;
243 const char *ptr;
244
245 /* Fiend end of word */
246 for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
247 ;
248 if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
249 *end_of_word= ptr;
250 return res;
251 }
252
253
/*
  Converts a string between character sets

  SYNOPSIS
    strconvert()
    from_cs       source character set
    from          source, a null terminated string
    to_cs         destination character set
    to            destination buffer
    to_length     destination buffer length, including room for the
                  terminating '\0' (so it must be at least 1)
    errors        set to the number of characters that could not be
                  converted and were replaced with '?'

  NOTES
    'to' is terminated with a '\0' character.
    If there is not enough space to convert the whole string,
    only a prefix is converted, and terminated with '\0'.

  RETURN VALUES
    result string length in bytes, not counting the terminating '\0'
*/
272
273
strconvert(CHARSET_INFO * from_cs,const char * from,CHARSET_INFO * to_cs,char * to,size_t to_length,uint * errors)274 size_t strconvert(CHARSET_INFO *from_cs, const char *from,
275 CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors)
276 {
277 int cnvres;
278 my_wc_t wc;
279 char *to_start= to;
280 uchar *to_end= (uchar*) to + to_length - 1;
281 my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
282 my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
283 uint error_count= 0;
284
285 while (1)
286 {
287 /*
288 Using 'from + 10' is safe:
289 - it is enough to scan a single character in any character set.
290 - if remaining string is shorter than 10, then mb_wc will return
291 with error because of unexpected '\0' character.
292 */
293 if ((cnvres= (*mb_wc)(from_cs, &wc,
294 (uchar*) from, (uchar*) from + 10)) > 0)
295 {
296 if (!wc)
297 break;
298 from+= cnvres;
299 }
300 else if (cnvres == MY_CS_ILSEQ)
301 {
302 error_count++;
303 from++;
304 wc= '?';
305 }
306 else
307 break; // Impossible char.
308
309 outp:
310
311 if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
312 to+= cnvres;
313 else if (cnvres == MY_CS_ILUNI && wc != '?')
314 {
315 error_count++;
316 wc= '?';
317 goto outp;
318 }
319 else
320 break;
321 }
322 *to= '\0';
323 *errors= error_count;
324 return static_cast<size_t>(to - to_start);
325
326 }
327
328
/*
  Searches for a LEX_STRING in a LEX_STRING array.

  SYNOPSIS
    find_string_in_array()
      haystack     The array
      needle       The string to search for
      cs           Character set whose collation is used for the comparison

  NOTE
    The last LEX_STRING in the array should have str member set to NULL

  RETURN VALUES
    -1   Not found
    >=0  Ordinal position
*/
344
find_string_in_array(LEX_STRING * const haystack,LEX_STRING * const needle,CHARSET_INFO * const cs)345 int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
346 CHARSET_INFO * const cs)
347 {
348 const LEX_STRING *pos;
349 for (pos= haystack; pos->str; pos++)
350 if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
351 (uchar *) needle->str, needle->length, 0))
352 {
353 return static_cast<int>(pos - haystack);
354 }
355 return -1;
356 }
357
358
set_to_string(THD * thd,LEX_STRING * result,ulonglong set,const char * lib[])359 char *set_to_string(THD *thd, LEX_STRING *result, ulonglong set,
360 const char *lib[])
361 {
362 char buff[STRING_BUFFER_USUAL_SIZE*8];
363 String tmp(buff, sizeof(buff), &my_charset_latin1);
364 LEX_STRING unused;
365
366 if (!result)
367 result= &unused;
368
369 tmp.length(0);
370
371 for (uint i= 0; set; i++, set >>= 1)
372 if (set & 1) {
373 tmp.append(lib[i]);
374 tmp.append(',');
375 }
376
377 if (tmp.length())
378 {
379 result->str= thd->strmake(tmp.ptr(), tmp.length()-1);
380 result->length= tmp.length()-1;
381 }
382 else
383 {
384 result->str= const_cast<char*>("");
385 result->length= 0;
386 }
387 return result->str;
388 }
389
flagset_to_string(THD * thd,LEX_STRING * result,ulonglong set,const char * lib[])390 char *flagset_to_string(THD *thd, LEX_STRING *result, ulonglong set,
391 const char *lib[])
392 {
393 char buff[STRING_BUFFER_USUAL_SIZE*8];
394 String tmp(buff, sizeof(buff), &my_charset_latin1);
395 LEX_STRING unused;
396
397 if (!result) result= &unused;
398
399 tmp.length(0);
400
401 // note that the last element is always "default", and it's ignored below
402 for (uint i= 0; lib[i+1]; i++, set >>= 1)
403 {
404 tmp.append(lib[i]);
405 tmp.append(set & 1 ? "=on," : "=off,");
406 }
407
408 result->str= thd->strmake(tmp.ptr(), tmp.length()-1);
409 result->length= tmp.length()-1;
410
411 return result->str;
412 }
413
414