1 /* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /* Some useful string utility functions used by the MySQL server */
24
25 #include "sql_priv.h"
26 #include "unireg.h"
27 #include "strfunc.h"
28 #include "sql_class.h"
29 #include "typelib.h" // TYPELIB
30 #include "m_ctype.h" // my_charset_latin1
31 #include "mysqld.h" // system_charset_info
32
33 /*
34 Return bitmap for strings used in a set
35
36 SYNOPSIS
37 find_set()
38 lib Strings in set
39 str Strings of set-strings separated by ','
40 err_pos If error, set to point to start of wrong set string
41 err_len If error, set to the length of wrong set string
42 set_warning Set to 1 if some string in set couldn't be used
43
44 NOTE
45 We delete all end space from str before comparison
46
47 RETURN
48 bitmap of all sets found in x.
49 set_warning is set to 1 if there was any sets that couldn't be set
50 */
51
52 static const char field_separator=',';
53
find_set(TYPELIB * lib,const char * str,uint length,const CHARSET_INFO * cs,char ** err_pos,uint * err_len,bool * set_warning)54 ulonglong find_set(TYPELIB *lib, const char *str, uint length,
55 const CHARSET_INFO *cs,
56 char **err_pos, uint *err_len, bool *set_warning)
57 {
58 const CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
59 const char *end= str + strip->cset->lengthsp(strip, str, length);
60 ulonglong found= 0;
61 *err_pos= 0; // No error yet
62 *err_len= 0;
63 if (str != end)
64 {
65 const char *start= str;
66 for (;;)
67 {
68 const char *pos= start;
69 uint var_len;
70 int mblen= 1;
71
72 if (cs && cs->mbminlen > 1)
73 {
74 for ( ; pos < end; pos+= mblen)
75 {
76 my_wc_t wc;
77 if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
78 (const uchar *) end)) < 1)
79 mblen= 1; // Not to hang on a wrong multibyte sequence
80 if (wc == (my_wc_t) field_separator)
81 break;
82 }
83 }
84 else
85 for (; pos != end && *pos != field_separator; pos++) ;
86 var_len= (uint) (pos - start);
87 uint find= cs ? find_type2(lib, start, var_len, cs) :
88 find_type(lib, start, var_len, (bool) 0);
89 if (!find && *err_len == 0) // report the first error with length > 0
90 {
91 *err_pos= (char*) start;
92 *err_len= var_len;
93 *set_warning= 1;
94 }
95 else
96 found|= 1ULL << (find - 1);
97 if (pos >= end)
98 break;
99 start= pos + mblen;
100 }
101 }
102 return found;
103 }
104
105 /*
106 Function to find a string in a TYPELIB
107 (similar to find_type() of mysys/typelib.c)
108
109 SYNOPSIS
110 find_type()
111 lib TYPELIB (struct of pointer to values + count)
112 find String to find
113 length Length of string to find
114 part_match Allow part matching of value
115
116 RETURN
117 0 error
118 > 0 position in TYPELIB->type_names +1
119 */
120
find_type(const TYPELIB * lib,const char * find,uint length,bool part_match)121 uint find_type(const TYPELIB *lib, const char *find, uint length,
122 bool part_match)
123 {
124 uint found_count=0, found_pos=0;
125 const char *end= find+length;
126 const char *i;
127 const char *j;
128 for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
129 {
130 for (i=find ; i != end &&
131 my_toupper(system_charset_info,*i) ==
132 my_toupper(system_charset_info,*j) ; i++, j++) ;
133 if (i == end)
134 {
135 if (! *j)
136 return(pos);
137 found_count++;
138 found_pos= pos;
139 }
140 }
141 return(found_count == 1 && part_match ? found_pos : 0);
142 }
143
144
145 /*
146 Find a string in a list of strings according to collation
147
148 SYNOPSIS
149 find_type2()
150 lib TYPELIB (struct of pointer to values + count)
151 x String to find
152 length String length
153 cs Character set + collation to use for comparison
154
155 NOTES
156
157 RETURN
158 0 No matching value
159 >0 Offset+1 in typelib for matched string
160 */
161
find_type2(const TYPELIB * typelib,const char * x,uint length,const CHARSET_INFO * cs)162 uint find_type2(const TYPELIB *typelib, const char *x, uint length,
163 const CHARSET_INFO *cs)
164 {
165 int pos;
166 const char *j;
167 DBUG_ENTER("find_type2");
168 DBUG_PRINT("enter",("x: '%.*s' lib: 0x%lx", length, x, (long) typelib));
169
170 if (!typelib->count)
171 {
172 DBUG_PRINT("exit",("no count"));
173 DBUG_RETURN(0);
174 }
175
176 for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
177 {
178 if (!my_strnncoll(cs, (const uchar*) x, length,
179 (const uchar*) j, typelib->type_lengths[pos]))
180 DBUG_RETURN(pos+1);
181 }
182 DBUG_PRINT("exit",("Couldn't find type"));
183 DBUG_RETURN(0);
184 } /* find_type */
185
186
187 /*
188 Un-hex all elements in a typelib
189
190 SYNOPSIS
191 unhex_type2()
192 interval TYPELIB (struct of pointer to values + lengths + count)
193
194 NOTES
195
196 RETURN
197 N/A
198 */
199
unhex_type2(TYPELIB * interval)200 void unhex_type2(TYPELIB *interval)
201 {
202 for (uint pos= 0; pos < interval->count; pos++)
203 {
204 char *from, *to;
205 for (from= to= (char*) interval->type_names[pos]; *from; )
206 {
207 /*
208 Note, hexchar_to_int(*from++) doesn't work
209 one some compilers, e.g. IRIX. Looks like a compiler
210 bug in inline functions in combination with arguments
211 that have a side effect. So, let's use from[0] and from[1]
212 and increment 'from' by two later.
213 */
214
215 *to++= (char) (hexchar_to_int(from[0]) << 4) +
216 hexchar_to_int(from[1]);
217 from+= 2;
218 }
219 interval->type_lengths[pos] /= 2;
220 }
221 }
222
223
224 /*
225 Check if the first word in a string is one of the ones in TYPELIB
226
227 SYNOPSIS
228 check_word()
229 lib TYPELIB
230 val String to check
231 end End of input
232 end_of_word Store value of last used byte here if we found word
233
234 RETURN
235 0 No matching value
236 > 1 lib->type_names[#-1] matched
237 end_of_word will point to separator character/end in 'val'
238 */
239
check_word(TYPELIB * lib,const char * val,const char * end,const char ** end_of_word)240 uint check_word(TYPELIB *lib, const char *val, const char *end,
241 const char **end_of_word)
242 {
243 int res;
244 const char *ptr;
245
246 /* Fiend end of word */
247 for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
248 ;
249 if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
250 *end_of_word= ptr;
251 return res;
252 }
253
254
255 /*
256 Converts a string between character sets
257
258 SYNOPSIS
259 strconvert()
260 from_cs source character set
261 from source, a null terminated string
262 to destination buffer
263 to_length destination buffer length
264
265 NOTES
266 'to' is always terminated with a '\0' character.
267 If there is no enough space to convert whole string,
268 only prefix is converted, and terminated with '\0'.
269
270 RETURN VALUES
271 result string length
272 */
273
274
strconvert(CHARSET_INFO * from_cs,const char * from,CHARSET_INFO * to_cs,char * to,uint to_length,uint * errors)275 uint strconvert(CHARSET_INFO *from_cs, const char *from,
276 CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
277 {
278 int cnvres;
279 my_wc_t wc;
280 char *to_start= to;
281 uchar *to_end= (uchar*) to + to_length - 1;
282 my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
283 my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
284 uint error_count= 0;
285
286 while (1)
287 {
288 /*
289 Using 'from + 10' is safe:
290 - it is enough to scan a single character in any character set.
291 - if remaining string is shorter than 10, then mb_wc will return
292 with error because of unexpected '\0' character.
293 */
294 if ((cnvres= (*mb_wc)(from_cs, &wc,
295 (uchar*) from, (uchar*) from + 10)) > 0)
296 {
297 if (!wc)
298 break;
299 from+= cnvres;
300 }
301 else if (cnvres == MY_CS_ILSEQ)
302 {
303 error_count++;
304 from++;
305 wc= '?';
306 }
307 else
308 break; // Impossible char.
309
310 outp:
311
312 if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
313 to+= cnvres;
314 else if (cnvres == MY_CS_ILUNI && wc != '?')
315 {
316 error_count++;
317 wc= '?';
318 goto outp;
319 }
320 else
321 break;
322 }
323 *to= '\0';
324 *errors= error_count;
325 return (uint32) (to - to_start);
326
327 }
328
329
330 /*
331 Searches for a LEX_STRING in an LEX_STRING array.
332
333 SYNOPSIS
334 find_string_in_array()
335 heap The array
336 needle The string to search for
337
338 NOTE
339 The last LEX_STRING in the array should have str member set to NULL
340
341 RETURN VALUES
342 -1 Not found
343 >=0 Ordinal position
344 */
345
find_string_in_array(LEX_STRING * const haystack,LEX_STRING * const needle,CHARSET_INFO * const cs)346 int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
347 CHARSET_INFO * const cs)
348 {
349 const LEX_STRING *pos;
350 for (pos= haystack; pos->str; pos++)
351 if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
352 (uchar *) needle->str, needle->length, 0))
353 {
354 return (pos - haystack);
355 }
356 return -1;
357 }
358
359
set_to_string(THD * thd,LEX_STRING * result,ulonglong set,const char * lib[])360 char *set_to_string(THD *thd, LEX_STRING *result, ulonglong set,
361 const char *lib[])
362 {
363 char buff[STRING_BUFFER_USUAL_SIZE*8];
364 String tmp(buff, sizeof(buff), &my_charset_latin1);
365 LEX_STRING unused;
366
367 if (!result)
368 result= &unused;
369
370 tmp.length(0);
371
372 for (uint i= 0; set; i++, set >>= 1)
373 if (set & 1) {
374 tmp.append(lib[i]);
375 tmp.append(',');
376 }
377
378 if (tmp.length())
379 {
380 result->str= thd->strmake(tmp.ptr(), tmp.length()-1);
381 result->length= tmp.length()-1;
382 }
383 else
384 {
385 result->str= const_cast<char*>("");
386 result->length= 0;
387 }
388 return result->str;
389 }
390
flagset_to_string(THD * thd,LEX_STRING * result,ulonglong set,const char * lib[])391 char *flagset_to_string(THD *thd, LEX_STRING *result, ulonglong set,
392 const char *lib[])
393 {
394 char buff[STRING_BUFFER_USUAL_SIZE*8];
395 String tmp(buff, sizeof(buff), &my_charset_latin1);
396 LEX_STRING unused;
397
398 if (!result) result= &unused;
399
400 tmp.length(0);
401
402 // note that the last element is always "default", and it's ignored below
403 for (uint i= 0; lib[i+1]; i++, set >>= 1)
404 {
405 tmp.append(lib[i]);
406 tmp.append(set & 1 ? "=on," : "=off,");
407 }
408
409 result->str= thd->strmake(tmp.ptr(), tmp.length()-1);
410 result->length= tmp.length()-1;
411
412 return result->str;
413 }
414
415