1 /****************************************************************************
2    Copyright (C) 2012, 2020, MariaDB Corporation.
3 
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Library General Public
6    License as published by the Free Software Foundation; either
7    version 2 of the License, or (at your option) any later version.
8 
9    This library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Library General Public License for more details.
13 
14    You should have received a copy of the GNU Library General Public
15    License along with this library; if not see <http://www.gnu.org/licenses>
16    or write to the Free Software Foundation, Inc.,
17    51 Franklin St., Fifth Floor, Boston, MA 02110, USA
18 
19    Part of this code includes code from the PHP project which
20    is freely available from http://www.php.net
21 *****************************************************************************/
22 
23 /* The implementation for character set support was ported from PHP's mysqlnd
24    extension, written by Andrey Hristov, Georg Richter and Ulf Wendel
25 
26    Original file header:
27   +----------------------------------------------------------------------+
28   | PHP Version 5                                                        |
29   +----------------------------------------------------------------------+
30   | Copyright (c) 2006-2011 The PHP Group                                |
31   +----------------------------------------------------------------------+
32   | This source file is subject to version 3.01 of the PHP license,      |
33   | that is bundled with this package in the file LICENSE, and is        |
34   | available through the world-wide-web at the following url:           |
35   | http://www.php.net/license/3_01.txt                                  |
36   | If you did not receive a copy of the PHP license and are unable to   |
37   | obtain it through the world-wide-web, please send a note to          |
38   | license@php.net so we can mail you a copy immediately.               |
39   +----------------------------------------------------------------------+
40   | Authors: Georg Richter <georg@mysql.com>                             |
41   |          Andrey Hristov <andrey@mysql.com>                           |
42   |          Ulf Wendel <uwendel@mysql.com>                              |
43   +----------------------------------------------------------------------+
44 */
45 
46 #ifndef _WIN32
47 #include <strings.h>
48 #include <string.h>
49 #else
50 #include <string.h>
51 #endif
52 #include <ma_global.h>
53 #include <mariadb_ctype.h>
54 #include <ma_string.h>
55 
56 #ifdef HAVE_ICONV
57 #ifdef _WIN32
58 #include "../win-iconv/iconv.h"
59 #else
60 #include <iconv.h>
61 #endif
62 #endif
63 
64 
65 #if defined(HAVE_NL_LANGINFO) && defined(HAVE_SETLOCALE)
66 #include <locale.h>
67 #include <langinfo.h>
68 #endif
69 
70 /*
71   +----------------------------------------------------------------------+
72   | PHP Version 5                                                        |
73   +----------------------------------------------------------------------+
74   | Copyright (c) 2006-2011 The PHP Group                                |
75   +----------------------------------------------------------------------+
76   | This source file is subject to version 3.01 of the PHP license,      |
77   | that is bundled with this package in the file LICENSE, and is        |
78   | available through the world-wide-web at the following url:           |
79   | http://www.php.net/license/3_01.txt                                  |
80   | If you did not receive a copy of the PHP license and are unable to   |
81   | obtain it through the world-wide-web, please send a note to          |
82   | license@php.net so we can mail you a copy immediately.               |
83   +----------------------------------------------------------------------+
84   | Authors: Georg Richter <georg@mysql.com>                             |
85   |          Andrey Hristov <andrey@mysql.com>                           |
86   |          Ulf Wendel <uwendel@mysql.com>                              |
87   +----------------------------------------------------------------------+
88 */
89 
90 /* {{{ utf8 functions */
/*
  Measure the UTF-8 sequence of at most three bytes that begins at start.

  start  first byte to examine
  end    one past the last readable byte

  Returns 1, 2 or 3 (the sequence length in bytes), or 0 when the range is
  empty, the sequence is truncated or malformed, or the lead byte is 0xF0
  or above.
*/
static unsigned int check_mb_utf8mb3_sequence(const char *start, const char *end)
{
  const unsigned char *bytes = (const unsigned char *) start;
  unsigned char lead;

  if (start >= end) {
    return 0;
  }

  lead = bytes[0];

  if (lead < 0x80) {
    return 1;    /* single byte character */
  }
  if (lead < 0xC2) {
    return 0;    /* invalid mb character */
  }
  if (lead < 0xE0) {
    /*
      Compare the remaining length instead of forming start + 2, which
      would point beyond one past the end of a one-byte range.
    */
    if (end - start < 2) {
      return 0;  /* too small */
    }
    if ((bytes[1] ^ 0x80) >= 0x40) {
      return 0;  /* second byte is not 10xxxxxx */
    }
    return 2;
  }
  if (lead < 0xF0) {
    if (end - start < 3) {
      return 0;  /* too small */
    }
    if ((bytes[1] ^ 0x80) >= 0x40 || (bytes[2] ^ 0x80) >= 0x40) {
      return 0;  /* trailing bytes are not 10xxxxxx */
    }
    if (lead == 0xE0 && bytes[1] < 0xA0) {
      return 0;  /* E0 must be followed by A0..BF */
    }
    return 3;
  }
  return 0;
}
128 
129 
/*
  Measure the UTF-8 sequence of at most four bytes that begins at start.

  start  first byte to examine
  end    one past the last readable byte

  Returns 1, 2, 3 or 4 (the sequence length in bytes), or 0 when the range
  is empty, the sequence is truncated or malformed, or the lead byte is
  0xF5 or above.
*/
static unsigned int check_mb_utf8_sequence(const char *start, const char *end)
{
  const unsigned char *bytes = (const unsigned char *) start;
  unsigned char lead;

  if (start >= end) {
    return 0;
  }

  lead = bytes[0];

  if (lead < 0x80) {
    return 1;    /* single byte character */
  }
  if (lead < 0xC2) {
    return 0;    /* invalid mb character */
  }
  if (lead < 0xE0) {
    /*
      Compare the remaining length instead of forming start + N, which
      would point beyond one past the end of a short range.
    */
    if (end - start < 2) {
      return 0;  /* too small */
    }
    if ((bytes[1] ^ 0x80) >= 0x40) {
      return 0;  /* second byte is not 10xxxxxx */
    }
    return 2;
  }
  if (lead < 0xF0) {
    if (end - start < 3) {
      return 0;  /* too small */
    }
    if ((bytes[1] ^ 0x80) >= 0x40 || (bytes[2] ^ 0x80) >= 0x40) {
      return 0;  /* trailing bytes are not 10xxxxxx */
    }
    if (lead == 0xE0 && bytes[1] < 0xA0) {
      return 0;  /* E0 must be followed by A0..BF */
    }
    return 3;
  }
  if (lead < 0xF5) {
    if (end - start < 4) { /* We need 4 characters */
      return 0;  /* too small */
    }

    /*
      UTF-8 quick four-byte mask:
      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      Encoding allows to encode U+00010000..U+001FFFFF

      The maximum character defined in the Unicode standard is U+0010FFFF.
      Higher characters U+00110000..U+001FFFFF are not used.

      11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
      11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)

      Valid codes:
      [F0][90..BF][80..BF][80..BF]
      [F1][80..BF][80..BF][80..BF]
      [F2][80..BF][80..BF][80..BF]
      [F3][80..BF][80..BF][80..BF]
      [F4][80..8F][80..BF][80..BF]
    */
    if ((bytes[1] ^ 0x80) >= 0x40 ||
        (bytes[2] ^ 0x80) >= 0x40 ||
        (bytes[3] ^ 0x80) >= 0x40) {
      return 0;  /* trailing bytes are not 10xxxxxx */
    }
    if (lead == 0xF0 && bytes[1] < 0x90) {
      return 0;  /* below U+00010000 */
    }
    if (lead == 0xF4 && bytes[1] > 0x8F) {
      return 0;  /* above U+0010FFFF */
    }
    return 4;
  }
  return 0;
}
201 
/*
  Multi-byte-only wrapper around check_mb_utf8mb3_sequence().

  Returns the sequence length when it is greater than one byte, and 0 for
  single-byte characters as well as for anything the sequence check rejects.
*/
static unsigned int check_mb_utf8mb3_valid(const char *start, const char *end)
{
  const unsigned int seq_len = check_mb_utf8mb3_sequence(start, end);

  if (seq_len <= 1) {
    return 0;
  }
  return seq_len;
}
207 
/*
  Multi-byte-only wrapper around check_mb_utf8_sequence().

  Returns the sequence length when it is greater than one byte, and 0 for
  single-byte characters as well as for anything the sequence check rejects.
*/
static unsigned int check_mb_utf8_valid(const char *start, const char *end)
{
  const unsigned int seq_len = check_mb_utf8_sequence(start, end);

  if (seq_len <= 1) {
    return 0;
  }
  return seq_len;
}
213 
214 
/*
  Character length implied by a lead byte value, for UTF-8 limited to
  three-byte sequences.

  Returns 1 for values below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF and
  0 for every other value.
*/
static unsigned int mysql_mbcharlen_utf8mb3(unsigned int utf8)
{
  unsigned int char_len;

  if (utf8 <= 0x7F) {
    char_len = 1;    /* single byte character */
  } else if (utf8 <= 0xC1) {
    char_len = 0;    /* invalid multibyte header */
  } else if (utf8 <= 0xDF) {
    char_len = 2;    /* double byte character */
  } else if (utf8 <= 0xEF) {
    char_len = 3;    /* triple byte character */
  } else {
    char_len = 0;    /* four-byte headers and above are not accepted */
  }
  return char_len;
}
231 
232 
/*
  Character length implied by a lead byte value, for UTF-8 with sequences
  of up to four bytes.

  Returns 1 for values below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF,
  4 for 0xF0..0xF7 and 0 for every other value.
*/
static unsigned int mysql_mbcharlen_utf8(unsigned int utf8)
{
  unsigned int char_len;

  if (utf8 <= 0x7F) {
    char_len = 1;    /* single byte character */
  } else if (utf8 <= 0xC1) {
    char_len = 0;    /* invalid multibyte header */
  } else if (utf8 <= 0xDF) {
    char_len = 2;    /* double byte character */
  } else if (utf8 <= 0xEF) {
    char_len = 3;    /* triple byte character */
  } else if (utf8 <= 0xF7) {
    char_len = 4;    /* four byte character */
  } else {
    char_len = 0;
  }
  return char_len;
}
252 /* }}} */
253 
254 
255 /* {{{ big5 functions */
/*
  Big5 byte-range predicates: a lead byte lies in 0xA1..0xF9, a trail byte
  in 0x40..0x7E or 0xA1..0xFE. Arguments are converted to unsigned int and
  may be evaluated more than once.
*/
#define valid_big5head(c)  (0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xF9)
#define valid_big5tail(c)  ((0x40 <= (unsigned int)(c) && (unsigned int)(c) <= 0x7E) || \
              (0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xFE))

/* True when c is a valid lead byte and d a valid trail byte; expressed with
   the valid_big5* predicates defined directly above. */
#define isbig5code(c,d) (valid_big5head(c) && valid_big5tail(d))
261 
check_mb_big5(const char * start,const char * end)262 static unsigned int check_mb_big5(const char *start, const char *end)
263 {
264   return (valid_big5head(*((const uchar*) start)) && (end - start) > 1 && valid_big5tail(*((const uchar*) start + 1)) ? 2 : 0);
265 }
266 
267 
/*
  Character length implied by a Big5 lead value: 2 when big5 passes
  valid_big5head, otherwise 1.
*/
static unsigned int mysql_mbcharlen_big5(unsigned int big5)
{
  if (valid_big5head(big5)) {
    return 2;
  }
  return 1;
}
272 /* }}} */
273 
274 
275 /* {{{ cp932 functions */
/*
  CP932 byte-range predicates: a lead byte lies in 0x81..0x9F or 0xE0..0xFC,
  a trail byte in 0x40..0x7E or 0x80..0xFC. Every use of the argument is
  parenthesized so that expression arguments are grouped as written; the
  argument may be evaluated more than once.
*/
#define valid_cp932head(c) ((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_cp932tail(c) ((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))
278 
279 
check_mb_cp932(const char * start,const char * end)280 static unsigned int check_mb_cp932(const char *start, const char *end)
281 {
282   return (valid_cp932head((uchar)start[0]) && (end - start >  1) &&
283       valid_cp932tail((uchar)start[1])) ? 2 : 0;
284 }
285 
286 
mysql_mbcharlen_cp932(unsigned int cp932)287 static unsigned int mysql_mbcharlen_cp932(unsigned int cp932)
288 {
289   return (valid_cp932head((uchar)cp932)) ? 2 : 1;
290 }
291 /* }}} */
292 
293 
294 /* {{{ euckr functions */
/* True when c, narrowed to uchar, lies in 0xA1..0xFE. */
#define valid_euckr(c)  ((uchar)(c) >= 0xA1 && (uchar)(c) <= 0xFE)
296 
check_mb_euckr(const char * start,const char * end)297 static unsigned int check_mb_euckr(const char *start, const char *end)
298 {
299   if (end - start <= 1) {
300     return 0;  /* invalid length */
301   }
302   if (*(uchar *)start < 0x80) {
303     return 0;  /* invalid euckr character */
304   }
305   if (valid_euckr(start[1])) {
306     return 2;
307   }
308   return 0;
309 }
310 
311 
/*
  Character length implied by an EUC-KR lead value: 2 when kr passes
  valid_euckr, otherwise 1.
*/
static unsigned int mysql_mbcharlen_euckr(unsigned int kr)
{
  if (valid_euckr(kr)) {
    return 2;
  }
  return 1;
}
316 /* }}} */
317 
318 
319 /* {{{ eucjpms functions */
/*
  eucjpms byte predicates; each looks only at the low eight bits of c.
    valid_eucjpms       0xA1..0xFE
    valid_eucjpms_kata  0xA1..0xDF
    valid_eucjpms_ss2   0x8E
    valid_eucjpms_ss3   0x8F
*/
#define valid_eucjpms(c)     (((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xFE)
#define valid_eucjpms_kata(c)  (((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xDF)
#define valid_eucjpms_ss2(c)  (((c) & 0xFF) == 0x8E)
#define valid_eucjpms_ss3(c)  (((c) & 0xFF) == 0x8F)

/*
  Check for a multi-byte eucjpms character at start.

  start  first byte to examine
  end    one past the last readable byte

  Returns 2 for a two-byte character (A1..FE A1..FE, or 8E followed by
  A1..DF), 3 for a three-byte character (8F A1..FE A1..FE), and 0 for
  everything else, including an empty range, a first byte below 0x80 and
  sequences cut short by end.
*/
static unsigned int check_mb_eucjpms(const char *start, const char *end)
{
  if (start >= end) {
    return 0;  /* nothing to read */
  }
  if ((unsigned char) start[0] < 0x80) {
    return 0;  /* invalid eucjpms character */
  }
  if (valid_eucjpms(start[0]) && (end - start) > 1 && valid_eucjpms(start[1])) {
    return 2;
  }
  if (valid_eucjpms_ss2(start[0]) && (end - start) > 1 && valid_eucjpms_kata(start[1])) {
    return 2;
  }
  if (valid_eucjpms_ss3(start[0]) && (end - start) > 2 && valid_eucjpms(start[1]) &&
    valid_eucjpms(start[2])) {
    return 3;  /* all three bytes were validated, so report all three */
  }
  return 0;
}
342 
343 
/*
  Character length implied by an eucjpms lead value.

  Returns 3 when jpms passes valid_eucjpms_ss3, 2 when it passes
  valid_eucjpms or valid_eucjpms_ss2, and 1 otherwise.
*/
static unsigned int mysql_mbcharlen_eucjpms(unsigned int jpms)
{
  /* the three predicates match disjoint byte values, so order is free */
  if (valid_eucjpms_ss3(jpms)) {
    return 3;
  }
  return (valid_eucjpms(jpms) || valid_eucjpms_ss2(jpms)) ? 2 : 1;
}
354 /* }}} */
355 
356 
357 /* {{{ gb2312 functions */
/*
  GB2312 byte-range predicates on c narrowed to uchar:
  lead byte 0xA1..0xF7, trail byte 0xA1..0xFE.
*/
#define valid_gb2312_head(c)  ((uchar)(c) >= 0xA1 && (uchar)(c) <= 0xF7)
#define valid_gb2312_tail(c)  ((uchar)(c) >= 0xA1 && (uchar)(c) <= 0xFE)
360 
361 
/*
  Check for a two-byte GB2312 character at start.

  Returns 2 when the first byte passes valid_gb2312_head, more than one
  byte lies before end, and the second byte passes valid_gb2312_tail;
  otherwise 0.
*/
static unsigned int check_mb_gb2312(const char *start, const char *end)
{
  if (!valid_gb2312_head(start[0])) {
    return 0;
  }
  if (end - start <= 1) {
    return 0;
  }
  if (!valid_gb2312_tail(start[1])) {
    return 0;
  }
  return 2;
}
367 
368 
/*
  Character length implied by a GB2312 lead value: 2 when gb passes
  valid_gb2312_head, otherwise 1.
*/
static unsigned int mysql_mbcharlen_gb2312(unsigned int gb)
{
  if (valid_gb2312_head(gb)) {
    return 2;
  }
  return 1;
}
373 /* }}} */
374 
375 
376 /* {{{ gbk functions */
/*
  GBK byte-range predicates on c narrowed to uchar:
  lead byte 0x81..0xFE, trail byte 0x40..0x7E or 0x80..0xFE.
*/
#define valid_gbk_head(c)  ((uchar)(c) >= 0x81 && (uchar)(c) <= 0xFE)
#define valid_gbk_tail(c)  (((uchar)(c) >= 0x40 && (uchar)(c) <= 0x7E) || \
                            ((uchar)(c) >= 0x80 && (uchar)(c) <= 0xFE))
379 
/*
  Check for a two-byte GBK character at start.

  Returns 2 when the first byte passes valid_gbk_head, more than one byte
  lies before end, and the second byte passes valid_gbk_tail; otherwise 0.
*/
static unsigned int check_mb_gbk(const char *start, const char *end)
{
  if (!valid_gbk_head(start[0])) {
    return 0;
  }
  if (end - start <= 1) {
    return 0;
  }
  if (!valid_gbk_tail(start[1])) {
    return 0;
  }
  return 2;
}
384 
/*
  Character length implied by a GBK lead value: 2 when gbk passes
  valid_gbk_head, otherwise 1.
*/
static unsigned int mysql_mbcharlen_gbk(unsigned int gbk)
{
  if (valid_gbk_head(gbk)) {
    return 2;
  }
  return 1;
}
389 /* }}} */
390 
391 
392 /* {{{ sjis functions */
/*
  SJIS byte-range predicates: lead byte 0x81..0x9F or 0xE0..0xFC, trail
  byte 0x40..0x7E or 0x80..0xFC. The argument is compared as given (no
  narrowing cast) and may be evaluated more than once.
*/
#define valid_sjis_head(c)  (((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xFC))
#define valid_sjis_tail(c)  (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC))
395 
396 
check_mb_sjis(const char * start,const char * end)397 static unsigned int check_mb_sjis(const char *start, const char *end)
398 {
399   return (valid_sjis_head((uchar)start[0]) && (end - start) > 1 && valid_sjis_tail((uchar)start[1])) ? 2 : 0;
400 }
401 
402 
mysql_mbcharlen_sjis(unsigned int sjis)403 static unsigned int mysql_mbcharlen_sjis(unsigned int sjis)
404 {
405   return (valid_sjis_head((uchar)sjis)) ? 2 : 1;
406 }
407 /* }}} */
408 
409 
410 /* {{{ ucs2 functions */
/*
  Report whether a complete two-byte UCS-2 unit is available at start.

  start  first byte to examine
  end    one past the last readable byte

  Returns 2 when at least two bytes lie between start and end, otherwise 0,
  so a truncated trailing byte is not reported as a whole character. The
  byte values themselves are not inspected.
*/
static unsigned int check_mb_ucs2(const char *start, const char *end)
{
  if (end - start < 2) {
    return 0;  /* too small */
  }
  return 2;
}
415 
/*
  Character length for UCS-2: the argument is ignored and the result is
  always 2.
*/
static unsigned int mysql_mbcharlen_ucs2(unsigned int ucs2 __attribute((unused)))
{
  enum { UCS2_CHAR_BYTES = 2 };

  return UCS2_CHAR_BYTES;
}
420 /* }}} */
421 
422 
423 /* {{{ ujis functions */
/*
  ujis byte predicates; each looks only at the low eight bits of c.
    valid_ujis       0xA1..0xFE
    valid_ujis_kata  0xA1..0xDF
    valid_ujis_ss2   0x8E
    valid_ujis_ss3   0x8F
*/
#define valid_ujis(c)       ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xFE))
#define valid_ujis_kata(c)  ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xDF))
#define valid_ujis_ss2(c)   (((c)&0xFF) == 0x8E)
#define valid_ujis_ss3(c)   (((c)&0xFF) == 0x8F)

/*
  Check for a multi-byte ujis character at start.

  start  first byte to examine
  end    one past the last readable byte

  Returns 2 for a two-byte character (A1..FE A1..FE, or 8E followed by
  A1..DF), 3 for a three-byte character (8F A1..FE A1..FE), and 0 for
  everything else, including an empty range, a first byte below 0x80 and
  sequences cut short by end.
*/
static unsigned int check_mb_ujis(const char *start, const char *end)
{
  if (start >= end) {
    return 0;  /* nothing to read */
  }
  if ((unsigned char) start[0] < 0x80) {
    return 0;  /* invalid ujis character */
  }
  /* start[1] may only be read when a second byte really is in range */
  if ((end - start) > 1) {
    if (valid_ujis(start[0]) && valid_ujis(start[1])) {
      return 2;
    }
    if (valid_ujis_ss2(start[0]) && valid_ujis_kata(start[1])) {
      return 2;
    }
  }
  if (valid_ujis_ss3(start[0]) && (end - start) > 2 && valid_ujis(start[1]) && valid_ujis(start[2])) {
    return 3;
  }
  return 0;
}
445 
446 
/*
  Character length implied by a ujis lead value.

  Returns 2 when ujis passes valid_ujis or valid_ujis_ss2, 3 when it
  passes valid_ujis_ss3, and 1 otherwise.
*/
static unsigned int mysql_mbcharlen_ujis(unsigned int ujis)
{
  if (valid_ujis(ujis)) {
    return 2;
  }
  if (valid_ujis_ss2(ujis)) {
    return 2;
  }
  if (valid_ujis_ss3(ujis)) {
    return 3;
  }
  return 1;
}
451 /* }}} */
452 
453 
454 
455 /* {{{ utf16 functions */
/*
  Surrogate markers, tested on x narrowed to one byte:
  0xD8..0xDB marks a high surrogate, 0xDC..0xDF a low surrogate.
*/
#define UTF16_HIGH_HEAD(x)  ((((unsigned char) (x)) & 0xFC) == 0xD8)
#define UTF16_LOW_HEAD(x)   ((((unsigned char) (x)) & 0xFC) == 0xDC)

/*
  Check the UTF-16 character beginning at start.

  start  first byte to examine
  end    one past the last readable byte

  Returns 4 for a high surrogate at start[0] followed by a low surrogate at
  start[2] with at least four bytes available, 2 for any other unit whose
  first byte is not a low-surrogate marker, and 0 when fewer than two bytes
  are available, a high surrogate is not completed, or the unit starts with
  a low-surrogate marker.

  NOTE(review): only start[0] and start[2] are inspected, which is where the
  markers sit in big-endian order; the charset table also registers this
  function for "utf16le" - confirm that is intended.
*/
static unsigned int check_mb_utf16(const char *start, const char *end)
{
  /*
    Compare the remaining length instead of forming start + 2 / start + 4,
    which would point beyond one past the end of a short range.
  */
  if (end - start < 2) {
    return 0;
  }

  if (UTF16_HIGH_HEAD(*start)) {
    if (end - start >= 4 && UTF16_LOW_HEAD(start[2])) {
      return 4;
    }
    return 0;  /* unpaired high surrogate */
  }

  if (UTF16_LOW_HEAD(*start)) {
    return 0;  /* low surrogate without a preceding high surrogate */
  }
  return 2;
}
474 
475 
mysql_mbcharlen_utf16(unsigned int utf16)476 static uint mysql_mbcharlen_utf16(unsigned int utf16)
477 {
478   return UTF16_HIGH_HEAD(utf16) ? 4 : 2;
479 }
480 /* }}} */
481 
482 
483 /* {{{ utf32 functions */
/*
  Report whether a complete four-byte UTF-32 unit is available at start.

  start  first byte to examine
  end    one past the last readable byte

  Returns 4 when at least four bytes lie between start and end, otherwise
  0, so a truncated tail is not reported as a whole character. The byte
  values themselves are not inspected.
*/
static unsigned int
check_mb_utf32(const char *start, const char *end)
{
  if (end - start < 4) {
    return 0;  /* too small */
  }
  return 4;
}
489 
490 
491 static uint
mysql_mbcharlen_utf32(unsigned int utf32 __attribute ((unused)))492 mysql_mbcharlen_utf32(unsigned int utf32 __attribute((unused)))
493 {
494   return 4;
495 }
496 /* }}} */
497 
498 /* {{{ gb18030 functions */
/*
  GB18030 byte-range predicates on c narrowed to unsigned char:
    is_gb18030_odd     0x81..0xFE
    is_gb18030_even_2  0x40..0x7E or 0x80..0xFE
    is_gb18030_even_4  0x30..0x39
*/
#define is_gb18030_odd(c)          ((unsigned char) (c) >= 0x81 && (unsigned char) (c) <= 0xFE)
#define is_gb18030_even_2(c)       (((unsigned char) (c) >= 0x40 && (unsigned char) (c) <= 0x7E) || \
                                    ((unsigned char) (c) >= 0x80 && (unsigned char) (c) <= 0xFE))
#define is_gb18030_even_4(c)       ((unsigned char) (c) >= 0x30 && (unsigned char) (c) <= 0x39)
502 
503 
/*
  Character length implied by one or two leading GB18030 bytes packed in c.

  For c <= 0xFF: returns 1 when c is outside the is_gb18030_odd range and
  0 when it is inside it.
  For larger c: returns 0 when c exceeds 0xFFFF or its second-lowest byte
  is outside the is_gb18030_odd range; otherwise 2 when the lowest byte
  passes is_gb18030_even_2, 4 when it passes is_gb18030_even_4, else 0.
*/
static unsigned int mysql_mbcharlen_gb18030(unsigned int c)
{
	unsigned int low_byte;

	if (c <= 0xFF) {
		return is_gb18030_odd(c) ? 0 : 1;
	}
	if (c > 0xFFFF) {
		return 0;
	}
	if (!is_gb18030_odd((c >> 8) & 0xFF)) {
		return 0;
	}

	low_byte = c & 0xFF;
	if (is_gb18030_even_2(low_byte)) {
		return 2;
	}
	return is_gb18030_even_4(low_byte) ? 4 : 0;
}
521 
/*
  Check for a multi-byte GB18030 character at start.

  Returns 2 when at least two bytes are available, start[0] passes
  is_gb18030_odd and start[1] passes is_gb18030_even_2.
  Returns 4 when at least four bytes are available and the four bytes pass
  is_gb18030_odd, is_gb18030_even_4, is_gb18030_odd, is_gb18030_even_4 in
  that order.
  Returns 0 otherwise.
*/
static unsigned int check_mb_gb18030_valid(const char * start, const char * end)
{
	if (end - start < 2) {
		return 0;
	}
	if (!is_gb18030_odd(start[0])) {
		return 0;
	}
	if (is_gb18030_even_2(start[1])) {
		return 2;
	}
	if (end - start < 4) {
		return 0;
	}
	if (is_gb18030_even_4(start[1]) && is_gb18030_odd(start[2]) && is_gb18030_even_4(start[3])) {
		return 4;
	}
	return 0;
}
536 /* }}} */
537 
/*
  The server sometimes names the full UTF-8 character set (the four-byte
  "mb4" variant) utf8mb4 and keeps the name utf8 for the old variant, for
  backward-compatibility reasons. Sometimes utf8mb4 is simply called utf8
  and the old character sets are called utf8mb3.
  Keeping both names in macros makes them easy to change now; the choice
  could be made compilation dependent.
*/

/* Character set name used by the table entries that allow up to four bytes per character. */
#define UTF8_MB4 "utf8mb4"
/* Character set name used by the table entries that allow up to three bytes per character. */
#define UTF8_MB3 "utf8"
546 
547 /* {{{ mysql_charsets */
548 const MARIADB_CHARSET_INFO mariadb_compiled_charsets[] =
549 {
550   {   1, 1, "big5","big5_chinese_ci", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
551   {   3, 1, "dec8", "dec8_swedish_ci", "", 0, "DEC", 1, 1, NULL, NULL},
552   {   4, 1, "cp850", "cp850_general_ci", "", 850, "CP850", 1, 1, NULL, NULL},
553   {   6, 1, "hp8", "hp8_english_ci", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
554   {   7, 1, "koi8r", "koi8r_general_ci", "", 20866, "KOI8R", 1, 1, NULL, NULL},
555   {   8, 1, "latin1", "latin1_swedish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
556   {   9, 1, "latin2", "latin2_general_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
557   {  10, 1, "swe7", "swe7_swedish_ci", "", 20107, "", 1, 1, NULL, NULL},
558   {  11, 1, "ascii", "ascii_general_ci", "", 1252, "ASCII", 1, 1, NULL, NULL},
559   {  12, 1, "ujis", "ujis_japanese_ci", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
560   {  13, 1, "sjis", "sjis_japanese_ci", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
561   {  16, 1, "hebrew", "hebrew_general_ci", "", 1255, "HEBREW", 1, 1, NULL, NULL},
562   {  18, 1, "tis620", "tis620_thai_ci", "", 874, "TIS620", 1, 1, NULL, NULL},
563   {  19, 1, "euckr", "euckr_korean_ci", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
564   {  22, 1, "koi8u", "koi8u_general_ci", "", 21866, "KOI8U", 1, 1, NULL, NULL},
565   {  24, 1, "gb2312", "gb2312_chinese_ci", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
566   {  25, 1, "greek", "greek_general_ci", "", 28597, "GREEK", 1, 1, NULL, NULL},
567   {  26, 1, "cp1250", "cp1250_general_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
568   {  28, 1, "gbk", "gbk_chinese_ci", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
569   {  30, 1, "latin5", "latin5_turkish_ci", "", 1254, "LATIN5", 1, 1, NULL, NULL},
570   {  32, 1, "armscii8", "armscii8_general_ci", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
571   {  33, 1, UTF8_MB3, UTF8_MB3"_general_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
572   {  35, 1, "ucs2", "ucs2_general_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
573   {  36, 1, "cp866", "cp866_general_ci", "", 866, "CP866", 1, 1, NULL, NULL},
574   {  37, 1, "keybcs2", "keybcs2_general_ci", "", 0, "", 1, 1, NULL, NULL},
575   {  38, 1, "macce", "macce_general_ci", "", 10029, "CP1282", 1, 1, NULL, NULL},
576   {  39, 1, "macroman", "macroman_general_ci", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
577   {  40, 1, "cp852", "cp852_general_ci", "", 852, "CP852", 1, 1, NULL, NULL},
578   {  41, 1, "latin7", "latin7_general_ci", "", 28603, "LATIN7", 1, 1, NULL, NULL},
579   {  51, 1, "cp1251", "cp1251_general_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
580   {  57, 1, "cp1256", "cp1256_general_ci", "", 1256, "CP1256", 1, 1, NULL, NULL},
581   {  59, 1, "cp1257", "cp1257_general_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
582   {  63, 1, "binary", "binary", "", 0, "ASCII", 1, 1, NULL, NULL},
583   {  64, 1, "armscii8", "armscii8_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
584   {  92, 1, "geostd8", "geostd8_general_ci", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
585   {  95, 1, "cp932", "cp932_japanese_ci", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
586   {  97, 1, "eucjpms", "eucjpms_japanese_ci", "", 932, "EUC-JP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
587   {   2, 1, "latin2", "latin2_czech_cs", "", 852, "LATIN2", 1, 1, NULL, NULL},
588   {   5, 1, "latin1", "latin1_german1_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
589   {  14, 1, "cp1251", "cp1251_bulgarian_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
590   {  15, 1, "latin1", "latin1_danish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
591   {  17, 1, "filename", "filename", "", 0, "", 1, 5, NULL, NULL},
592   {  20, 1, "latin7", "latin7_estonian_cs", "", 28603, "LATIN7", 1, 1, NULL, NULL},
593   {  21, 1, "latin2", "latin2_hungarian_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
594   {  23, 1, "cp1251", "cp1251_ukrainian_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
595   {  27, 1, "latin2", "latin2_croatian_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
596   {  29, 1, "cp1257", "cp1257_lithuanian_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
597   {  31, 1, "latin1", "latin1_german2_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
598   {  34, 1, "cp1250", "cp1250_czech_cs", "", 1250, "CP1250", 1, 1, NULL, NULL},
599   {  42, 1, "latin7", "latin7_general_cs", "", 28603, "LATIN7", 1, 1, NULL, NULL},
600   {  43, 1, "macce", "macce_bin", "", 10029, "CP1282", 1, 1, NULL, NULL},
601   {  44, 1, "cp1250", "cp1250_croatian_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
602   {  45, 1, UTF8_MB4, UTF8_MB4"_general_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8,  check_mb_utf8_valid},
603   {  46, 1, UTF8_MB4, UTF8_MB4"_bin", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8,  check_mb_utf8_valid},
604   {  47, 1, "latin1", "latin1_bin", "", 1250, "LATIN1", 1, 1, NULL, NULL},
605   {  48, 1, "latin1", "latin1_general_ci", "", 1250, "LATIN1", 1, 1, NULL, NULL},
606   {  49, 1, "latin1", "latin1_general_cs", "", 1250, "LATIN1", 1, 1, NULL, NULL},
607   {  50, 1, "cp1251", "cp1251_bin", "", 1251, "CP1251", 1, 1, NULL, NULL},
608   {  52, 1, "cp1251", "cp1251_general_cs", "", 1251, "CP1251", 1, 1, NULL, NULL},
609   {  53, 1, "macroman", "macroman_bin", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
610   {  54, 1, "utf16", "utf16_general_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
611   {  55, 1, "utf16", "utf16_bin", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
612   {  56, 1, "utf16le", "utf16_general_ci", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
613   {  58, 1, "cp1257", "cp1257_bin", "", 1257, "CP1257", 1, 1, NULL, NULL},
614 #ifdef USED_TO_BE_SO_BEFORE_MYSQL_5_5
615   {  60, 1, "armascii8", "armascii8_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
616 #endif
617   {  60, 1, "utf32", "utf32_general_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
618   {  61, 1, "utf32", "utf32_bin", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
619   {  62, 1, "utf16le", "utf16_bin", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
620   {  65, 1, "ascii", "ascii_bin", "", 1252, "ASCII", 1, 1, NULL, NULL},
621   {  66, 1, "cp1250", "cp1250_bin", "", 1250, "CP1250", 1, 1, NULL, NULL},
622   {  67, 1, "cp1256", "cp1256_bin", "", 1256, "CP1256", 1, 1, NULL, NULL},
623   {  68, 1, "cp866", "cp866_bin", "", 866, "CP866", 1, 1, NULL, NULL},
624   {  69, 1, "dec8", "dec8_bin", "", 0, "DEC", 1, 1, NULL, NULL},
625   {  70, 1, "greek", "greek_bin", "", 28597, "GREEK", 1, 1, NULL, NULL},
626   {  71, 1, "hebrew", "hebrew_bin", "", 1255, "hebrew", 1, 1, NULL, NULL},
627   {  72, 1, "hp8", "hp8_bin", "", 0, "HPROMAN-8", 1, 1, NULL, NULL},
628   {  73, 1, "keybcs2", "keybcs2_bin", "", 0, "", 1, 1, NULL, NULL},
629   {  74, 1, "koi8r", "koi8r_bin", "", 20866, "KOI8R", 1, 1, NULL, NULL},
630   {  75, 1, "koi8u", "koi8u_bin", "", 21866, "KOI8U", 1, 1, NULL, NULL},
631   {  76, 1, UTF8_MB3, UTF8_MB3"_tolower_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
632   {  77, 1, "latin2", "latin2_bin", "", 28592, "LATIN2", 1, 1, NULL, NULL},
633   {  78, 1, "latin5", "latin5_bin", "", 1254, "LATIN5", 1, 1, NULL, NULL},
634   {  79, 1, "latin7", "latin7_bin", "", 28603, "LATIN7", 1, 1, NULL, NULL},
635   {  80, 1, "cp850", "cp850_bin", "", 850, "CP850", 1, 1, NULL, NULL},
636   {  81, 1, "cp852", "cp852_bin", "", 852, "CP852", 1, 1, NULL, NULL},
637   {  82, 1, "swe7", "swe7_bin", "", 0, "", 1, 1, NULL, NULL},
638   {  93, 1, "geostd8", "geostd8_bin", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
639   {  83, 1, UTF8_MB3, UTF8_MB3"_bin", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
640   {  84, 1, "big5", "big5_bin", "", 65000, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
641   {  85, 1, "euckr", "euckr_bin", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
642   {  86, 1, "gb2312", "gb2312_bin", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
643   {  87, 1, "gbk", "gbk_bin", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
644   {  88, 1, "sjis", "sjis_bin", "", 932,  "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
645   {  89, 1, "tis620", "tis620_bin", "", 874, "TIS620", 1, 1, NULL, NULL},
646   {  90, 1, "ucs2", "ucs2_bin", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
647   {  91, 1, "ujis", "ujis_bin", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
648   {  94, 1, "latin1", "latin1_spanish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
649   {  96, 1, "cp932", "cp932_bin", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
650   {  99, 1, "cp1250", "cp1250_polish_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
651   {  98, 1, "eucjpms", "eucjpms_bin", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
652   { 101, 1, "utf16", "utf16_unicode_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
653   { 102, 1, "utf16", "utf16_icelandic_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
654   { 103, 1, "utf16", "utf16_latvian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
655   { 104, 1, "utf16", "utf16_romanian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
656   { 105, 1, "utf16", "utf16_slovenian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
657   { 106, 1, "utf16", "utf16_polish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
658   { 107, 1, "utf16", "utf16_estonian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
659   { 108, 1, "utf16", "utf16_spanish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
660   { 109, 1, "utf16", "utf16_swedish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
661   { 110, 1, "utf16", "utf16_turkish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
662   { 111, 1, "utf16", "utf16_czech_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
663   { 112, 1, "utf16", "utf16_danish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
664   { 113, 1, "utf16", "utf16_lithunian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
665   { 114, 1, "utf16", "utf16_slovak_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
666   { 115, 1, "utf16", "utf16_spanish2_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
667   { 116, 1, "utf16", "utf16_roman_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
668   { 117, 1, "utf16", "utf16_persian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
669   { 118, 1, "utf16", "utf16_esperanto_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
670   { 120, 1, "utf16", "utf16_sinhala_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
671   { 121, 1, "utf16", "utf16_german2_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
672   { 122, 1, "utf16", "utf16_croatian_mysql561_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
673   { 123, 1, "utf16", "utf16_unicode_520_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
674   { 124, 1, "utf16", "utf16_vietnamese_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
675   { 128, 1, "ucs2", "ucs2_unicode_ci", "", 1200, "UCS-2BE", 2, 2,  mysql_mbcharlen_ucs2, check_mb_ucs2},
676   { 129, 1, "ucs2", "ucs2_icelandic_ci", "", 1200, "UCS-2BE", 2, 2,  mysql_mbcharlen_ucs2, check_mb_ucs2},
677   { 130, 1, "ucs2", "ucs2_latvian_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
678   { 131, 1, "ucs2", "ucs2_romanian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
679   { 132, 1, "ucs2", "ucs2_slovenian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
680   { 133, 1, "ucs2", "ucs2_polish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
681   { 134, 1, "ucs2", "ucs2_estonian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
682   { 135, 1, "ucs2", "ucs2_spanish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
683   { 136, 1, "ucs2", "ucs2_swedish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
684   { 137, 1, "ucs2", "ucs2_turkish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
685   { 138, 1, "ucs2", "ucs2_czech_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
686   { 139, 1, "ucs2", "ucs2_danish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
687   { 140, 1, "ucs2", "ucs2_lithuanian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
688   { 141, 1, "ucs2", "ucs2_slovak_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
689   { 142, 1, "ucs2", "ucs2_spanish2_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
690   { 143, 1, "ucs2", "ucs2_roman_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
691   { 144, 1, "ucs2", "ucs2_persian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
692   { 145, 1, "ucs2", "ucs2_esperanto_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
693   { 146, 1, "ucs2", "ucs2_hungarian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
694   { 147, 1, "ucs2", "ucs2_sinhala_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
695   { 148, 1, "ucs2", "ucs2_german2_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
696   { 149, 1, "ucs2", "ucs2_croatian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
697   { 150, 1, "ucs2", "ucs2_unicode_520_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
698   { 151, 1, "ucs2", "ucs2_vietnamese_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
699   { 159, 1, "ucs2", "ucs2_general_mysql500_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
700   { 160, 1, "utf32", "utf32_unicode_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
701   { 161, 1, "utf32", "utf32_icelandic_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
702   { 162, 1, "utf32", "utf32_latvian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
703   { 163, 1, "utf32", "utf32_romanian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
704   { 164, 1, "utf32", "utf32_slovenian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
705   { 165, 1, "utf32", "utf32_polish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
706   { 166, 1, "utf32", "utf32_estonian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
707   { 167, 1, "utf32", "utf32_spanish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
708   { 168, 1, "utf32", "utf32_swedish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
709   { 169, 1, "utf32", "utf32_turkish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
710   { 170, 1, "utf32", "utf32_czech_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
711   { 171, 1, "utf32", "utf32_danish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
712   { 172, 1, "utf32", "utf32_lithuanian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
713   { 173, 1, "utf32", "utf32_slovak_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
714   { 174, 1, "utf32", "utf32_spanish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
715   { 175, 1, "utf32", "utf32_roman_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
716   { 176, 1, "utf32", "utf32_persian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
717   { 177, 1, "utf32", "utf32_esperanto_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
718   { 178, 1, "utf32", "utf32_hungarian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
719   { 179, 1, "utf32", "utf32_sinhala_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
720   { 180, 1, "utf32", "utf32_german2_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
721   { 181, 1, "utf32", "utf32_croatian_mysql561_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
722   { 182, 1, "utf32", "utf32_unicode_520_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
723   { 183, 1, "utf32", "utf32_vietnamese_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
724 
725   { 192, 1, UTF8_MB3, UTF8_MB3"_general_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
726   { 193, 1, UTF8_MB3, UTF8_MB3"_icelandic_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
727   { 194, 1, UTF8_MB3, UTF8_MB3"_latvian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
728   { 195, 1, UTF8_MB3, UTF8_MB3"_romanian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
729   { 196, 1, UTF8_MB3, UTF8_MB3"_slovenian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
730   { 197, 1, UTF8_MB3, UTF8_MB3"_polish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
731   { 198, 1, UTF8_MB3, UTF8_MB3"_estonian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
732   { 199, 1, UTF8_MB3, UTF8_MB3"_spanish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
733   { 119, 1, UTF8_MB3, UTF8_MB3"_spanish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
734   { 200, 1, UTF8_MB3, UTF8_MB3"_swedish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
735   { 201, 1, UTF8_MB3, UTF8_MB3"_turkish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
736   { 202, 1, UTF8_MB3, UTF8_MB3"_czech_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
737   { 203, 1, UTF8_MB3, UTF8_MB3"_danish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
738   { 204, 1, UTF8_MB3, UTF8_MB3"_lithuanian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
739   { 205, 1, UTF8_MB3, UTF8_MB3"_slovak_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
740   { 206, 1, UTF8_MB3, UTF8_MB3"_spanish2_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
741   { 207, 1, UTF8_MB3, UTF8_MB3"_roman_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
742   { 208, 1, UTF8_MB3, UTF8_MB3"_persian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
743   { 209, 1, UTF8_MB3, UTF8_MB3"_esperanto_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
744   { 210, 1, UTF8_MB3, UTF8_MB3"_hungarian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
745   { 211, 1, UTF8_MB3, UTF8_MB3"_sinhala_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
746   { 212, 1, UTF8_MB3, UTF8_MB3"_german2_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
747   { 214, 1, UTF8_MB3, UTF8_MB3"_unicode_520_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
748   { 215, 1, UTF8_MB3, UTF8_MB3"_vietnamese_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
749   { 213, 1, UTF8_MB3, UTF8_MB3"_croatian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
750   { 223, 1, UTF8_MB3, UTF8_MB3"_general_mysql500_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
751 
752   { 224, 1, UTF8_MB4, UTF8_MB4"_unicode_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
753   { 225, 1, UTF8_MB4, UTF8_MB4"_icelandic_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
754   { 226, 1, UTF8_MB4, UTF8_MB4"_latvian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
755   { 227, 1, UTF8_MB4, UTF8_MB4"_romanian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
756   { 228, 1, UTF8_MB4, UTF8_MB4"_slovenian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
757   { 229, 1, UTF8_MB4, UTF8_MB4"_polish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
758   { 230, 1, UTF8_MB4, UTF8_MB4"_estonian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
759   { 231, 1, UTF8_MB4, UTF8_MB4"_spanish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
760   { 232, 1, UTF8_MB4, UTF8_MB4"_swedish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
761   { 233, 1, UTF8_MB4, UTF8_MB4"_turkish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
762   { 234, 1, UTF8_MB4, UTF8_MB4"_czech_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
763   { 235, 1, UTF8_MB4, UTF8_MB4"_danish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
764   { 236, 1, UTF8_MB4, UTF8_MB4"_lithuanian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
765   { 237, 1, UTF8_MB4, UTF8_MB4"_slovak_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
766   { 238, 1, UTF8_MB4, UTF8_MB4"_spanish2_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
767   { 239, 1, UTF8_MB4, UTF8_MB4"_roman_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
768   { 240, 1, UTF8_MB4, UTF8_MB4"_persian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
769   { 241, 1, UTF8_MB4, UTF8_MB4"_esperanto_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
770   { 242, 1, UTF8_MB4, UTF8_MB4"_hungarian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
771   { 243, 1, UTF8_MB4, UTF8_MB4"_sinhala_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
772   { 244, 1, UTF8_MB4, UTF8_MB4"_german2_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
773   { 245, 1, UTF8_MB4, UTF8_MB4"_croatian_mysql561_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
774   { 246, 1, UTF8_MB4, UTF8_MB4"_unicode_520_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
775   { 247, 1, UTF8_MB4, UTF8_MB4"_vietnamese_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
776   { 248, 1, "gb18030", "gb18030_chinese_ci", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
777   { 249, 1, "gb18030", "gb18030_bin", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
778   { 250, 1, "gb18030", "gb18030_unicode_520_ci", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
779 
780 
781   { 254, 1, UTF8_MB3, UTF8_MB3"_general_cs", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8, check_mb_utf8_valid},
782 
783   { 255, 1, UTF8_MB4, UTF8_MB4"_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
784   { 256, 1, UTF8_MB4, UTF8_MB4"_de_pb_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
785   { 257, 1, UTF8_MB4, UTF8_MB4"_is_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
786   { 258, 1, UTF8_MB4, UTF8_MB4"_lv_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
787   { 259, 1, UTF8_MB4, UTF8_MB4"_ro_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
788   { 260, 1, UTF8_MB4, UTF8_MB4"_sl_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
789   { 261, 1, UTF8_MB4, UTF8_MB4"_pl_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
790   { 262, 1, UTF8_MB4, UTF8_MB4"_et_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
791   { 263, 1, UTF8_MB4, UTF8_MB4"_es_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
792   { 264, 1, UTF8_MB4, UTF8_MB4"_sv_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
793   { 265, 1, UTF8_MB4, UTF8_MB4"_tr_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
794   { 266, 1, UTF8_MB4, UTF8_MB4"_cs_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
795   { 267, 1, UTF8_MB4, UTF8_MB4"_da_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
796   { 268, 1, UTF8_MB4, UTF8_MB4"_lt_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
797   { 269, 1, UTF8_MB4, UTF8_MB4"_sk_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
798   { 270, 1, UTF8_MB4, UTF8_MB4"_es_trad_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
799   { 271, 1, UTF8_MB4, UTF8_MB4"_la_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
800   { 273, 1, UTF8_MB4, UTF8_MB4"_eo_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
801   { 274, 1, UTF8_MB4, UTF8_MB4"_hu_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
802   { 275, 1, UTF8_MB4, UTF8_MB4"_hr_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
803   { 277, 1, UTF8_MB4, UTF8_MB4"_vi_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
804   { 278, 1, UTF8_MB4, UTF8_MB4"_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
805   { 279, 1, UTF8_MB4, UTF8_MB4"_de_pb__0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
806   { 280, 1, UTF8_MB4, UTF8_MB4"_is_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
807   { 281, 1, UTF8_MB4, UTF8_MB4"_lv_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
808   { 282, 1, UTF8_MB4, UTF8_MB4"_ro_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
809   { 283, 1, UTF8_MB4, UTF8_MB4"_sl_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
810   { 284, 1, UTF8_MB4, UTF8_MB4"_pl_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
811   { 285, 1, UTF8_MB4, UTF8_MB4"_et_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
812   { 286, 1, UTF8_MB4, UTF8_MB4"_es_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
813   { 287, 1, UTF8_MB4, UTF8_MB4"_sv_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
814   { 288, 1, UTF8_MB4, UTF8_MB4"_tr_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
815   { 289, 1, UTF8_MB4, UTF8_MB4"_cs_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
816   { 290, 1, UTF8_MB4, UTF8_MB4"_da_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
817   { 291, 1, UTF8_MB4, UTF8_MB4"_lt_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
818   { 292, 1, UTF8_MB4, UTF8_MB4"_sk_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
819   { 293, 1, UTF8_MB4, UTF8_MB4"_es_trad_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
820   { 294, 1, UTF8_MB4, UTF8_MB4"_la_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
821   { 296, 1, UTF8_MB4, UTF8_MB4"_eo_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
822   { 297, 1, UTF8_MB4, UTF8_MB4"_hu_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
823   { 298, 1, UTF8_MB4, UTF8_MB4"_hr_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
824   { 300, 1, UTF8_MB4, UTF8_MB4"_vi_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
825   { 303, 1, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
826   { 304, 1, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs_ks", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
827   { 305, 1, UTF8_MB4, UTF8_MB4"_0900_as_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
828   { 306, 1, UTF8_MB4, UTF8_MB4"_ru_0900_as_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
829   { 307, 1, UTF8_MB4, UTF8_MB4"_ru_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
830   { 576, 1, UTF8_MB3, UTF8_MB3"_croatian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
831   { 577, 1, UTF8_MB3, UTF8_MB3"_myanmar_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
832   { 578, 1, UTF8_MB3, UTF8_MB3"_thai_520_w2", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
833   { 608, 1, UTF8_MB4, UTF8_MB4"_croatian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
834   { 609, 1, UTF8_MB4, UTF8_MB4"_myanmar_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
835   { 610, 1, UTF8_MB4, UTF8_MB4"_thai_520_w2", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
836   { 640, 1, "ucs2", "ucs2_croatian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
837   { 641, 1, "ucs2", "ucs2_myanmar_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
838   { 642, 1, "ucs2", "ucs2_thai_520_w2", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
839   { 672, 1, "utf16", "utf16_croatian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
840   { 673, 1, "utf16", "utf16_myanmar_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
841   { 674, 1, "utf16", "utf16_thai_520_w2", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
842   { 736, 1, "utf32", "utf32_croatian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
843   { 737, 1, "utf32", "utf32_myanmar_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
844   { 738, 1, "utf32", "utf32_thai_520_w2", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
845   {1025, 1, "big5","big5_chinese_nopad_ci", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
846   {1027, 1, "dec8", "dec8_swedisch_nopad_ci", "", 0, "DEC", 1, 1, NULL, NULL},
847   {1028, 1, "cp850", "cp850_general_nopad_ci", "", 850, "CP850", 1, 1, NULL, NULL},
848   {1030, 1, "hp8", "hp8_english_nopad_ci", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
849   {1031, 1, "koi8r", "koi8r_general_nopad_ci", "", 878, "KOI8R", 1, 1, NULL, NULL},
850   {1032, 1, "latin1", "latin1_swedish_nopad_ci", "", 850, "LATIN1", 1, 1, NULL, NULL},
851   {1033, 1, "latin2", "latin2_general_nopad_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
852   {1034, 1, "swe7", "swe7_swedish_nopad_ci", "", 20107, "", 1, 1, NULL, NULL},
853   {1035, 1, "ascii", "ascii_general_nopad_ci", "", 1252, "ASCII", 1, 1, NULL, NULL},
854   {1036, 1, "ujis", "ujis_japanese_nopad_ci", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
855   {1037, 1, "sjis", "sjis_japanese_nopad_ci", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
856   {1040, 1, "hebrew", "hebrew_general_nopad_ci", "", 1255, "HEBREW", 1, 1, NULL, NULL},
857   {1042, 1, "tis620", "tis620_thai_nopad_ci", "", 874, "TIS620", 1, 1, NULL, NULL},
858   {1043, 1, "euckr", "euckr_korean_nopad_ci", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
859   {1046, 1, "koi8u", "koi8u_general_nopad_ci", "", 20866, "KOI8U", 1, 1, NULL, NULL},
860   {1048, 1, "gb2312", "gb2312_chinese_nopad_ci", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
861   {1049, 1, "greek", "greek_general_nopad_ci", "", 28597, "GREEK", 1, 1, NULL, NULL},
862   {1050, 1, "cp1250", "cp1250_general_nopad_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
863   {1052, 1, "gbk", "gbk_chinese_nopad_ci", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
864   {1054, 1, "latin5", "latin5_turkish_nopad_ci", "", 1254, "LATIN5", 1, 1, NULL, NULL},
865   {1056, 1, "armscii8", "armscii8_general_nopad_ci", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
866   {1057, 1, UTF8_MB3, UTF8_MB3"_general_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
867   {1059, 1, "ucs2", "ucs2_general_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
868   {1060, 1, "cp866", "cp866_general_nopad_ci", "", 866, "CP866", 1, 1, NULL, NULL},
869   {1061, 1, "keybcs2", "keybcs2_general_nopad_ci", "", 0, "", 1, 1, NULL, NULL},
870   {1062, 1, "macce", "macce_general_nopad_ci", "", 10029, "CP1282", 1, 1, NULL, NULL},
871   {1063, 1, "macroman", "macroman_general_nopad_ci", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
872   {1064, 1, "cp852", "cp852_general_nopad_ci", "", 852, "CP852", 1, 1, NULL, NULL},
873   {1065, 1, "latin7", "latin7_general_nopad_ci", "", 28603, "LATIN7", 1, 1, NULL, NULL},
874   {1067, 1, "macce", "macce_nopad_bin", "", 10029, "CP1282", 1, 1, NULL, NULL},
875   {1069, 1, UTF8_MB4, UTF8_MB4"_general_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
876   {1070, 1, UTF8_MB4, UTF8_MB4"_general_nopad_bin", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
877   {1071, 1, "latin1", "latin1_nopad_bin", "", 850, "LATIN1", 1, 1, NULL, NULL},
878   {1074, 1, "cp1251", "cp1251_nopad_bin", "", 1251, "CP1251", 1, 1, NULL, NULL},
879   {1075, 1, "cp1251", "cp1251_general_nopad_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
880   {1077, 1, "macroman", "macroman_nopad_bin", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
881   {1078, 1, "utf16", "utf16_general_nopad_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
882   {1079, 1, "utf16", "utf16_nopad_bin", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
883   {1080, 1, "utf16le", "utf16le_general_nopad_ci", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
884   {1081, 1, "cp1256", "cp1256_general_nopad_ci", "", 1256, "CP1256", 1, 1, NULL, NULL},
885   {1082, 1, "cp1257", "cp1257_nopad_bin", "", 1257, "CP1257", 1, 1, NULL, NULL},
886   {1083, 1, "cp1257", "cp1257_general_nopad_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
887   {1084, 1, "utf32", "utf32_general_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
888   {1085, 1, "utf32", "utf32_nopad_bin", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
889   {1086, 1, "utf16le", "utf16le_nopad_bin", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
890   {1088, 1, "armscii8", "armscii8_nopad_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
891   {1089, 1, "ascii", "ascii_nopad_bin", "", 1252, "ASCII", 1, 1, NULL, NULL},
892   {1090, 1, "cp1250", "cp1250_nopad_bin", "", 1250, "CP1250", 1, 1, NULL, NULL},
893   {1091, 1, "cp1256", "cp1256_nopad_bin", "", 1256, "CP1256", 1, 1, NULL, NULL},
894   {1092, 1, "cp866", "cp866_nopad_bin", "", 866, "CP866", 1, 1, NULL, NULL},
895   {1093, 1, "dec8", "dec8_nopad_bin", "", 0, "DEC", 1, 1, NULL, NULL},
896   {1094, 1, "greek", "greek_nopad_bin", "", 28597, "GREEK", 1, 1, NULL, NULL},
897   {1095, 1, "hebrew", "hebrew_nopad_bin", "", 1255, "HEBREW", 1, 1, NULL, NULL},
898   {1096, 1, "hp8", "hp8_nopad_bin", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
899   {1097, 1, "keybcs2", "keybcs2_nopad_bin", "", 0, "", 1, 1, NULL, NULL},
900   {1098, 1, "koi8r", "koi8r_nopad_bin", "", 878, "KOI8R", 1, 1, NULL, NULL},
901   {1099, 1, "koi8u", "koi8u_nopad_bin", "", 20866, "KOI8U", 1, 1, NULL, NULL},
902   {1101, 1, "latin2", "latin2_nopad_bin", "", 852, "LATIN2", 1, 1, NULL, NULL},
903   {1102, 1, "latin5", "latin5_nopad_bin", "", 1254, "LATIN5", 1, 1, NULL, NULL},
904   {1103, 1, "latin7", "latin7_nopad_bin", "", 28603, "LATIN7", 1, 1, NULL, NULL},
905   {1104, 1, "cp850", "cp850_nopad_bin", "", 850, "CP850", 1, 1, NULL, NULL},
906   {1105, 1, "cp852", "cp852_nopad_bin", "", 852, "CP852", 1, 1, NULL, NULL},
907   {1106, 1, "swe7", "swe7_nopad_bin", "", 20107, "", 1, 1, NULL, NULL},
908   {1107, 1, UTF8_MB3, UTF8_MB3"_nopad_bin", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
909   {1108, 1, "big5","big5_nopad_bin", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
910   {1109, 1, "euckr", "euckr_nopad_bin", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
911   {1110, 1, "gb2312", "gb2312_nopad_bin", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
912   {1111, 1, "gbk", "gbk_nopad_bin", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
913   {1112, 1, "sjis", "sjis_nopad_bin", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
914   {1113, 1, "tis620", "tis620_nopad_bin", "", 874, "TIS620", 1, 1, NULL, NULL},
915   {1114, 1, "ucs2", "ucs2_nopad_bin", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
916   {1115, 1, "ujis", "ujis_nopad_bin", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
917   {1116, 1, "geostd8", "geostd8_general_nopad_ci", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
918   {1117, 1, "geostd8", "geostd8_nopad_bin", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
919   {1119, 1, "cp932", "cp932_japanese_nopad_ci", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
920   {1120, 1, "cp932", "cp932_nopad_bin", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
921   {1121, 1, "eucjpms", "eucjpms_japanese_nopad_ci", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
922   {1122, 1, "eucjpms", "eucjpms_nopad_bin", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
923   {1125, 1, "utf16", "utf16_unicode_nopad_ci", "", 1200, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
924   {1147, 1, "utf16", "utf16_unicode_520_nopad_ci", "", 1200, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
925   {1152, 1, "ucs2", "ucs2_unicode_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
926   {1174, 1, "ucs2", "ucs2_unicode_520_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
927   {1184, 1, "utf32", "utf32_unicode_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
928   {1206, 1, "utf32", "utf32_unicode_520_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
929   {1216, 1, UTF8_MB3, UTF8_MB3"_unicode_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
930   {1238, 1, UTF8_MB3, UTF8_MB3"_unicode_520_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
931   {1248, 1, UTF8_MB4, UTF8_MB4"_unicode_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
932   {1270, 1, UTF8_MB4, UTF8_MB4"_unicode_520_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
933   {   0, 0, NULL, NULL, NULL, 0, NULL, 0, 0, NULL, NULL}
934 };
935 /* }}} */
936 
937 
938 /* {{{ mysql_find_charset_nr */
mysql_find_charset_nr(unsigned int charsetnr)939 const MARIADB_CHARSET_INFO * mysql_find_charset_nr(unsigned int charsetnr)
940 {
941   const MARIADB_CHARSET_INFO * c = mariadb_compiled_charsets;
942 
943   do {
944     if (c->nr == charsetnr) {
945       return(c);
946     }
947     ++c;
948   } while (c[0].nr != 0);
949   return(NULL);
950 }
951 /* }}} */
952 
953 
954 /* {{{ mysql_find_charset_name */
mysql_find_charset_name(const char * name)955 MARIADB_CHARSET_INFO * mysql_find_charset_name(const char *name)
956 {
957   MARIADB_CHARSET_INFO *c = (MARIADB_CHARSET_INFO *)mariadb_compiled_charsets;
958   const char *csname;
959 
960   if (!strcasecmp(name, MADB_AUTODETECT_CHARSET_NAME))
961     csname= madb_get_os_character_set();
962   else
963     csname= (char *)name;
964 
965   do {
966     if (!strcasecmp(c->csname, csname)) {
967       return(c);
968     }
969     ++c;
970   } while (c[0].nr != 0);
971   return(NULL);
972 }
973 /* }}} */
974 
975 
976 /* {{{ mysql_cset_escape_quotes */
mysql_cset_escape_quotes(const MARIADB_CHARSET_INFO * cset,char * newstr,const char * escapestr,size_t escapestr_len)977 size_t mysql_cset_escape_quotes(const MARIADB_CHARSET_INFO *cset, char *newstr,
978                     const char * escapestr, size_t escapestr_len )
979 {
980   const char   *newstr_s = newstr;
981   const char   *newstr_e = newstr + 2 * escapestr_len;
982   const char   *end = escapestr + escapestr_len;
983   my_bool  escape_overflow = FALSE;
984 
985   for (;escapestr < end; escapestr++) {
986     unsigned int len = 0;
987     /* check unicode characters */
988 
989     if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
990 
991       /* check possible overflow */
992       if ((newstr + len) > newstr_e) {
993         escape_overflow = TRUE;
994         break;
995       }
996       /* copy mb char without escaping it */
997       while (len--) {
998         *newstr++ = *escapestr++;
999       }
1000       escapestr--;
1001       continue;
1002     }
1003     if (*escapestr == '\'') {
1004       if (newstr + 2 > newstr_e) {
1005         escape_overflow = TRUE;
1006         break;
1007       }
1008       *newstr++ = '\'';
1009       *newstr++ = '\'';
1010     } else {
1011       if (newstr + 1 > newstr_e) {
1012         escape_overflow = TRUE;
1013         break;
1014       }
1015       *newstr++ = *escapestr;
1016     }
1017   }
1018   *newstr = '\0';
1019 
1020   if (escape_overflow) {
1021     return((size_t)~0);
1022   }
1023   return((size_t)(newstr - newstr_s));
1024 }
1025 /* }}} */
1026 
1027 
1028 /* {{{ mysql_cset_escape_slashes */
mysql_cset_escape_slashes(const MARIADB_CHARSET_INFO * cset,char * newstr,const char * escapestr,size_t escapestr_len)1029 size_t mysql_cset_escape_slashes(const MARIADB_CHARSET_INFO * cset, char *newstr,
1030                      const char * escapestr, size_t escapestr_len )
1031 {
1032   const char   *newstr_s = newstr;
1033   const char   *newstr_e = newstr + 2 * escapestr_len;
1034   const char   *end = escapestr + escapestr_len;
1035   my_bool  escape_overflow = FALSE;
1036 
1037   for (;escapestr < end; escapestr++) {
1038     char esc = '\0';
1039     unsigned int len = 0;
1040 
1041     /* check unicode characters */
1042     if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
1043       /* check possible overflow */
1044       if ((newstr + len) > newstr_e) {
1045         escape_overflow = TRUE;
1046         break;
1047       }
1048       /* copy mb char without escaping it */
1049       while (len--) {
1050         *newstr++ = *escapestr++;
1051       }
1052       escapestr--;
1053       continue;
1054     }
1055     if (cset->char_maxlen > 1 && cset->mb_charlen(*escapestr) > 1) {
1056       esc = *escapestr;
1057     } else {
1058       switch (*escapestr) {
1059         case 0:
1060           esc = '0';
1061           break;
1062         case '\n':
1063           esc = 'n';
1064           break;
1065         case '\r':
1066           esc = 'r';
1067           break;
1068         case '\\':
1069         case '\'':
1070         case '"':
1071           esc = *escapestr;
1072           break;
1073         case '\032':
1074           esc = 'Z';
1075           break;
1076       }
1077     }
1078     if (esc) {
1079       if (newstr + 2 > newstr_e) {
1080         escape_overflow = TRUE;
1081         break;
1082       }
1083       /* copy escaped character */
1084       *newstr++ = '\\';
1085       *newstr++ = esc;
1086     } else {
1087       if (newstr + 1 > newstr_e) {
1088         escape_overflow = TRUE;
1089         break;
1090       }
1091       /* copy non escaped character */
1092       *newstr++ = *escapestr;
1093     }
1094   }
1095   *newstr = '\0';
1096 
1097   if (escape_overflow) {
1098     return((size_t)~0);
1099   }
1100   return((size_t)(newstr - newstr_s));
1101 }
1102 /* }}} */
1103 
1104 /* {{{ MADB_OS_CHARSET */
/*
  One row of the MADB_OS_CHARSET table, which maps an operating system
  character set / code page identifier to a character set name.
*/
struct st_madb_os_charset {
  const char    *identifier;    /* OS side identifier, e.g. a Windows code
                                   page number given as a string ("850") */
  const char    *description;   /* human readable description of the entry */
  const char    *charset;       /* name of the mapped character set, or NULL
                                   when there is no mapping */
  const char    *iconv_cs;      /* presumably the iconv name of the character
                                   set; may be NULL - TODO confirm usage */
  unsigned char  supported;     /* one of the MADB_CS_* values */
};
1112 
1113 #define MADB_CS_UNSUPPORTED 0
1114 #define MADB_CS_APPROX 1
1115 #define MADB_CS_EXACT 2
1116 
1117 /* Please add new character sets at the end. */
1118 struct st_madb_os_charset MADB_OS_CHARSET[]=
1119 {
1120 #ifdef _WIN32
1121   /* Windows code pages */
1122   {"037", "IBM EBCDIC US-Canada", NULL, NULL, MADB_CS_UNSUPPORTED},
1123   {"437", "OEM United States", "cp850", NULL, MADB_CS_APPROX},
1124   {"500", "IBM EBCDIC International", NULL, NULL, MADB_CS_UNSUPPORTED},
1125   {"708", "Arabic (ASMO 708)", NULL, NULL, MADB_CS_UNSUPPORTED},
1126   {"709", "Arabic (ASMO-449+, BCON V4)", NULL, NULL, MADB_CS_UNSUPPORTED},
1127   {"710", "Transparent Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1128   {"720", "Arabic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1129   {"737", "Greek (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1130   {"775", "Baltic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1131   {"850", "Western European (DOS)", "cp850", NULL, MADB_CS_EXACT},
1132   {"852", "Central European (DOS)", "cp852", NULL, MADB_CS_EXACT},
1133   {"855", "Cyrillic (primarily Russian)", NULL, NULL, MADB_CS_UNSUPPORTED},
1134   {"857", "Turkish (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1135   {"858", "OEM Multilingual Latin 1 + Euro symbol", "cp850", NULL, MADB_CS_EXACT},
1136   {"860", "Portuguese (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1137   {"861", "Icelandic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1138   {"862", "Hebrew (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1139   {"863", "French Canadian (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1140   {"864", "Arabic (864)", NULL, NULL, MADB_CS_UNSUPPORTED},
1141   {"865", "Nordic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1142   {"866", "Cyrillic (DOS)", "cp866", NULL, MADB_CS_EXACT},
1143   {"869", "Greek, Modern (DOS)", "greek", NULL, MADB_CS_EXACT},
1144   {"870", "IBM EBCDIC Multilingual Latin 2", NULL, NULL, MADB_CS_UNSUPPORTED},
1145   {"874", "Thai (Windows)", "tis620", NULL, MADB_CS_UNSUPPORTED},
1146   {"875", "Greek Modern", NULL, NULL, MADB_CS_UNSUPPORTED},
1147   {"932", "Japanese (Shift-JIS)", "cp932", NULL, MADB_CS_EXACT},
1148   {"936", "Chinese Simplified (GB2312)", "gbk", NULL, MADB_CS_EXACT},
1149   {"949", "ANSI/OEM Korean (Unified Hangul Code)", "euckr", NULL, MADB_CS_EXACT},
1150   {"950", "Chinese Traditional (Big5)", "big5", NULL, MADB_CS_EXACT},
1151   {"1026", "EBCDIC Turkish (Latin 5)", NULL, NULL, MADB_CS_UNSUPPORTED},
1152   {"1047", "EBCDIC Latin 1/Open System", NULL, NULL, MADB_CS_UNSUPPORTED},
1153   {"1140", "IBM EBCDIC (US-Canada-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1154   {"1141", "IBM EBCDIC (Germany-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1155   {"1142", "IBM EBCDIC (Denmark-Norway-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1156   {"1143", "IBM EBCDIC (Finland-Sweden-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1157   {"1144", "IBM EBCDIC (Italy-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1158   {"1145", "IBM EBCDIC (Spain-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1159   {"1146", "IBM EBCDIC (UK-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1160   {"1147", "IBM EBCDIC (France-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1161   {"1148", "IBM EBCDIC (International-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1162   {"1149", "IBM EBCDIC (Icelandic-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1163   {"1200", "UTF-16, little endian byte order", NULL, NULL, MADB_CS_UNSUPPORTED},
1164   {"1201", "UTF-16, big endian byte order", "utf16", NULL, MADB_CS_UNSUPPORTED},
1165   {"1250", "Central European (Windows)", "cp1250", NULL, MADB_CS_EXACT},
1166   {"1251", "Cyrillic (Windows)", "cp1251", NULL, MADB_CS_EXACT},
1167   {"1252", "Western European (Windows)", "latin1", NULL, MADB_CS_EXACT},
1168   {"1253", "Greek (Windows)", "greek", NULL, MADB_CS_EXACT},
1169   {"1254", "Turkish (Windows)", "latin5", NULL, MADB_CS_EXACT},
1170   {"1255", "Hebrew (Windows)", "hewbrew", NULL, MADB_CS_EXACT},
1171   {"1256", "Arabic (Windows)", "cp1256", NULL, MADB_CS_EXACT},
1172   {"1257", "Baltic (Windows)","cp1257", NULL, MADB_CS_EXACT},
1173   {"1258", "Vietnamese (Windows)", NULL, NULL, MADB_CS_UNSUPPORTED},
1174   {"1361", "Korean (Johab)", NULL, NULL, MADB_CS_UNSUPPORTED},
1175   {"10000", "Western European (Mac)", "macroman", NULL, MADB_CS_EXACT},
1176   {"10001", "Japanese (Mac)", "sjis", NULL, MADB_CS_EXACT},
1177   {"10002", "Chinese Traditional (Mac)", "big5", NULL, MADB_CS_EXACT},
1178   {"10003", "Korean (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1179   {"10004", "Arabic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1180   {"10005", "Hebrew (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1181   {"10006", "Greek (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1182   {"10007", "Cyrillic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1183   {"10008", "Chinese Simplified (Mac)", "gb2312", NULL, MADB_CS_EXACT},
1184   {"10010", "Romanian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1185   {"10017", "Ukrainian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1186   {"10021", "Thai (Mac)", "tis620", NULL, MADB_CS_EXACT},
1187   {"10029", "Central European (Mac)", "macce", NULL, MADB_CS_EXACT},
1188   {"10079", "Icelandic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1189   {"10081", "Turkish (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1190   {"10082", "Croatian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1191   {"12000", "Unicode UTF-32, little endian byte order", NULL, NULL, MADB_CS_UNSUPPORTED},
1192   {"12001", "Unicode UTF-32, big endian byte order", "utf32", NULL, MADB_CS_UNSUPPORTED},
1193   {"20000", "Chinese Traditional (CNS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1194   {"20001", "TCA Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1195   {"20002", "Chinese Traditional (Eten)", NULL, NULL, MADB_CS_UNSUPPORTED},
1196   {"20003", "IBM5550 Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1197   {"20004", "TeleText Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1198   {"20005", "Wang Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1199   {"20105", "Western European (IA5)", NULL, NULL, MADB_CS_UNSUPPORTED},
1200   {"20106", "IA5 German (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1201   {"20107", "Swedish (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1202   {"20108", "Norwegian (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1203   {"20127", "US-ASCII (7-bit)", "ascii", NULL, MADB_CS_EXACT},
1204   {"20261", "T.61", NULL, NULL, MADB_CS_UNSUPPORTED},
1205   {"20269", "Non-Spacing Accent", NULL, NULL, MADB_CS_UNSUPPORTED},
1206   {"20273", "EBCDIC Germany", NULL, NULL, MADB_CS_UNSUPPORTED},
1207   {"20277", "EBCDIC Denmark-Norway", NULL, NULL, MADB_CS_UNSUPPORTED},
1208   {"20278", "EBCDIC Finland-Sweden", NULL, NULL, MADB_CS_UNSUPPORTED},
1209   {"20280", "EBCDIC Italy", NULL, NULL, MADB_CS_UNSUPPORTED},
1210   {"20284", "EBCDIC Latin America-Spain", NULL, NULL, MADB_CS_UNSUPPORTED},
1211   {"20285", "EBCDIC United Kingdom", NULL, NULL, MADB_CS_UNSUPPORTED},
1212   {"20290", "EBCDIC Japanese Katakana Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1213   {"20297", "EBCDIC France", NULL, NULL, MADB_CS_UNSUPPORTED},
1214   {"20420", "EBCDIC Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1215   {"20423", "EBCDIC Greek", NULL, NULL, MADB_CS_UNSUPPORTED},
1216   {"20424", "EBCDIC Hebrew", NULL, NULL, MADB_CS_UNSUPPORTED},
1217   {"20833", "EBCDIC Korean Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1218   {"20838", "EBCDIC Thai", NULL, NULL, MADB_CS_UNSUPPORTED},
1219   {"20866", "Cyrillic (KOI8-R)", "koi8r", NULL, MADB_CS_EXACT},
1220   {"20871", "EBCDIC Icelandic", NULL, NULL, MADB_CS_UNSUPPORTED},
1221   {"20880", "EBCDIC Cyrillic Russian", NULL, NULL, MADB_CS_UNSUPPORTED},
1222   {"20905", "EBCDIC Turkish", NULL, NULL, MADB_CS_UNSUPPORTED},
1223   {"20924", "EBCDIC Latin 1/Open System (1047 + Euro symbol)", NULL, NULL, MADB_CS_UNSUPPORTED},
1224   {"20932", "Japanese (JIS 0208-1990 and 0121-1990)", "ujis", NULL, MADB_CS_EXACT},
1225   {"20936", "Chinese Simplified (GB2312-80)", "gb2312", NULL, MADB_CS_APPROX},
1226   {"20949", "Korean Wansung", "euckr", NULL, MADB_CS_APPROX},
1227   {"21025", "EBCDIC Cyrillic Serbian-Bulgarian", NULL, NULL, MADB_CS_UNSUPPORTED},
1228   {"21866", "Cyrillic (KOI8-U)", "koi8u", NULL, MADB_CS_EXACT},
1229   {"28591", "Western European (ISO)", "latin1", NULL, MADB_CS_APPROX},
1230   {"28592", "Central European (ISO)", "latin2", NULL, MADB_CS_EXACT},
1231   {"28593", "Latin 3", NULL, NULL, MADB_CS_UNSUPPORTED},
1232   {"28594", "Baltic", NULL, NULL, MADB_CS_UNSUPPORTED},
1233   {"28595", "ISO 8859-5 Cyrillic", NULL, NULL, MADB_CS_UNSUPPORTED},
1234   {"28596", "ISO 8859-6 Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1235   {"28597", "ISO 8859-7 Greek", "greek", NULL, MADB_CS_EXACT},
1236   {"28598", "Hebrew (ISO-Visual)", "hebrew", NULL, MADB_CS_EXACT},
1237   {"28599", "ISO 8859-9 Turkish", "latin5", NULL, MADB_CS_EXACT},
1238   {"28603", "ISO 8859-13 Estonian", "latin7", NULL, MADB_CS_EXACT},
1239   {"28605", "8859-15 Latin 9", NULL, NULL, MADB_CS_UNSUPPORTED},
1240   {"29001", "Europa 3", NULL, NULL, MADB_CS_UNSUPPORTED},
1241   {"38598", "ISO 8859-8 Hebrew; Hebrew (ISO-Logical)", "hebrew", NULL, MADB_CS_EXACT},
1242   {"50220", "ISO 2022 Japanese with no halfwidth Katakana", NULL, NULL, MADB_CS_UNSUPPORTED},
1243   {"50221", "ISO 2022 Japanese with halfwidth Katakana", NULL, NULL, MADB_CS_UNSUPPORTED},
1244   {"50222", "ISO 2022 Japanese JIS X 0201-1989", NULL, NULL, MADB_CS_UNSUPPORTED},
1245   {"50225", "ISO 2022 Korean", NULL, NULL, MADB_CS_UNSUPPORTED},
1246   {"50227", "ISO 2022 Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1247   {"50229", "ISO 2022 Traditional Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1248   {"50930", "EBCDIC Japanese (Katakana) Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1249   {"50931", "EBCDIC US-Canada and Japanese", NULL, NULL, MADB_CS_UNSUPPORTED},
1250   {"50933", "EBCDIC Korean Extended and Korean", NULL, NULL, MADB_CS_UNSUPPORTED},
1251   {"50935", "EBCDIC Simplified Chinese Extended and Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1252   {"50936", "EBCDIC Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1253   {"50937", "EBCDIC US-Canada and Traditional Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1254   {"50939", "EBCDIC Japanese (Latin) Extended and Japanese", NULL, NULL, MADB_CS_UNSUPPORTED},
1255   {"51932", "EUC Japanese", "ujis", NULL, MADB_CS_EXACT},
1256   {"51936", "EUC Simplified Chinese; Chinese Simplified (EUC)", "gb2312", NULL, MADB_CS_EXACT},
1257   {"51949", "EUC Korean", "euckr", NULL, MADB_CS_EXACT},
1258   {"51950", "EUC Traditional Chinese", "big5", NULL, MADB_CS_EXACT},
1259   {"52936", "Chinese Simplified (HZ)", NULL, NULL, MADB_CS_UNSUPPORTED},
1260   {"54936", "Chinese Simplified (GB18030)", NULL, NULL, MADB_CS_UNSUPPORTED},
1261   {"57002", "ISCII Devanagari", NULL, NULL, MADB_CS_UNSUPPORTED},
1262   {"57003", "ISCII Bengali", NULL, NULL, MADB_CS_UNSUPPORTED},
1263   {"57004", "ISCII Tamil", NULL, NULL, MADB_CS_UNSUPPORTED},
1264   {"57005", "ISCII Telugu", NULL, NULL, MADB_CS_UNSUPPORTED},
1265   {"57006", "ISCII Assamese", NULL, NULL, MADB_CS_UNSUPPORTED},
1266   {"57007", "ISCII Oriya", NULL, NULL, MADB_CS_UNSUPPORTED},
1267   {"57008", "ISCII Kannada", NULL, NULL, MADB_CS_UNSUPPORTED},
1268   {"57009", "ISCII Malayalam", NULL, NULL, MADB_CS_UNSUPPORTED},
1269   {"57010", "ISCII Gujarati", NULL, NULL, MADB_CS_UNSUPPORTED},
1270   {"57011", "ISCII Punjabi", NULL, NULL, MADB_CS_UNSUPPORTED},
1271   {"65000", "utf-7 Unicode (UTF-7)", NULL, NULL, MADB_CS_UNSUPPORTED},
1272   {"65001", "utf-8 Unicode (UTF-8)", "utf8", NULL, MADB_CS_EXACT},
1273   /* non Windows */
1274 #else
1275   /* iconv encodings */
1276   {"ASCII", "US-ASCII", "ascii", "ASCII", MADB_CS_APPROX},
1277   {"US-ASCII", "US-ASCII", "ascii", "ASCII", MADB_CS_APPROX},
1278   {"Big5", "Chinese for Taiwan Multi-byte set", "big5", "BIG5", MADB_CS_EXACT},
1279   {"CP866", "IBM 866", "cp866", "CP866", MADB_CS_EXACT},
1280   {"IBM-1252", "Catalan Spain", "cp1252", "CP1252", MADB_CS_EXACT},
1281   {"ISCII-DEV", "Hindi", NULL, NULL, MADB_CS_UNSUPPORTED},
1282   {"ISO-8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1283   {"ISO8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1284   {"ISO_8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1285   {"ISO88591", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1286   {"ISO-8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1287   {"ISO8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1288   {"ISO_8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1289   {"ISO885913", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1290   {"ISO-8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1291   {"ISO8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1292   {"ISO_8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1293   {"ISO885915", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1294   {"ISO-8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1295   {"ISO8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1296   {"ISO_8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1297   {"ISO88592", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1298   {"ISO-8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1299   {"ISO8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1300   {"ISO_8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1301   {"ISO88597", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1302   {"ISO-8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1303   {"ISO8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1304   {"ISO_8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1305   {"ISO88598", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1306   {"ISO-8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1307   {"ISO8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1308   {"ISO_8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1309   {"ISO88599", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1310   {"ISO-8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1311   {"ISO8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1312   {"ISO_8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1313   {"ISO88594", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1314   {"ISO-8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1315   {"ISO8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1316   {"ISO_8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1317   {"ISO88595", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1318   {"KOI8-R", "KOI8-R", "koi8r", "KOI8R", MADB_CS_EXACT},
1319   {"koi8r", "KOI8-R", "koi8r", "KOI8R", MADB_CS_EXACT},
1320   {"KOI8-U", "KOI8-U", "koi8u", "KOI8U", MADB_CS_EXACT},
1321   {"koi8u", "KOI8-U", "koi8u", "KOI8U", MADB_CS_EXACT},
1322   {"koi8t", "KOI8-T", NULL, "KOI8-T", MADB_CS_UNSUPPORTED},
1323   {"KOI8-T", "KOI8-T", NULL, "KOI8-T", MADB_CS_UNSUPPORTED},
1324   {"SJIS", "SHIFT_JIS", "sjis", "SJIS", MADB_CS_EXACT},
1325   {"Shift-JIS", "SHIFT_JIS", "sjis", "SJIS", MADB_CS_EXACT},
1326   {"ansi1251", "Cyrillic", "cp1251", "CP1251", MADB_CS_EXACT},
1327   {"cp1251", "Cyrillic", "cp1251", "CP1251", MADB_CS_EXACT},
1328   {"armscii8", "Armenian", "armscii8", "ASMSCII-8", MADB_CS_EXACT},
1329   {"armscii-8", "Armenian", "armscii8", "ASMSCII-8", MADB_CS_EXACT},
1330   {"big5hkscs", "Big5-HKSCS", NULL, NULL, MADB_CS_UNSUPPORTED},
1331   {"cp1255", "Hebrew", "cp1255", "CP1255", MADB_CS_EXACT},
1332   {"eucCN", "GB-2312", "gb2312", "GB2312", MADB_CS_EXACT},
1333   {"eucJP", "UJIS", "ujis", "UJIS", MADB_CS_EXACT},
1334   {"eucKR", "EUC-KR", "euckr", "EUCKR", MADB_CS_EXACT},
1335   {"euctw", "EUC-TW", NULL, NULL, MADB_CS_UNSUPPORTED},
1336   {"gb18030", "GB 18030-2000", "gb18030", "GB18030", MADB_CS_UNSUPPORTED},
1337   {"gb2312", "GB2312", "gb2312", "GB2312", MADB_CS_EXACT},
1338   {"gbk", "GBK", "gbk", "GBK", MADB_CS_EXACT},
1339   {"georgianps", "Georgian", "geostd8", "GEORGIAN-PS", MADB_CS_EXACT},
1340   {"utf8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT},
1341   {"utf-8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT},
1342 #endif
1343   {NULL, NULL, NULL, NULL, 0}
1344 };
1345 /* }}} */
1346 
1347 /* {{{ madb_get_os_character_set */
madb_get_os_character_set()1348 const char *madb_get_os_character_set()
1349 {
1350   unsigned int i= 0;
1351   char *p= NULL;
1352 #ifdef _WIN32
1353   char codepage[FN_REFLEN];
1354   snprintf(codepage, FN_REFLEN, "%u", GetConsoleCP() ? GetConsoleCP() : GetACP());
1355   p= codepage;
1356 #elif defined(HAVE_NL_LANGINFO) && defined(HAVE_SETLOCALE)
1357   if (setlocale(LC_CTYPE, ""))
1358     p= nl_langinfo(CODESET);
1359 #endif
1360   if (!p)
1361     return MADB_DEFAULT_CHARSET_NAME;
1362   while (MADB_OS_CHARSET[i].identifier)
1363   {
1364     if (MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED &&
1365         strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0)
1366       return MADB_OS_CHARSET[i].charset;
1367     i++;
1368   }
1369   return MADB_DEFAULT_CHARSET_NAME;
1370 }
1371 /* }}} */
1372 
1373 /* {{{ madb_get_code_page */
1374 #ifdef _WIN32
madb_get_windows_cp(const char * charset)1375 int madb_get_windows_cp(const char *charset)
1376 {
1377   unsigned int i= 0;
1378   while (MADB_OS_CHARSET[i].identifier)
1379   {
1380     if (MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED &&
1381         strcmp(MADB_OS_CHARSET[i].charset, charset) == 0)
1382       return atoi(MADB_OS_CHARSET[i].identifier);
1383     i++;
1384   }
1385   return -1;
1386 }
1387 #endif
1388 /* }}} */
1389 
1390 #ifdef HAVE_ICONV
/* {{{ map_charset_name
   Changes a charset name into something iconv understands, if necessary.
   Names of the form UTF<digits>[<endianness>] are rewritten to
   UTF-<digits><endianness>, e.g. UTF16 does not work for iconv, while
   UTF-16 does. Another purpose is to avoid BOMs in the result string: if the
   name carries no endianness, BE is added. UTF8 has no byte order, so it is
   mapped to plain UTF-8. Any other name is copied as is.

   @param[in]  cs_name    charset name to map
   @param[in]  target_cs  if set, "//TRANSLIT" is appended to the mapped name
                          (as far as it fits into the buffer)
   @param[out] buffer     receives the mapped, always NUL terminated name
   @param[in]  buff_len   size of buffer in bytes; nothing is written if 0
 */
static void map_charset_name(const char *cs_name, my_bool target_cs, char *buffer, size_t buff_len)
{
  char digits[3]= "", endianness[3]= "BE";

  if (!buffer || !buff_len)
    return;

  /* sscanf() returns EOF - which is non-zero - if the input ends before the
     first conversion (e.g. "U" or "UTF"), so explicitly require that at
     least the digits have been converted. */
  if (cs_name && sscanf(cs_name, "UTF%2[0-9]%2[LBE]", digits, endianness) >= 1)
  {
    /* Endianness we write either default(BE), or what we found in the string */
    snprintf(buffer, buff_len, "UTF-%s%s", digits,
             strcmp(digits, "8") ? endianness : "");
  }
  else
  {
    /* Not our client - copy as is */
    snprintf(buffer, buff_len, "%s", cs_name ? cs_name : "");
  }

  if (target_cs)
  {
    /* strncat() writes a terminating NUL in addition to the given number of
       characters, so one byte has to be reserved for it */
    strncat(buffer, "//TRANSLIT", buff_len - strlen(buffer) - 1);
  }
}
1417 /* }}} */
1418 #endif
1419 
1420 /* {{{ mariadb_convert_string
1421    Converts string from one charset to another, and writes converted string to given buffer
1422    @param[in]     from
1423    @param[in/out] from_len
1424    @param[in]     from_cs
1425    @param[out]    to
1426    @param[in/out] to_len
1427    @param[in]     to_cs
1428    @param[out]    errorcode
1429 
1430    @return -1 in case of error, bytes used in the "to" buffer, otherwise
1431  */
mariadb_convert_string(const char * from,size_t * from_len,MARIADB_CHARSET_INFO * from_cs,char * to,size_t * to_len,MARIADB_CHARSET_INFO * to_cs,int * errorcode)1432 size_t STDCALL mariadb_convert_string(const char *from __attribute__((unused)),
1433                                       size_t *from_len __attribute__((unused)),
1434                                       MARIADB_CHARSET_INFO *from_cs __attribute__((unused)),
1435                                       char *to __attribute__((unused)),
1436                                       size_t *to_len __attribute__((unused)),
1437                                       MARIADB_CHARSET_INFO *to_cs __attribute__((unused)), int *errorcode)
1438 {
1439 #ifndef HAVE_ICONV
1440   *errorcode= ENOTSUP;
1441   return -1;
1442 #else
1443   iconv_t conv= 0;
1444   size_t rc= -1;
1445   size_t save_len= *to_len;
1446   char to_encoding[128], from_encoding[128];
1447 
1448   *errorcode= 0;
1449 
1450   /* check if conversion is supported */
1451   if (!from_cs || !from_cs->encoding || !from_cs->encoding[0] ||
1452       !to_cs || !to_cs->encoding || !to_cs->encoding[0])
1453   {
1454     *errorcode= EINVAL;
1455     return rc;
1456   }
1457 
1458   map_charset_name(to_cs->encoding, 1, to_encoding, sizeof(to_encoding));
1459   map_charset_name(from_cs->encoding, 0, from_encoding, sizeof(from_encoding));
1460 
1461   if ((conv= iconv_open(to_encoding, from_encoding)) == (iconv_t)-1)
1462   {
1463     *errorcode= errno;
1464     goto error;
1465   }
1466   if ((rc= iconv(conv, IF_WIN(,IF_SOLARIS(,(char **)))&from, from_len, &to, to_len)) == (size_t)-1)
1467   {
1468     *errorcode= errno;
1469     goto error;
1470   }
1471   rc= save_len - *to_len;
1472 error:
1473   if (conv != (iconv_t)-1)
1474     iconv_close(conv);
1475   return rc;
1476 #endif
1477 }
1478 /* }}} */
1479 
1480