1 /****************************************************************************
2 Copyright (C) 2012, 2020, MariaDB Corporation.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; either
7 version 2 of the License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with this library; if not see <http://www.gnu.org/licenses>
16 or write to the Free Software Foundation, Inc.,
17 51 Franklin St., Fifth Floor, Boston, MA 02110, USA
18
19 Part of this code includes code from the PHP project which
20 is freely available from http://www.php.net
21 *****************************************************************************/
22
23 /* The implementation for character set support was ported from PHP's mysqlnd
24 extension, written by Andrey Hristov, Georg Richter and Ulf Wendel
25
26 Original file header:
27 +----------------------------------------------------------------------+
28 | PHP Version 5 |
29 +----------------------------------------------------------------------+
30 | Copyright (c) 2006-2011 The PHP Group |
31 +----------------------------------------------------------------------+
32 | This source file is subject to version 3.01 of the PHP license, |
33 | that is bundled with this package in the file LICENSE, and is |
34 | available through the world-wide-web at the following url: |
35 | http://www.php.net/license/3_01.txt |
36 | If you did not receive a copy of the PHP license and are unable to |
37 | obtain it through the world-wide-web, please send a note to |
38 | license@php.net so we can mail you a copy immediately. |
39 +----------------------------------------------------------------------+
40 | Authors: Georg Richter <georg@mysql.com> |
41 | Andrey Hristov <andrey@mysql.com> |
42 | Ulf Wendel <uwendel@mysql.com> |
43 +----------------------------------------------------------------------+
44 */
45
46 #ifndef _WIN32
47 #include <strings.h>
48 #include <string.h>
49 #else
50 #include <string.h>
51 #endif
52 #include <ma_global.h>
53 #include <mariadb_ctype.h>
54 #include <ma_string.h>
55
56 #ifdef HAVE_ICONV
57 #ifdef _WIN32
58 #include "../win-iconv/iconv.h"
59 #else
60 #include <iconv.h>
61 #endif
62 #endif
63
64
65 #if defined(HAVE_NL_LANGINFO) && defined(HAVE_SETLOCALE)
66 #include <locale.h>
67 #include <langinfo.h>
68 #endif
69
70 /*
71 +----------------------------------------------------------------------+
72 | PHP Version 5 |
73 +----------------------------------------------------------------------+
74 | Copyright (c) 2006-2011 The PHP Group |
75 +----------------------------------------------------------------------+
76 | This source file is subject to version 3.01 of the PHP license, |
77 | that is bundled with this package in the file LICENSE, and is |
78 | available through the world-wide-web at the following url: |
79 | http://www.php.net/license/3_01.txt |
80 | If you did not receive a copy of the PHP license and are unable to |
81 | obtain it through the world-wide-web, please send a note to |
82 | license@php.net so we can mail you a copy immediately. |
83 +----------------------------------------------------------------------+
84 | Authors: Georg Richter <georg@mysql.com> |
85 | Andrey Hristov <andrey@mysql.com> |
86 | Ulf Wendel <uwendel@mysql.com> |
87 +----------------------------------------------------------------------+
88 */
89
90 /* {{{ utf8 functions */
/*
  Length of the utf8mb3 (1..3 byte UTF-8) sequence beginning at start.

  start  first byte of the candidate sequence
  end    one past the last readable byte

  Returns 1, 2 or 3 when a well-formed sequence fits in [start, end).
  Returns 0 when the range is empty, the sequence is truncated or malformed,
  or the lead byte is 0xF0 or above.
*/
static unsigned int check_mb_utf8mb3_sequence(const char *start, const char *end)
{
  const unsigned char *p = (const unsigned char *) start;
  unsigned char lead;

  if (start >= end)
    return 0;

  lead = p[0];

  if (lead < 0x80)
    return 1;                       /* single byte character */
  if (lead < 0xC2)
    return 0;                       /* 0x80..0xC1 cannot start a character */

  if (lead < 0xE0)
  {
    /* two byte form: lead + one continuation byte (0x80..0xBF) */
    if (end - start < 2)
      return 0;                     /* too small */
    return ((p[1] & 0xC0) == 0x80) ? 2 : 0;
  }

  if (lead < 0xF0)
  {
    /* three byte form: lead + two continuation bytes */
    if (end - start < 3)
      return 0;                     /* too small */
    if ((p[1] & 0xC0) != 0x80 || (p[2] & 0xC0) != 0x80)
      return 0;                     /* invalid utf8 character */
    /* after lead byte 0xE0 the second byte must be at least 0xA0 */
    if (lead == 0xE0 && p[1] < 0xA0)
      return 0;
    return 3;
  }

  return 0;
}
128
129
/*
  Length of the UTF-8 sequence (1..4 bytes) beginning at start.

  start  first byte of the candidate sequence
  end    one past the last readable byte

  Returns 1, 2, 3 or 4 when a well-formed sequence fits in [start, end).
  Returns 0 when the range is empty, the sequence is truncated or malformed,
  or the lead byte is 0xF5 or above.
*/
static unsigned int check_mb_utf8_sequence(const char *start, const char *end)
{
  const unsigned char *p = (const unsigned char *) start;
  unsigned char lead;

  if (start >= end)
    return 0;

  lead = p[0];

  if (lead < 0x80)
    return 1;                       /* single byte character */
  if (lead < 0xC2)
    return 0;                       /* 0x80..0xC1 cannot start a character */

  if (lead < 0xE0)
  {
    /* two byte form: lead + one continuation byte (0x80..0xBF) */
    if (end - start < 2)
      return 0;                     /* too small */
    return ((p[1] & 0xC0) == 0x80) ? 2 : 0;
  }

  if (lead < 0xF0)
  {
    /* three byte form: lead + two continuation bytes */
    if (end - start < 3)
      return 0;                     /* too small */
    if ((p[1] & 0xC0) != 0x80 || (p[2] & 0xC0) != 0x80)
      return 0;                     /* invalid utf8 character */
    /* after lead byte 0xE0 the second byte must be at least 0xA0 */
    if (lead == 0xE0 && p[1] < 0xA0)
      return 0;
    return 3;
  }

  if (lead < 0xF5)
  {
    /*
      Four byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

      The bit layout could encode U+00010000..U+001FFFFF, but the highest
      character defined by Unicode is U+0010FFFF, so only these are accepted:

        [F0][90..BF][80..BF][80..BF]     F0.90.80.80 == U+00010000 (min)
        [F1][80..BF][80..BF][80..BF]
        [F2][80..BF][80..BF][80..BF]
        [F3][80..BF][80..BF][80..BF]
        [F4][80..8F][80..BF][80..BF]     F4.8F.BF.BF == U+0010FFFF (max)
    */
    if (end - start < 4)
      return 0;                     /* too small */
    if ((p[1] & 0xC0) != 0x80 || (p[2] & 0xC0) != 0x80 || (p[3] & 0xC0) != 0x80)
      return 0;                     /* invalid utf8 character */
    if (lead == 0xF0 && p[1] < 0x90)
      return 0;                     /* below U+00010000 */
    if (lead == 0xF4 && p[1] > 0x8F)
      return 0;                     /* above U+0010FFFF */
    return 4;
  }

  return 0;
}
201
/*
  Multi-byte validator for utf8mb3.

  Returns the length (2 or 3) of the sequence at start when
  check_mb_utf8mb3_sequence() reports a multi-byte character, otherwise 0.
  A single-byte character therefore also yields 0.
*/
static unsigned int check_mb_utf8mb3_valid(const char *start, const char *end)
{
  const unsigned int seq_len = check_mb_utf8mb3_sequence(start, end);

  if (seq_len <= 1)
    return 0;
  return seq_len;
}
207
/*
  Multi-byte validator for 4-byte capable UTF-8.

  Returns the length (2, 3 or 4) of the sequence at start when
  check_mb_utf8_sequence() reports a multi-byte character, otherwise 0.
  A single-byte character therefore also yields 0.
*/
static unsigned int check_mb_utf8_valid(const char *start, const char *end)
{
  const unsigned int seq_len = check_mb_utf8_sequence(start, end);

  if (seq_len <= 1)
    return 0;
  return seq_len;
}
213
214
/*
  Character length implied by a utf8mb3 lead byte value.

  utf8  value of the first byte of a character

  Returns 1 for values below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF and
  0 for anything else (0x80..0xC1 and 0xF0 upwards).
*/
static unsigned int mysql_mbcharlen_utf8mb3(unsigned int utf8)
{
  if (utf8 >= 0xF0)
    return 0;                       /* not representable in utf8mb3 */
  if (utf8 >= 0xE0)
    return 3;                       /* triple byte character */
  if (utf8 >= 0xC2)
    return 2;                       /* double byte character */
  /* below 0x80: single byte; 0x80..0xC1: invalid multibyte header */
  return (utf8 < 0x80) ? 1 : 0;
}
231
232
/*
  Character length implied by a UTF-8 lead byte value.

  utf8  value of the first byte of a character

  Returns 1 for values below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF,
  4 for 0xF0..0xF7 and 0 for anything else (0x80..0xC1 and 0xF8 upwards).
*/
static unsigned int mysql_mbcharlen_utf8(unsigned int utf8)
{
  if (utf8 >= 0xF8)
    return 0;
  if (utf8 >= 0xF0)
    return 4;                       /* four byte character */
  if (utf8 >= 0xE0)
    return 3;                       /* triple byte character */
  if (utf8 >= 0xC2)
    return 2;                       /* double byte character */
  /* below 0x80: single byte; 0x80..0xC1: invalid multibyte header */
  return (utf8 < 0x80) ? 1 : 0;
}
252 /* }}} */
253
254
255 /* {{{ big5 functions */
/* First byte of a two-byte big5 character: 0xA1..0xF9 */
#define valid_big5head(c)  (0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xF9)
/* Second byte of a two-byte big5 character: 0x40..0x7E or 0xA1..0xFE */
#define valid_big5tail(c)  ((0x40 <= (unsigned int)(c) && (unsigned int)(c) <= 0x7E) || \
                            (0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xFE))

/*
  True when (c, d) form a two-byte big5 character.
  Built on the valid_big5head/valid_big5tail macros defined above; the
  previous expansion named isbig5head/isbig5tail, which are not defined in
  the visible source.
*/
#define isbig5code(c,d)    (valid_big5head(c) && valid_big5tail(d))

/*
  Multi-byte validator for big5.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 when start[0] is a valid head byte, at least two bytes are
  available and start[1] is a valid tail byte; otherwise 0.
*/
static unsigned int check_mb_big5(const char *start, const char *end)
{
  const unsigned char *p = (const unsigned char *) start;

  return (valid_big5head(p[0]) && (end - start) > 1 && valid_big5tail(p[1])) ? 2 : 0;
}


/*
  Character length implied by a big5 first byte value:
  2 when it is a valid head byte, otherwise 1.
*/
static unsigned int mysql_mbcharlen_big5(unsigned int big5)
{
  return (valid_big5head(big5)) ? 2 : 1;
}
272 /* }}} */
273
274
275 /* {{{ cp932 functions */
/*
  First byte of a two-byte cp932 character: 0x81..0x9F or 0xE0..0xFC.
  Second byte of a two-byte cp932 character: 0x40..0x7E or 0x80..0xFC.

  Every use of the parameter is parenthesized so that an expression argument
  (e.g. a | b) is compared as a whole. The macros do not cast: callers pass
  a value already narrowed to an unsigned byte.
*/
#define valid_cp932head(c) ((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_cp932tail(c) ((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
  Multi-byte validator for cp932.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 when start[0] is a valid head byte, at least two bytes are
  available and start[1] is a valid tail byte; otherwise 0.
*/
static unsigned int check_mb_cp932(const char *start, const char *end)
{
  return (valid_cp932head((unsigned char)start[0]) && (end - start > 1) &&
          valid_cp932tail((unsigned char)start[1])) ? 2 : 0;
}


/*
  Character length implied by a cp932 first byte (low 8 bits of cp932):
  2 when it is a valid head byte, otherwise 1.
*/
static unsigned int mysql_mbcharlen_cp932(unsigned int cp932)
{
  return (valid_cp932head((unsigned char)cp932)) ? 2 : 1;
}
291 /* }}} */
292
293
294 /* {{{ euckr functions */
/* Byte range 0xA1..0xFE; only the low 8 bits of the argument are looked at */
#define valid_euckr(b) ((unsigned char)(b) >= 0xA1 && (unsigned char)(b) <= 0xFE)

/*
  Multi-byte validator for euckr.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 when at least two bytes are available, the first byte is 0x80 or
  above and the second byte is in 0xA1..0xFE; otherwise 0.
*/
static unsigned int check_mb_euckr(const char *start, const char *end)
{
  const unsigned char *p = (const unsigned char *) start;

  if (end - start <= 1)
    return 0;                       /* invalid length */
  if (p[0] < 0x80)
    return 0;                       /* invalid euckr character */
  return valid_euckr(p[1]) ? 2 : 0;
}


/*
  Character length implied by a euckr first byte:
  2 when its low 8 bits are in 0xA1..0xFE, otherwise 1.
*/
static unsigned int mysql_mbcharlen_euckr(unsigned int kr)
{
  if (valid_euckr(kr))
    return 2;
  return 1;
}
316 /* }}} */
317
318
319 /* {{{ eucjpms functions */
/* All four macros look only at the low 8 bits of their argument */
#define valid_eucjpms(c)      (((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xFE)  /* 0xA1..0xFE */
#define valid_eucjpms_kata(c) (((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xDF)  /* 0xA1..0xDF */
#define valid_eucjpms_ss2(c)  (((c) & 0xFF) == 0x8E)                          /* prefix 0x8E */
#define valid_eucjpms_ss3(c)  (((c) & 0xFF) == 0x8F)                          /* prefix 0x8F */

/*
  Multi-byte validator for eucjpms.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns
    2  for [A1..FE][A1..FE]
    2  for [8E][A1..DF]
    3  for [8F][A1..FE][A1..FE]
    0  when the range is empty, the first byte is below 0x80, the character
       is truncated or none of the forms above matches.

  The three-byte form reports 3, matching mysql_mbcharlen_eucjpms() and the
  equivalent branch of check_mb_ujis().
*/
static unsigned int check_mb_eucjpms(const char *start, const char *end)
{
  if (start >= end) {
    return 0;  /* nothing to inspect */
  }
  if (*((const unsigned char *)start) < 0x80) {
    return 0;  /* invalid eucjpms character */
  }
  if (valid_eucjpms(start[0]) && (end - start) > 1 && valid_eucjpms(start[1])) {
    return 2;
  }
  if (valid_eucjpms_ss2(start[0]) && (end - start) > 1 && valid_eucjpms_kata(start[1])) {
    return 2;
  }
  if (valid_eucjpms_ss3(start[0]) && (end - start) > 2 && valid_eucjpms(start[1]) &&
      valid_eucjpms(start[2])) {
    return 3;  /* prefix byte plus two character bytes */
  }
  return 0;
}


/*
  Character length implied by an eucjpms first byte (low 8 bits of jpms):
  2 for 0xA1..0xFE and for the 0x8E prefix, 3 for the 0x8F prefix,
  otherwise 1.
*/
static unsigned int mysql_mbcharlen_eucjpms(unsigned int jpms)
{
  if (valid_eucjpms(jpms) || valid_eucjpms_ss2(jpms)) {
    return 2;
  }
  if (valid_eucjpms_ss3(jpms)) {
    return 3;
  }
  return 1;
}
354 /* }}} */
355
356
357 /* {{{ gb2312 functions */
/* First byte 0xA1..0xF7, second byte 0xA1..0xFE; only the low 8 bits are looked at */
#define valid_gb2312_head(b) ((unsigned char)(b) >= 0xA1 && (unsigned char)(b) <= 0xF7)
#define valid_gb2312_tail(b) ((unsigned char)(b) >= 0xA1 && (unsigned char)(b) <= 0xFE)


/*
  Multi-byte validator for gb2312.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 when start[0] is a valid head byte, at least two bytes are
  available and start[1] is a valid tail byte; otherwise 0.
*/
static unsigned int check_mb_gb2312(const char *start, const char *end)
{
  if (!valid_gb2312_head(start[0]))
    return 0;
  if (end - start <= 1)
    return 0;
  return valid_gb2312_tail(start[1]) ? 2 : 0;
}


/*
  Character length implied by a gb2312 first byte (low 8 bits of gb):
  2 when it is a valid head byte, otherwise 1.
*/
static unsigned int mysql_mbcharlen_gb2312(unsigned int gb)
{
  if (valid_gb2312_head(gb))
    return 2;
  return 1;
}
373 /* }}} */
374
375
376 /* {{{ gbk functions */
/* First byte 0x81..0xFE; second byte 0x40..0x7E or 0x80..0xFE (low 8 bits only) */
#define valid_gbk_head(b) ((unsigned char)(b) >= 0x81 && (unsigned char)(b) <= 0xFE)
#define valid_gbk_tail(b) (((unsigned char)(b) >= 0x40 && (unsigned char)(b) <= 0x7E) || \
                           ((unsigned char)(b) >= 0x80 && (unsigned char)(b) <= 0xFE))

/*
  Multi-byte validator for gbk.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 when start[0] is a valid head byte, at least two bytes are
  available and start[1] is a valid tail byte; otherwise 0.
*/
static unsigned int check_mb_gbk(const char *start, const char *end)
{
  if (!valid_gbk_head(start[0]))
    return 0;
  if (end - start <= 1)
    return 0;
  return valid_gbk_tail(start[1]) ? 2 : 0;
}

/*
  Character length implied by a gbk first byte (low 8 bits of gbk):
  2 when it is a valid head byte, otherwise 1.
*/
static unsigned int mysql_mbcharlen_gbk(unsigned int gbk)
{
  if (valid_gbk_head(gbk))
    return 2;
  return 1;
}
389 /* }}} */
390
391
392 /* {{{ sjis functions */
/*
  First byte of a two-byte sjis character: 0x81..0x9F or 0xE0..0xFC.
  Second byte: 0x40..0x7E or 0x80..0xFC.
  The macros do not cast; callers pass a value already narrowed to an
  unsigned byte.
*/
#define valid_sjis_head(b) (((b) >= 0x81 && (b) <= 0x9F) || ((b) >= 0xE0 && (b) <= 0xFC))
#define valid_sjis_tail(b) (((b) >= 0x40 && (b) <= 0x7E) || ((b) >= 0x80 && (b) <= 0xFC))


/*
  Multi-byte validator for sjis.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 when start[0] is a valid head byte, at least two bytes are
  available and start[1] is a valid tail byte; otherwise 0.
*/
static unsigned int check_mb_sjis(const char *start, const char *end)
{
  const unsigned char lead = (unsigned char) start[0];

  if (!valid_sjis_head(lead) || (end - start) <= 1)
    return 0;
  return valid_sjis_tail((unsigned char) start[1]) ? 2 : 0;
}


/*
  Character length implied by an sjis first byte (low 8 bits of sjis):
  2 when it is a valid head byte, otherwise 1.
*/
static unsigned int mysql_mbcharlen_sjis(unsigned int sjis)
{
  const unsigned char lead = (unsigned char) sjis;

  if (valid_sjis_head(lead))
    return 2;
  return 1;
}
407 /* }}} */
408
409
410 /* {{{ ucs2 functions */
/*
  Multi-byte validator for ucs2.
  Neither argument is examined; the result is always 2.
*/
static unsigned int check_mb_ucs2(const char *start, const char *end)
{
  (void) start;
  (void) end;

  return 2;
}
415
/*
  Character length for ucs2.
  The argument is not examined; the result is always 2.
*/
static unsigned int mysql_mbcharlen_ucs2(unsigned int ucs2)
{
  (void) ucs2;

  return 2;
}
420 /* }}} */
421
422
423 /* {{{ ujis functions */
/* All four macros look only at the low 8 bits of their argument */
#define valid_ujis(c)      ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xFE))  /* 0xA1..0xFE */
#define valid_ujis_kata(c) ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xDF))  /* 0xA1..0xDF */
#define valid_ujis_ss2(c)  (((c)&0xFF) == 0x8E)                          /* prefix 0x8E */
#define valid_ujis_ss3(c)  (((c)&0xFF) == 0x8F)                          /* prefix 0x8F */

/*
  Multi-byte validator for ujis.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns
    2  for [A1..FE][A1..FE]
    2  for [8E][A1..DF]
    3  for [8F][A1..FE][A1..FE]
    0  when the range is empty, the first byte is below 0x80, the character
       is truncated or none of the forms above matches.

  Every form checks the number of available bytes before reading past
  start[0], so a character cut off at end is rejected instead of being read
  beyond the buffer.
*/
static unsigned int check_mb_ujis(const char *start, const char *end)
{
  if (start >= end) {
    return 0;  /* nothing to inspect */
  }
  if (*(const unsigned char *)start < 0x80) {
    return 0;  /* invalid ujis character */
  }
  if (valid_ujis(start[0]) && (end - start) > 1 && valid_ujis(start[1])) {
    return 2;
  }
  if (valid_ujis_ss2(start[0]) && (end - start) > 1 && valid_ujis_kata(start[1])) {
    return 2;
  }
  if (valid_ujis_ss3(start[0]) && (end - start) > 2 && valid_ujis(start[1]) &&
      valid_ujis(start[2])) {
    return 3;
  }
  return 0;
}


/*
  Character length implied by a ujis first byte (low 8 bits of ujis):
  2 for 0xA1..0xFE and for the 0x8E prefix, 3 for the 0x8F prefix,
  otherwise 1.
*/
static unsigned int mysql_mbcharlen_ujis(unsigned int ujis)
{
  if (valid_ujis(ujis) || valid_ujis_ss2(ujis)) {
    return 2;
  }
  if (valid_ujis_ss3(ujis)) {
    return 3;
  }
  return 1;
}
451 /* }}} */
452
453
454
455 /* {{{ utf16 functions */
/*
  Both macros test the top six bits of the low 8 bits of their argument:
  110110xx (0xD8..0xDB) for HIGH, 110111xx (0xDC..0xDF) for LOW.
*/
#define UTF16_HIGH_HEAD(x)  ((((unsigned char) (x)) & 0xFC) == 0xD8)
#define UTF16_LOW_HEAD(x)   ((((unsigned char) (x)) & 0xFC) == 0xDC)

/*
  Multi-byte validator for utf16.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns
    0  when fewer than two bytes are available
    4  when start[0] is a HIGH head, four bytes are available and start[2]
       is a LOW head
    0  when start[0] is a HIGH head but the condition above fails
    0  when start[0] is a LOW head
    2  otherwise
*/
static unsigned int check_mb_utf16(const char *start, const char *end)
{
  if (end - start < 2) {
    return 0;
  }

  if (UTF16_HIGH_HEAD(start[0])) {
    return ((end - start) >= 4 && UTF16_LOW_HEAD(start[2])) ? 4 : 0;
  }

  if (UTF16_LOW_HEAD(start[0])) {
    return 0;
  }
  return 2;
}


/*
  Character length implied by a utf16 first byte (low 8 bits of utf16):
  4 when it is a HIGH head, otherwise 2.

  The return type is spelled unsigned int like the other mbcharlen functions
  that are stored in the same charset table slot.
*/
static unsigned int mysql_mbcharlen_utf16(unsigned int utf16)
{
  return UTF16_HIGH_HEAD(utf16) ? 4 : 2;
}
480 /* }}} */
481
482
483 /* {{{ utf32 functions */
/*
  Multi-byte validator for utf32.
  Neither argument is examined; the result is always 4.

  The return type is spelled unsigned int like the other validators that are
  stored in the same charset table slot.
*/
static unsigned int
check_mb_utf32(const char *start, const char *end)
{
  (void) start;
  (void) end;

  return 4;
}
489
490
/*
  Character length for utf32.
  The argument is not examined; the result is always 4.

  The return type is spelled unsigned int like the other mbcharlen functions
  that are stored in the same charset table slot.
*/
static unsigned int
mysql_mbcharlen_utf32(unsigned int utf32)
{
  (void) utf32;

  return 4;
}
496 /* }}} */
497
498 /* {{{ gb18030 functions */
/*
  Byte classes used by the gb18030 checks (low 8 bits of the argument):
    odd     0x81..0xFE               bytes 1 and 3 of a character
    even_2  0x40..0x7E, 0x80..0xFE   byte 2 of a two-byte character
    even_4  0x30..0x39               bytes 2 and 4 of a four-byte character
*/
#define is_gb18030_odd(b)    ((unsigned char) (b) >= 0x81 && (unsigned char) (b) <= 0xFE)
#define is_gb18030_even_2(b) (((unsigned char) (b) >= 0x40 && (unsigned char) (b) <= 0x7E) || \
                              ((unsigned char) (b) >= 0x80 && (unsigned char) (b) <= 0xFE))
#define is_gb18030_even_4(b) ((unsigned char) (b) >= 0x30 && (unsigned char) (b) <= 0x39)


/*
  Character length for gb18030, derived from one or two leading bytes.

  c  either a single byte value (<= 0xFF) or the first two bytes of a
     character packed as (byte1 << 8) | byte2

  Returns
    for c <= 0xFF: 1 when c is not an odd byte (0x81..0xFE), otherwise 0
    for larger c:  0 when c exceeds 0xFFFF or byte1 is not an odd byte,
                   2 when byte2 is an even_2 byte,
                   4 when byte2 is an even_4 byte,
                   0 otherwise
*/
static unsigned int mysql_mbcharlen_gb18030(unsigned int c)
{
  unsigned int first, second;

  if (c <= 0xFF)
    return is_gb18030_odd(c) ? 0 : 1;

  if (c > 0xFFFF)
    return 0;

  first  = (c >> 8) & 0xFF;
  second = c & 0xFF;

  if (!is_gb18030_odd(first))
    return 0;
  if (is_gb18030_even_2(second))
    return 2;
  return is_gb18030_even_4(second) ? 4 : 0;
}

/*
  Multi-byte validator for gb18030.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 for [odd][even_2], 4 for [odd][even_4][odd][even_4] when four
  bytes are available, and 0 when fewer than two bytes are available or no
  form matches.
*/
static unsigned int check_mb_gb18030_valid(const char * start, const char * end)
{
  if (end - start <= 1)
    return 0;
  if (!is_gb18030_odd(start[0]))
    return 0;

  if (is_gb18030_even_2(start[1]))
    return 2;

  if (end - start > 3 &&
      is_gb18030_even_4(start[1]) &&
      is_gb18030_odd(start[2]) &&
      is_gb18030_even_4(start[3]))
    return 4;

  return 0;
}
536 /* }}} */
537
/*
  Character set names used for the two UTF-8 variants in the table below.

  For backward compatibility the server sometimes exposes the full (4-byte)
  UTF-8 as "utf8mb4" and the old 3-byte variant as "utf8"; in other builds
  "utf8" means the full encoding and the old one is called "utf8mb3".
  Keeping both names behind macros makes them easy to change, and the choice
  could be made compile-time dependent.
*/

#define UTF8_MB4  "utf8mb4"   /* name of the 4-byte capable UTF-8 character set */
#define UTF8_MB3  "utf8"      /* name of the 3-byte UTF-8 character set */
546
547 /* {{{ mysql_charsets */
548 const MARIADB_CHARSET_INFO mariadb_compiled_charsets[] =
549 {
550 { 1, 1, "big5","big5_chinese_ci", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
551 { 3, 1, "dec8", "dec8_swedish_ci", "", 0, "DEC", 1, 1, NULL, NULL},
552 { 4, 1, "cp850", "cp850_general_ci", "", 850, "CP850", 1, 1, NULL, NULL},
553 { 6, 1, "hp8", "hp8_english_ci", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
554 { 7, 1, "koi8r", "koi8r_general_ci", "", 20866, "KOI8R", 1, 1, NULL, NULL},
555 { 8, 1, "latin1", "latin1_swedish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
556 { 9, 1, "latin2", "latin2_general_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
557 { 10, 1, "swe7", "swe7_swedish_ci", "", 20107, "", 1, 1, NULL, NULL},
558 { 11, 1, "ascii", "ascii_general_ci", "", 1252, "ASCII", 1, 1, NULL, NULL},
559 { 12, 1, "ujis", "ujis_japanese_ci", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
560 { 13, 1, "sjis", "sjis_japanese_ci", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
561 { 16, 1, "hebrew", "hebrew_general_ci", "", 1255, "HEBREW", 1, 1, NULL, NULL},
562 { 18, 1, "tis620", "tis620_thai_ci", "", 874, "TIS620", 1, 1, NULL, NULL},
563 { 19, 1, "euckr", "euckr_korean_ci", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
564 { 22, 1, "koi8u", "koi8u_general_ci", "", 21866, "KOI8U", 1, 1, NULL, NULL},
565 { 24, 1, "gb2312", "gb2312_chinese_ci", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
566 { 25, 1, "greek", "greek_general_ci", "", 28597, "GREEK", 1, 1, NULL, NULL},
567 { 26, 1, "cp1250", "cp1250_general_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
568 { 28, 1, "gbk", "gbk_chinese_ci", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
569 { 30, 1, "latin5", "latin5_turkish_ci", "", 1254, "LATIN5", 1, 1, NULL, NULL},
570 { 32, 1, "armscii8", "armscii8_general_ci", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
571 { 33, 1, UTF8_MB3, UTF8_MB3"_general_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
572 { 35, 1, "ucs2", "ucs2_general_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
573 { 36, 1, "cp866", "cp866_general_ci", "", 866, "CP866", 1, 1, NULL, NULL},
574 { 37, 1, "keybcs2", "keybcs2_general_ci", "", 0, "", 1, 1, NULL, NULL},
575 { 38, 1, "macce", "macce_general_ci", "", 10029, "CP1282", 1, 1, NULL, NULL},
576 { 39, 1, "macroman", "macroman_general_ci", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
577 { 40, 1, "cp852", "cp852_general_ci", "", 852, "CP852", 1, 1, NULL, NULL},
578 { 41, 1, "latin7", "latin7_general_ci", "", 28603, "LATIN7", 1, 1, NULL, NULL},
579 { 51, 1, "cp1251", "cp1251_general_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
580 { 57, 1, "cp1256", "cp1256_general_ci", "", 1256, "CP1256", 1, 1, NULL, NULL},
581 { 59, 1, "cp1257", "cp1257_general_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
582 { 63, 1, "binary", "binary", "", 0, "ASCII", 1, 1, NULL, NULL},
583 { 64, 1, "armscii8", "armscii8_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
584 { 92, 1, "geostd8", "geostd8_general_ci", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
585 { 95, 1, "cp932", "cp932_japanese_ci", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
586 { 97, 1, "eucjpms", "eucjpms_japanese_ci", "", 932, "EUC-JP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
587 { 2, 1, "latin2", "latin2_czech_cs", "", 852, "LATIN2", 1, 1, NULL, NULL},
588 { 5, 1, "latin1", "latin1_german1_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
589 { 14, 1, "cp1251", "cp1251_bulgarian_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
590 { 15, 1, "latin1", "latin1_danish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
591 { 17, 1, "filename", "filename", "", 0, "", 1, 5, NULL, NULL},
592 { 20, 1, "latin7", "latin7_estonian_cs", "", 28603, "LATIN7", 1, 1, NULL, NULL},
593 { 21, 1, "latin2", "latin2_hungarian_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
594 { 23, 1, "cp1251", "cp1251_ukrainian_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
595 { 27, 1, "latin2", "latin2_croatian_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
596 { 29, 1, "cp1257", "cp1257_lithuanian_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
597 { 31, 1, "latin1", "latin1_german2_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
598 { 34, 1, "cp1250", "cp1250_czech_cs", "", 1250, "CP1250", 1, 1, NULL, NULL},
599 { 42, 1, "latin7", "latin7_general_cs", "", 28603, "LATIN7", 1, 1, NULL, NULL},
600 { 43, 1, "macce", "macce_bin", "", 10029, "CP1282", 1, 1, NULL, NULL},
601 { 44, 1, "cp1250", "cp1250_croatian_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
602 { 45, 1, UTF8_MB4, UTF8_MB4"_general_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
603 { 46, 1, UTF8_MB4, UTF8_MB4"_bin", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
604 { 47, 1, "latin1", "latin1_bin", "", 1250, "LATIN1", 1, 1, NULL, NULL},
605 { 48, 1, "latin1", "latin1_general_ci", "", 1250, "LATIN1", 1, 1, NULL, NULL},
606 { 49, 1, "latin1", "latin1_general_cs", "", 1250, "LATIN1", 1, 1, NULL, NULL},
607 { 50, 1, "cp1251", "cp1251_bin", "", 1251, "CP1251", 1, 1, NULL, NULL},
608 { 52, 1, "cp1251", "cp1251_general_cs", "", 1251, "CP1251", 1, 1, NULL, NULL},
609 { 53, 1, "macroman", "macroman_bin", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
610 { 54, 1, "utf16", "utf16_general_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
611 { 55, 1, "utf16", "utf16_bin", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
612 { 56, 1, "utf16le", "utf16_general_ci", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
613 { 58, 1, "cp1257", "cp1257_bin", "", 1257, "CP1257", 1, 1, NULL, NULL},
614 #ifdef USED_TO_BE_SO_BEFORE_MYSQL_5_5
615 { 60, 1, "armascii8", "armascii8_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
616 #endif
617 { 60, 1, "utf32", "utf32_general_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
618 { 61, 1, "utf32", "utf32_bin", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
619 { 62, 1, "utf16le", "utf16_bin", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
620 { 65, 1, "ascii", "ascii_bin", "", 1252, "ASCII", 1, 1, NULL, NULL},
621 { 66, 1, "cp1250", "cp1250_bin", "", 1250, "CP1250", 1, 1, NULL, NULL},
622 { 67, 1, "cp1256", "cp1256_bin", "", 1256, "CP1256", 1, 1, NULL, NULL},
623 { 68, 1, "cp866", "cp866_bin", "", 866, "CP866", 1, 1, NULL, NULL},
624 { 69, 1, "dec8", "dec8_bin", "", 0, "DEC", 1, 1, NULL, NULL},
625 { 70, 1, "greek", "greek_bin", "", 28597, "GREEK", 1, 1, NULL, NULL},
626 { 71, 1, "hebrew", "hebrew_bin", "", 1255, "hebrew", 1, 1, NULL, NULL},
627 { 72, 1, "hp8", "hp8_bin", "", 0, "HPROMAN-8", 1, 1, NULL, NULL},
628 { 73, 1, "keybcs2", "keybcs2_bin", "", 0, "", 1, 1, NULL, NULL},
629 { 74, 1, "koi8r", "koi8r_bin", "", 20866, "KOI8R", 1, 1, NULL, NULL},
630 { 75, 1, "koi8u", "koi8u_bin", "", 21866, "KOI8U", 1, 1, NULL, NULL},
631 { 76, 1, UTF8_MB3, UTF8_MB3"_tolower_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
632 { 77, 1, "latin2", "latin2_bin", "", 28592, "LATIN2", 1, 1, NULL, NULL},
633 { 78, 1, "latin5", "latin5_bin", "", 1254, "LATIN5", 1, 1, NULL, NULL},
634 { 79, 1, "latin7", "latin7_bin", "", 28603, "LATIN7", 1, 1, NULL, NULL},
635 { 80, 1, "cp850", "cp850_bin", "", 850, "CP850", 1, 1, NULL, NULL},
636 { 81, 1, "cp852", "cp852_bin", "", 852, "CP852", 1, 1, NULL, NULL},
637 { 82, 1, "swe7", "swe7_bin", "", 0, "", 1, 1, NULL, NULL},
638 { 93, 1, "geostd8", "geostd8_bin", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
639 { 83, 1, UTF8_MB3, UTF8_MB3"_bin", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
640 { 84, 1, "big5", "big5_bin", "", 65000, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
641 { 85, 1, "euckr", "euckr_bin", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
642 { 86, 1, "gb2312", "gb2312_bin", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
643 { 87, 1, "gbk", "gbk_bin", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
644 { 88, 1, "sjis", "sjis_bin", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
645 { 89, 1, "tis620", "tis620_bin", "", 874, "TIS620", 1, 1, NULL, NULL},
646 { 90, 1, "ucs2", "ucs2_bin", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
647 { 91, 1, "ujis", "ujis_bin", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
648 { 94, 1, "latin1", "latin1_spanish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
649 { 96, 1, "cp932", "cp932_bin", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
650 { 99, 1, "cp1250", "cp1250_polish_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
651 { 98, 1, "eucjpms", "eucjpms_bin", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
652 { 101, 1, "utf16", "utf16_unicode_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
653 { 102, 1, "utf16", "utf16_icelandic_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
654 { 103, 1, "utf16", "utf16_latvian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
655 { 104, 1, "utf16", "utf16_romanian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
656 { 105, 1, "utf16", "utf16_slovenian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
657 { 106, 1, "utf16", "utf16_polish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
658 { 107, 1, "utf16", "utf16_estonian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
659 { 108, 1, "utf16", "utf16_spanish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
660 { 109, 1, "utf16", "utf16_swedish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
661 { 110, 1, "utf16", "utf16_turkish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
662 { 111, 1, "utf16", "utf16_czech_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
663 { 112, 1, "utf16", "utf16_danish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
664 { 113, 1, "utf16", "utf16_lithunian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
665 { 114, 1, "utf16", "utf16_slovak_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
666 { 115, 1, "utf16", "utf16_spanish2_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
667 { 116, 1, "utf16", "utf16_roman_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
668 { 117, 1, "utf16", "utf16_persian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
669 { 118, 1, "utf16", "utf16_esperanto_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
670 { 120, 1, "utf16", "utf16_sinhala_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
671 { 121, 1, "utf16", "utf16_german2_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
672 { 122, 1, "utf16", "utf16_croatian_mysql561_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
673 { 123, 1, "utf16", "utf16_unicode_520_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
674 { 124, 1, "utf16", "utf16_vietnamese_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
675 { 128, 1, "ucs2", "ucs2_unicode_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
676 { 129, 1, "ucs2", "ucs2_icelandic_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
677 { 130, 1, "ucs2", "ucs2_latvian_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
678 { 131, 1, "ucs2", "ucs2_romanian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
679 { 132, 1, "ucs2", "ucs2_slovenian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
680 { 133, 1, "ucs2", "ucs2_polish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
681 { 134, 1, "ucs2", "ucs2_estonian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
682 { 135, 1, "ucs2", "ucs2_spanish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
683 { 136, 1, "ucs2", "ucs2_swedish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
684 { 137, 1, "ucs2", "ucs2_turkish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
685 { 138, 1, "ucs2", "ucs2_czech_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
686 { 139, 1, "ucs2", "ucs2_danish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
687 { 140, 1, "ucs2", "ucs2_lithuanian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
688 { 141, 1, "ucs2", "ucs2_slovak_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
689 { 142, 1, "ucs2", "ucs2_spanish2_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
690 { 143, 1, "ucs2", "ucs2_roman_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
691 { 144, 1, "ucs2", "ucs2_persian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
692 { 145, 1, "ucs2", "ucs2_esperanto_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
693 { 146, 1, "ucs2", "ucs2_hungarian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
694 { 147, 1, "ucs2", "ucs2_sinhala_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
695 { 148, 1, "ucs2", "ucs2_german2_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
696 { 149, 1, "ucs2", "ucs2_croatian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
697 { 150, 1, "ucs2", "ucs2_unicode_520_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
698 { 151, 1, "ucs2", "ucs2_vietnamese_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
699 { 159, 1, "ucs2", "ucs2_general_mysql500_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
700 { 160, 1, "utf32", "utf32_unicode_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
701 { 161, 1, "utf32", "utf32_icelandic_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
702 { 162, 1, "utf32", "utf32_latvian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
703 { 163, 1, "utf32", "utf32_romanian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
704 { 164, 1, "utf32", "utf32_slovenian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
705 { 165, 1, "utf32", "utf32_polish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
706 { 166, 1, "utf32", "utf32_estonian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
707 { 167, 1, "utf32", "utf32_spanish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
708 { 168, 1, "utf32", "utf32_swedish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
709 { 169, 1, "utf32", "utf32_turkish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
710 { 170, 1, "utf32", "utf32_czech_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
711 { 171, 1, "utf32", "utf32_danish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
712 { 172, 1, "utf32", "utf32_lithuanian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
713 { 173, 1, "utf32", "utf32_slovak_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
714 { 174, 1, "utf32", "utf32_spanish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
715 { 175, 1, "utf32", "utf32_roman_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
716 { 176, 1, "utf32", "utf32_persian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
717 { 177, 1, "utf32", "utf32_esperanto_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
718 { 178, 1, "utf32", "utf32_hungarian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
719 { 179, 1, "utf32", "utf32_sinhala_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
720 { 180, 1, "utf32", "utf32_german2_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
721 { 181, 1, "utf32", "utf32_croatian_mysql561_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
722 { 182, 1, "utf32", "utf32_unicode_520_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
723 { 183, 1, "utf32", "utf32_vietnamese_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
724
725 { 192, 1, UTF8_MB3, UTF8_MB3"_general_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
726 { 193, 1, UTF8_MB3, UTF8_MB3"_icelandic_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
727 { 194, 1, UTF8_MB3, UTF8_MB3"_latvian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
728 { 195, 1, UTF8_MB3, UTF8_MB3"_romanian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
729 { 196, 1, UTF8_MB3, UTF8_MB3"_slovenian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
730 { 197, 1, UTF8_MB3, UTF8_MB3"_polish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
731 { 198, 1, UTF8_MB3, UTF8_MB3"_estonian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
732 { 199, 1, UTF8_MB3, UTF8_MB3"_spanish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
733 { 119, 1, UTF8_MB3, UTF8_MB3"_spanish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
734 { 200, 1, UTF8_MB3, UTF8_MB3"_swedish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
735 { 201, 1, UTF8_MB3, UTF8_MB3"_turkish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
736 { 202, 1, UTF8_MB3, UTF8_MB3"_czech_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
737 { 203, 1, UTF8_MB3, UTF8_MB3"_danish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
738 { 204, 1, UTF8_MB3, UTF8_MB3"_lithuanian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
739 { 205, 1, UTF8_MB3, UTF8_MB3"_slovak_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
740 { 206, 1, UTF8_MB3, UTF8_MB3"_spanish2_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
741 { 207, 1, UTF8_MB3, UTF8_MB3"_roman_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
742 { 208, 1, UTF8_MB3, UTF8_MB3"_persian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
743 { 209, 1, UTF8_MB3, UTF8_MB3"_esperanto_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
744 { 210, 1, UTF8_MB3, UTF8_MB3"_hungarian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
745 { 211, 1, UTF8_MB3, UTF8_MB3"_sinhala_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
746 { 212, 1, UTF8_MB3, UTF8_MB3"_german2_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
747 { 214, 1, UTF8_MB3, UTF8_MB3"_unicode_520_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
748 { 215, 1, UTF8_MB3, UTF8_MB3"_vietnamese_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
749 { 213, 1, UTF8_MB3, UTF8_MB3"_croatian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
750 { 223, 1, UTF8_MB3, UTF8_MB3"_general_mysql500_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
751
752 { 224, 1, UTF8_MB4, UTF8_MB4"_unicode_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
753 { 225, 1, UTF8_MB4, UTF8_MB4"_icelandic_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
754 { 226, 1, UTF8_MB4, UTF8_MB4"_latvian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
755 { 227, 1, UTF8_MB4, UTF8_MB4"_romanian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
756 { 228, 1, UTF8_MB4, UTF8_MB4"_slovenian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
757 { 229, 1, UTF8_MB4, UTF8_MB4"_polish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
758 { 230, 1, UTF8_MB4, UTF8_MB4"_estonian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
759 { 231, 1, UTF8_MB4, UTF8_MB4"_spanish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
760 { 232, 1, UTF8_MB4, UTF8_MB4"_swedish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
761 { 233, 1, UTF8_MB4, UTF8_MB4"_turkish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
762 { 234, 1, UTF8_MB4, UTF8_MB4"_czech_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
763 { 235, 1, UTF8_MB4, UTF8_MB4"_danish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
764 { 236, 1, UTF8_MB4, UTF8_MB4"_lithuanian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
765 { 237, 1, UTF8_MB4, UTF8_MB4"_slovak_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
766 { 238, 1, UTF8_MB4, UTF8_MB4"_spanish2_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
767 { 239, 1, UTF8_MB4, UTF8_MB4"_roman_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
768 { 240, 1, UTF8_MB4, UTF8_MB4"_persian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
769 { 241, 1, UTF8_MB4, UTF8_MB4"_esperanto_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
770 { 242, 1, UTF8_MB4, UTF8_MB4"_hungarian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
771 { 243, 1, UTF8_MB4, UTF8_MB4"_sinhala_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
772 { 244, 1, UTF8_MB4, UTF8_MB4"_german2_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
773 { 245, 1, UTF8_MB4, UTF8_MB4"_croatian_mysql561_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
774 { 246, 1, UTF8_MB4, UTF8_MB4"_unicode_520_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
775 { 247, 1, UTF8_MB4, UTF8_MB4"_vietnamese_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
776 { 248, 1, "gb18030", "gb18030_chinese_ci", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
777 { 249, 1, "gb18030", "gb18030_bin", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
778 { 250, 1, "gb18030", "gb18030_unicode_520_ci", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
779
780
781 { 254, 1, UTF8_MB3, UTF8_MB3"_general_cs", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8, check_mb_utf8_valid},
782
783 { 255, 1, UTF8_MB4, UTF8_MB4"_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
784 { 256, 1, UTF8_MB4, UTF8_MB4"_de_pb_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
785 { 257, 1, UTF8_MB4, UTF8_MB4"_is_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
786 { 258, 1, UTF8_MB4, UTF8_MB4"_lv_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
787 { 259, 1, UTF8_MB4, UTF8_MB4"_ro_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
788 { 260, 1, UTF8_MB4, UTF8_MB4"_sl_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
789 { 261, 1, UTF8_MB4, UTF8_MB4"_pl_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
790 { 262, 1, UTF8_MB4, UTF8_MB4"_et_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
791 { 263, 1, UTF8_MB4, UTF8_MB4"_es_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
792 { 264, 1, UTF8_MB4, UTF8_MB4"_sv_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
793 { 265, 1, UTF8_MB4, UTF8_MB4"_tr_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
794 { 266, 1, UTF8_MB4, UTF8_MB4"_cs_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
795 { 267, 1, UTF8_MB4, UTF8_MB4"_da_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
796 { 268, 1, UTF8_MB4, UTF8_MB4"_lt_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
797 { 269, 1, UTF8_MB4, UTF8_MB4"_sk_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
798 { 270, 1, UTF8_MB4, UTF8_MB4"_es_trad_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
799 { 271, 1, UTF8_MB4, UTF8_MB4"_la_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
800 { 273, 1, UTF8_MB4, UTF8_MB4"_eo_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
801 { 274, 1, UTF8_MB4, UTF8_MB4"_hu_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
802 { 275, 1, UTF8_MB4, UTF8_MB4"_hr_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
803 { 277, 1, UTF8_MB4, UTF8_MB4"_vi_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
804 { 278, 1, UTF8_MB4, UTF8_MB4"_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
805 { 279, 1, UTF8_MB4, UTF8_MB4"_de_pb__0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
806 { 280, 1, UTF8_MB4, UTF8_MB4"_is_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
807 { 281, 1, UTF8_MB4, UTF8_MB4"_lv_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
808 { 282, 1, UTF8_MB4, UTF8_MB4"_ro_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
809 { 283, 1, UTF8_MB4, UTF8_MB4"_sl_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
810 { 284, 1, UTF8_MB4, UTF8_MB4"_pl_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
811 { 285, 1, UTF8_MB4, UTF8_MB4"_et_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
812 { 286, 1, UTF8_MB4, UTF8_MB4"_es_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
813 { 287, 1, UTF8_MB4, UTF8_MB4"_sv_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
814 { 288, 1, UTF8_MB4, UTF8_MB4"_tr_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
815 { 289, 1, UTF8_MB4, UTF8_MB4"_cs_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
816 { 290, 1, UTF8_MB4, UTF8_MB4"_da_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
817 { 291, 1, UTF8_MB4, UTF8_MB4"_lt_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
818 { 292, 1, UTF8_MB4, UTF8_MB4"_sk_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
819 { 293, 1, UTF8_MB4, UTF8_MB4"_es_trad_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
820 { 294, 1, UTF8_MB4, UTF8_MB4"_la_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
821 { 296, 1, UTF8_MB4, UTF8_MB4"_eo_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
822 { 297, 1, UTF8_MB4, UTF8_MB4"_hu_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
823 { 298, 1, UTF8_MB4, UTF8_MB4"_hr_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
824 { 300, 1, UTF8_MB4, UTF8_MB4"_vi_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
825 { 303, 1, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
826 { 304, 1, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs_ks", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
827 { 305, 1, UTF8_MB4, UTF8_MB4"_0900_as_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
828 { 306, 1, UTF8_MB4, UTF8_MB4"_ru_0900_as_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
829 { 307, 1, UTF8_MB4, UTF8_MB4"_ru_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
830 { 576, 1, UTF8_MB3, UTF8_MB3"_croatian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
831 { 577, 1, UTF8_MB3, UTF8_MB3"_myanmar_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
832 { 578, 1, UTF8_MB3, UTF8_MB3"_thai_520_w2", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
833 { 608, 1, UTF8_MB4, UTF8_MB4"_croatian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
834 { 609, 1, UTF8_MB4, UTF8_MB4"_myanmar_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
835 { 610, 1, UTF8_MB4, UTF8_MB4"_thai_520_w2", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
836 { 640, 1, "ucs2", "ucs2_croatian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
837 { 641, 1, "ucs2", "ucs2_myanmar_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
838 { 642, 1, "ucs2", "ucs2_thai_520_w2", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
839 { 672, 1, "utf16", "utf16_croatian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
840 { 673, 1, "utf16", "utf16_myanmar_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
841 { 674, 1, "utf16", "utf16_thai_520_w2", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
842 { 736, 1, "utf32", "utf32_croatian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
843 { 737, 1, "utf32", "utf32_myanmar_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
844 { 738, 1, "utf32", "utf32_thai_520_w2", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
845 {1025, 1, "big5","big5_chinese_nopad_ci", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
846 {1027, 1, "dec8", "dec8_swedisch_nopad_ci", "", 0, "DEC", 1, 1, NULL, NULL},
847 {1028, 1, "cp850", "cp850_general_nopad_ci", "", 850, "CP850", 1, 1, NULL, NULL},
848 {1030, 1, "hp8", "hp8_english_nopad_ci", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
849 {1031, 1, "koi8r", "koi8r_general_nopad_ci", "", 878, "KOI8R", 1, 1, NULL, NULL},
850 {1032, 1, "latin1", "latin1_swedish_nopad_ci", "", 850, "LATIN1", 1, 1, NULL, NULL},
851 {1033, 1, "latin2", "latin2_general_nopad_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
852 {1034, 1, "swe7", "swe7_swedish_nopad_ci", "", 20107, "", 1, 1, NULL, NULL},
853 {1035, 1, "ascii", "ascii_general_nopad_ci", "", 1252, "ASCII", 1, 1, NULL, NULL},
854 {1036, 1, "ujis", "ujis_japanese_nopad_ci", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
855 {1037, 1, "sjis", "sjis_japanese_nopad_ci", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
856 {1040, 1, "hebrew", "hebrew_general_nopad_ci", "", 1255, "HEBREW", 1, 1, NULL, NULL},
857 {1042, 1, "tis620", "tis620_thai_nopad_ci", "", 874, "TIS620", 1, 1, NULL, NULL},
858 {1043, 1, "euckr", "euckr_korean_nopad_ci", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
859 {1046, 1, "koi8u", "koi8u_general_nopad_ci", "", 20866, "KOI8U", 1, 1, NULL, NULL},
860 {1048, 1, "gb2312", "gb2312_chinese_nopad_ci", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
861 {1049, 1, "greek", "greek_general_nopad_ci", "", 28597, "GREEK", 1, 1, NULL, NULL},
862 {1050, 1, "cp1250", "cp1250_general_nopad_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
863 {1052, 1, "gbk", "gbk_chinese_nopad_ci", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
864 {1054, 1, "latin5", "latin5_turkish_nopad_ci", "", 1254, "LATIN5", 1, 1, NULL, NULL},
865 {1056, 1, "armscii8", "armscii8_general_nopad_ci", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
866 {1057, 1, UTF8_MB3, UTF8_MB3"_general_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
867 {1059, 1, "ucs2", "ucs2_general_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
868 {1060, 1, "cp866", "cp866_general_nopad_ci", "", 866, "CP866", 1, 1, NULL, NULL},
869 {1061, 1, "keybcs2", "keybcs2_general_nopad_ci", "", 0, "", 1, 1, NULL, NULL},
870 {1062, 1, "macce", "macce_general_nopad_ci", "", 10029, "CP1282", 1, 1, NULL, NULL},
871 {1063, 1, "macroman", "macroman_general_nopad_ci", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
872 {1064, 1, "cp852", "cp852_general_nopad_ci", "", 852, "CP852", 1, 1, NULL, NULL},
873 {1065, 1, "latin7", "latin7_general_nopad_ci", "", 28603, "LATIN7", 1, 1, NULL, NULL},
874 {1067, 1, "macce", "macce_nopad_bin", "", 10029, "CP1282", 1, 1, NULL, NULL},
875 {1069, 1, UTF8_MB4, UTF8_MB4"_general_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
876 {1070, 1, UTF8_MB4, UTF8_MB4"_general_nopad_bin", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
877 {1071, 1, "latin1", "latin1_nopad_bin", "", 850, "LATIN1", 1, 1, NULL, NULL},
878 {1074, 1, "cp1251", "cp1251_nopad_bin", "", 1251, "CP1251", 1, 1, NULL, NULL},
879 {1075, 1, "cp1251", "cp1251_general_nopad_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
880 {1077, 1, "macroman", "macroman_nopad_bin", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
881 {1078, 1, "utf16", "utf16_general_nopad_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
882 {1079, 1, "utf16", "utf16_nopad_bin", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
883 {1080, 1, "utf16le", "utf16le_general_nopad_ci", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
884 {1081, 1, "cp1256", "cp1256_general_nopad_ci", "", 1256, "CP1256", 1, 1, NULL, NULL},
885 {1082, 1, "cp1257", "cp1257_nopad_bin", "", 1257, "CP1257", 1, 1, NULL, NULL},
886 {1083, 1, "cp1257", "cp1257_general_nopad_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
887 {1084, 1, "utf32", "utf32_general_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
888 {1085, 1, "utf32", "utf32_nopad_bin", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
889 {1086, 1, "utf16le", "utf16le_nopad_bin", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
890 {1088, 1, "armscii8", "armscii8_nopad_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
891 {1089, 1, "ascii", "ascii_nopad_bin", "", 1252, "ASCII", 1, 1, NULL, NULL},
892 {1090, 1, "cp1250", "cp1250_nopad_bin", "", 1250, "CP1250", 1, 1, NULL, NULL},
893 {1091, 1, "cp1256", "cp1256_nopad_bin", "", 1256, "CP1256", 1, 1, NULL, NULL},
894 {1092, 1, "cp866", "cp866_nopad_bin", "", 866, "CP866", 1, 1, NULL, NULL},
895 {1093, 1, "dec8", "dec8_nopad_bin", "", 0, "DEC", 1, 1, NULL, NULL},
896 {1094, 1, "greek", "greek_nopad_bin", "", 28597, "GREEK", 1, 1, NULL, NULL},
897 {1095, 1, "hebrew", "hebrew_nopad_bin", "", 1255, "HEBREW", 1, 1, NULL, NULL},
898 {1096, 1, "hp8", "hp8_nopad_bin", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
899 {1097, 1, "keybcs2", "keybcs2_nopad_bin", "", 0, "", 1, 1, NULL, NULL},
900 {1098, 1, "koi8r", "koi8r_nopad_bin", "", 878, "KOI8R", 1, 1, NULL, NULL},
901 {1099, 1, "koi8u", "koi8u_nopad_bin", "", 20866, "KOI8U", 1, 1, NULL, NULL},
902 {1101, 1, "latin2", "latin2_nopad_bin", "", 852, "LATIN2", 1, 1, NULL, NULL},
903 {1102, 1, "latin5", "latin5_nopad_bin", "", 1254, "LATIN5", 1, 1, NULL, NULL},
904 {1103, 1, "latin7", "latin7_nopad_bin", "", 28603, "LATIN7", 1, 1, NULL, NULL},
905 {1104, 1, "cp850", "cp850_nopad_bin", "", 850, "CP850", 1, 1, NULL, NULL},
906 {1105, 1, "cp852", "cp852_nopad_bin", "", 852, "CP852", 1, 1, NULL, NULL},
907 {1106, 1, "swe7", "swe7_nopad_bin", "", 20107, "", 1, 1, NULL, NULL},
908 {1107, 1, UTF8_MB3, UTF8_MB3"_nopad_bin", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
909 {1108, 1, "big5","big5_nopad_bin", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
910 {1109, 1, "euckr", "euckr_nopad_bin", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
911 {1110, 1, "gb2312", "gb2312_nopad_bin", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
912 {1111, 1, "gbk", "gbk_nopad_bin", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
913 {1112, 1, "sjis", "sjis_nopad_bin", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
914 {1113, 1, "tis620", "tis620_nopad_bin", "", 874, "TIS620", 1, 1, NULL, NULL},
915 {1114, 1, "ucs2", "ucs2_nopad_bin", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
916 {1115, 1, "ujis", "ujis_nopad_bin", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
917 {1116, 1, "geostd8", "geostd8_general_nopad_ci", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
918 {1117, 1, "geostd8", "geostd8_nopad_bin", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
919 {1119, 1, "cp932", "cp932_japanese_nopad_ci", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
920 {1120, 1, "cp932", "cp932_nopad_bin", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
921 {1121, 1, "eucjpms", "eucjpms_japanese_nopad_ci", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
922 {1122, 1, "eucjpms", "eucjpms_nopad_bin", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
923 {1125, 1, "utf16", "utf16_unicode_nopad_ci", "", 1200, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
924 {1147, 1, "utf16", "utf16_unicode_520_nopad_ci", "", 1200, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
925 {1152, 1, "ucs2", "ucs2_unicode_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
926 {1174, 1, "ucs2", "ucs2_unicode_520_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
927 {1184, 1, "utf32", "utf32_unicode_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
928 {1206, 1, "utf32", "utf32_unicode_520_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
929 {1216, 1, UTF8_MB3, UTF8_MB3"_unicode_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
930 {1238, 1, UTF8_MB3, UTF8_MB3"_unicode_520_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
931 {1248, 1, UTF8_MB4, UTF8_MB4"_unicode_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
932 {1270, 1, UTF8_MB4, UTF8_MB4"_unicode_520_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
933 { 0, 0, NULL, NULL, NULL, 0, NULL, 0, 0, NULL, NULL}
934 };
935 /* }}} */
936
937
938 /* {{{ mysql_find_charset_nr */
mysql_find_charset_nr(unsigned int charsetnr)939 const MARIADB_CHARSET_INFO * mysql_find_charset_nr(unsigned int charsetnr)
940 {
941 const MARIADB_CHARSET_INFO * c = mariadb_compiled_charsets;
942
943 do {
944 if (c->nr == charsetnr) {
945 return(c);
946 }
947 ++c;
948 } while (c[0].nr != 0);
949 return(NULL);
950 }
951 /* }}} */
952
953
954 /* {{{ mysql_find_charset_name */
mysql_find_charset_name(const char * name)955 MARIADB_CHARSET_INFO * mysql_find_charset_name(const char *name)
956 {
957 MARIADB_CHARSET_INFO *c = (MARIADB_CHARSET_INFO *)mariadb_compiled_charsets;
958 const char *csname;
959
960 if (!strcasecmp(name, MADB_AUTODETECT_CHARSET_NAME))
961 csname= madb_get_os_character_set();
962 else
963 csname= (char *)name;
964
965 do {
966 if (!strcasecmp(c->csname, csname)) {
967 return(c);
968 }
969 ++c;
970 } while (c[0].nr != 0);
971 return(NULL);
972 }
973 /* }}} */
974
975
976 /* {{{ mysql_cset_escape_quotes */
mysql_cset_escape_quotes(const MARIADB_CHARSET_INFO * cset,char * newstr,const char * escapestr,size_t escapestr_len)977 size_t mysql_cset_escape_quotes(const MARIADB_CHARSET_INFO *cset, char *newstr,
978 const char * escapestr, size_t escapestr_len )
979 {
980 const char *newstr_s = newstr;
981 const char *newstr_e = newstr + 2 * escapestr_len;
982 const char *end = escapestr + escapestr_len;
983 my_bool escape_overflow = FALSE;
984
985 for (;escapestr < end; escapestr++) {
986 unsigned int len = 0;
987 /* check unicode characters */
988
989 if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
990
991 /* check possible overflow */
992 if ((newstr + len) > newstr_e) {
993 escape_overflow = TRUE;
994 break;
995 }
996 /* copy mb char without escaping it */
997 while (len--) {
998 *newstr++ = *escapestr++;
999 }
1000 escapestr--;
1001 continue;
1002 }
1003 if (*escapestr == '\'') {
1004 if (newstr + 2 > newstr_e) {
1005 escape_overflow = TRUE;
1006 break;
1007 }
1008 *newstr++ = '\'';
1009 *newstr++ = '\'';
1010 } else {
1011 if (newstr + 1 > newstr_e) {
1012 escape_overflow = TRUE;
1013 break;
1014 }
1015 *newstr++ = *escapestr;
1016 }
1017 }
1018 *newstr = '\0';
1019
1020 if (escape_overflow) {
1021 return((size_t)~0);
1022 }
1023 return((size_t)(newstr - newstr_s));
1024 }
1025 /* }}} */
1026
1027
1028 /* {{{ mysql_cset_escape_slashes */
mysql_cset_escape_slashes(const MARIADB_CHARSET_INFO * cset,char * newstr,const char * escapestr,size_t escapestr_len)1029 size_t mysql_cset_escape_slashes(const MARIADB_CHARSET_INFO * cset, char *newstr,
1030 const char * escapestr, size_t escapestr_len )
1031 {
1032 const char *newstr_s = newstr;
1033 const char *newstr_e = newstr + 2 * escapestr_len;
1034 const char *end = escapestr + escapestr_len;
1035 my_bool escape_overflow = FALSE;
1036
1037 for (;escapestr < end; escapestr++) {
1038 char esc = '\0';
1039 unsigned int len = 0;
1040
1041 /* check unicode characters */
1042 if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
1043 /* check possible overflow */
1044 if ((newstr + len) > newstr_e) {
1045 escape_overflow = TRUE;
1046 break;
1047 }
1048 /* copy mb char without escaping it */
1049 while (len--) {
1050 *newstr++ = *escapestr++;
1051 }
1052 escapestr--;
1053 continue;
1054 }
1055 if (cset->char_maxlen > 1 && cset->mb_charlen(*escapestr) > 1) {
1056 esc = *escapestr;
1057 } else {
1058 switch (*escapestr) {
1059 case 0:
1060 esc = '0';
1061 break;
1062 case '\n':
1063 esc = 'n';
1064 break;
1065 case '\r':
1066 esc = 'r';
1067 break;
1068 case '\\':
1069 case '\'':
1070 case '"':
1071 esc = *escapestr;
1072 break;
1073 case '\032':
1074 esc = 'Z';
1075 break;
1076 }
1077 }
1078 if (esc) {
1079 if (newstr + 2 > newstr_e) {
1080 escape_overflow = TRUE;
1081 break;
1082 }
1083 /* copy escaped character */
1084 *newstr++ = '\\';
1085 *newstr++ = esc;
1086 } else {
1087 if (newstr + 1 > newstr_e) {
1088 escape_overflow = TRUE;
1089 break;
1090 }
1091 /* copy non escaped character */
1092 *newstr++ = *escapestr;
1093 }
1094 }
1095 *newstr = '\0';
1096
1097 if (escape_overflow) {
1098 return((size_t)~0);
1099 }
1100 return((size_t)(newstr - newstr_s));
1101 }
1102 /* }}} */
1103
1104 /* {{{ MADB_OS_CHARSET */
/* One row of the MADB_OS_CHARSET table. */
struct st_madb_os_charset {
  const char    *identifier;   /* OS-side name; the Windows rows use the code page number as a string */
  const char    *description;  /* human-readable label for the identifier */
  const char    *charset;      /* character set name the identifier maps to, NULL if there is none */
  const char    *iconv_cs;     /* NOTE(review): presumably the iconv name; NULL in the Windows rows - confirm */
  unsigned char  supported;    /* one of the MADB_CS_* values below */
};

/* Values stored in st_madb_os_charset.supported */
#define MADB_CS_UNSUPPORTED 0
#define MADB_CS_APPROX      1
#define MADB_CS_EXACT       2
1116
1117 /* Please add new character sets at the end. */
1118 struct st_madb_os_charset MADB_OS_CHARSET[]=
1119 {
1120 #ifdef _WIN32
1121 /* Windows code pages */
1122 {"037", "IBM EBCDIC US-Canada", NULL, NULL, MADB_CS_UNSUPPORTED},
1123 {"437", "OEM United States", "cp850", NULL, MADB_CS_APPROX},
1124 {"500", "IBM EBCDIC International", NULL, NULL, MADB_CS_UNSUPPORTED},
1125 {"708", "Arabic (ASMO 708)", NULL, NULL, MADB_CS_UNSUPPORTED},
1126 {"709", "Arabic (ASMO-449+, BCON V4)", NULL, NULL, MADB_CS_UNSUPPORTED},
1127 {"710", "Transparent Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1128 {"720", "Arabic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1129 {"737", "Greek (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1130 {"775", "Baltic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1131 {"850", "Western European (DOS)", "cp850", NULL, MADB_CS_EXACT},
1132 {"852", "Central European (DOS)", "cp852", NULL, MADB_CS_EXACT},
1133 {"855", "Cyrillic (primarily Russian)", NULL, NULL, MADB_CS_UNSUPPORTED},
1134 {"857", "Turkish (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1135 {"858", "OEM Multilingual Latin 1 + Euro symbol", "cp850", NULL, MADB_CS_EXACT},
1136 {"860", "Portuguese (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1137 {"861", "Icelandic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1138 {"862", "Hebrew (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1139 {"863", "French Canadian (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1140 {"864", "Arabic (864)", NULL, NULL, MADB_CS_UNSUPPORTED},
1141 {"865", "Nordic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1142 {"866", "Cyrillic (DOS)", "cp866", NULL, MADB_CS_EXACT},
1143 {"869", "Greek, Modern (DOS)", "greek", NULL, MADB_CS_EXACT},
1144 {"870", "IBM EBCDIC Multilingual Latin 2", NULL, NULL, MADB_CS_UNSUPPORTED},
1145 {"874", "Thai (Windows)", "tis620", NULL, MADB_CS_UNSUPPORTED},
1146 {"875", "Greek Modern", NULL, NULL, MADB_CS_UNSUPPORTED},
1147 {"932", "Japanese (Shift-JIS)", "cp932", NULL, MADB_CS_EXACT},
1148 {"936", "Chinese Simplified (GB2312)", "gbk", NULL, MADB_CS_EXACT},
1149 {"949", "ANSI/OEM Korean (Unified Hangul Code)", "euckr", NULL, MADB_CS_EXACT},
1150 {"950", "Chinese Traditional (Big5)", "big5", NULL, MADB_CS_EXACT},
1151 {"1026", "EBCDIC Turkish (Latin 5)", NULL, NULL, MADB_CS_UNSUPPORTED},
1152 {"1047", "EBCDIC Latin 1/Open System", NULL, NULL, MADB_CS_UNSUPPORTED},
1153 {"1140", "IBM EBCDIC (US-Canada-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1154 {"1141", "IBM EBCDIC (Germany-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1155 {"1142", "IBM EBCDIC (Denmark-Norway-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1156 {"1143", "IBM EBCDIC (Finland-Sweden-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1157 {"1144", "IBM EBCDIC (Italy-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1158 {"1145", "IBM EBCDIC (Spain-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1159 {"1146", "IBM EBCDIC (UK-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1160 {"1147", "IBM EBCDIC (France-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1161 {"1148", "IBM EBCDIC (International-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1162 {"1149", "IBM EBCDIC (Icelandic-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1163 {"1200", "UTF-16, little endian byte order", NULL, NULL, MADB_CS_UNSUPPORTED},
1164 {"1201", "UTF-16, big endian byte order", "utf16", NULL, MADB_CS_UNSUPPORTED},
1165 {"1250", "Central European (Windows)", "cp1250", NULL, MADB_CS_EXACT},
1166 {"1251", "Cyrillic (Windows)", "cp1251", NULL, MADB_CS_EXACT},
1167 {"1252", "Western European (Windows)", "latin1", NULL, MADB_CS_EXACT},
1168 {"1253", "Greek (Windows)", "greek", NULL, MADB_CS_EXACT},
1169 {"1254", "Turkish (Windows)", "latin5", NULL, MADB_CS_EXACT},
1170 {"1255", "Hebrew (Windows)", "hewbrew", NULL, MADB_CS_EXACT},
1171 {"1256", "Arabic (Windows)", "cp1256", NULL, MADB_CS_EXACT},
1172 {"1257", "Baltic (Windows)","cp1257", NULL, MADB_CS_EXACT},
1173 {"1258", "Vietnamese (Windows)", NULL, NULL, MADB_CS_UNSUPPORTED},
1174 {"1361", "Korean (Johab)", NULL, NULL, MADB_CS_UNSUPPORTED},
1175 {"10000", "Western European (Mac)", "macroman", NULL, MADB_CS_EXACT},
1176 {"10001", "Japanese (Mac)", "sjis", NULL, MADB_CS_EXACT},
1177 {"10002", "Chinese Traditional (Mac)", "big5", NULL, MADB_CS_EXACT},
1178 {"10003", "Korean (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1179 {"10004", "Arabic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1180 {"10005", "Hebrew (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1181 {"10006", "Greek (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1182 {"10007", "Cyrillic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1183 {"10008", "Chinese Simplified (Mac)", "gb2312", NULL, MADB_CS_EXACT},
1184 {"10010", "Romanian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1185 {"10017", "Ukrainian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1186 {"10021", "Thai (Mac)", "tis620", NULL, MADB_CS_EXACT},
1187 {"10029", "Central European (Mac)", "macce", NULL, MADB_CS_EXACT},
1188 {"10079", "Icelandic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1189 {"10081", "Turkish (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1190 {"10082", "Croatian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1191 {"12000", "Unicode UTF-32, little endian byte order", NULL, NULL, MADB_CS_UNSUPPORTED},
1192 {"12001", "Unicode UTF-32, big endian byte order", "utf32", NULL, MADB_CS_UNSUPPORTED},
1193 {"20000", "Chinese Traditional (CNS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1194 {"20001", "TCA Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1195 {"20002", "Chinese Traditional (Eten)", NULL, NULL, MADB_CS_UNSUPPORTED},
1196 {"20003", "IBM5550 Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1197 {"20004", "TeleText Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1198 {"20005", "Wang Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1199 {"20105", "Western European (IA5)", NULL, NULL, MADB_CS_UNSUPPORTED},
1200 {"20106", "IA5 German (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1201 {"20107", "Swedish (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1202 {"20108", "Norwegian (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1203 {"20127", "US-ASCII (7-bit)", "ascii", NULL, MADB_CS_EXACT},
1204 {"20261", "T.61", NULL, NULL, MADB_CS_UNSUPPORTED},
1205 {"20269", "Non-Spacing Accent", NULL, NULL, MADB_CS_UNSUPPORTED},
1206 {"20273", "EBCDIC Germany", NULL, NULL, MADB_CS_UNSUPPORTED},
1207 {"20277", "EBCDIC Denmark-Norway", NULL, NULL, MADB_CS_UNSUPPORTED},
1208 {"20278", "EBCDIC Finland-Sweden", NULL, NULL, MADB_CS_UNSUPPORTED},
1209 {"20280", "EBCDIC Italy", NULL, NULL, MADB_CS_UNSUPPORTED},
1210 {"20284", "EBCDIC Latin America-Spain", NULL, NULL, MADB_CS_UNSUPPORTED},
1211 {"20285", "EBCDIC United Kingdom", NULL, NULL, MADB_CS_UNSUPPORTED},
1212 {"20290", "EBCDIC Japanese Katakana Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1213 {"20297", "EBCDIC France", NULL, NULL, MADB_CS_UNSUPPORTED},
1214 {"20420", "EBCDIC Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1215 {"20423", "EBCDIC Greek", NULL, NULL, MADB_CS_UNSUPPORTED},
1216 {"20424", "EBCDIC Hebrew", NULL, NULL, MADB_CS_UNSUPPORTED},
1217 {"20833", "EBCDIC Korean Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1218 {"20838", "EBCDIC Thai", NULL, NULL, MADB_CS_UNSUPPORTED},
1219 {"20866", "Cyrillic (KOI8-R)", "koi8r", NULL, MADB_CS_EXACT},
1220 {"20871", "EBCDIC Icelandic", NULL, NULL, MADB_CS_UNSUPPORTED},
1221 {"20880", "EBCDIC Cyrillic Russian", NULL, NULL, MADB_CS_UNSUPPORTED},
1222 {"20905", "EBCDIC Turkish", NULL, NULL, MADB_CS_UNSUPPORTED},
1223 {"20924", "EBCDIC Latin 1/Open System (1047 + Euro symbol)", NULL, NULL, MADB_CS_UNSUPPORTED},
1224 {"20932", "Japanese (JIS 0208-1990 and 0121-1990)", "ujis", NULL, MADB_CS_EXACT},
1225 {"20936", "Chinese Simplified (GB2312-80)", "gb2312", NULL, MADB_CS_APPROX},
1226 {"20949", "Korean Wansung", "euckr", NULL, MADB_CS_APPROX},
1227 {"21025", "EBCDIC Cyrillic Serbian-Bulgarian", NULL, NULL, MADB_CS_UNSUPPORTED},
1228 {"21866", "Cyrillic (KOI8-U)", "koi8u", NULL, MADB_CS_EXACT},
1229 {"28591", "Western European (ISO)", "latin1", NULL, MADB_CS_APPROX},
1230 {"28592", "Central European (ISO)", "latin2", NULL, MADB_CS_EXACT},
1231 {"28593", "Latin 3", NULL, NULL, MADB_CS_UNSUPPORTED},
1232 {"28594", "Baltic", NULL, NULL, MADB_CS_UNSUPPORTED},
1233 {"28595", "ISO 8859-5 Cyrillic", NULL, NULL, MADB_CS_UNSUPPORTED},
1234 {"28596", "ISO 8859-6 Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1235 {"28597", "ISO 8859-7 Greek", "greek", NULL, MADB_CS_EXACT},
1236 {"28598", "Hebrew (ISO-Visual)", "hebrew", NULL, MADB_CS_EXACT},
1237 {"28599", "ISO 8859-9 Turkish", "latin5", NULL, MADB_CS_EXACT},
1238 {"28603", "ISO 8859-13 Estonian", "latin7", NULL, MADB_CS_EXACT},
1239 {"28605", "8859-15 Latin 9", NULL, NULL, MADB_CS_UNSUPPORTED},
1240 {"29001", "Europa 3", NULL, NULL, MADB_CS_UNSUPPORTED},
1241 {"38598", "ISO 8859-8 Hebrew; Hebrew (ISO-Logical)", "hebrew", NULL, MADB_CS_EXACT},
1242 {"50220", "ISO 2022 Japanese with no halfwidth Katakana", NULL, NULL, MADB_CS_UNSUPPORTED},
1243 {"50221", "ISO 2022 Japanese with halfwidth Katakana", NULL, NULL, MADB_CS_UNSUPPORTED},
1244 {"50222", "ISO 2022 Japanese JIS X 0201-1989", NULL, NULL, MADB_CS_UNSUPPORTED},
1245 {"50225", "ISO 2022 Korean", NULL, NULL, MADB_CS_UNSUPPORTED},
1246 {"50227", "ISO 2022 Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1247 {"50229", "ISO 2022 Traditional Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1248 {"50930", "EBCDIC Japanese (Katakana) Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1249 {"50931", "EBCDIC US-Canada and Japanese", NULL, NULL, MADB_CS_UNSUPPORTED},
1250 {"50933", "EBCDIC Korean Extended and Korean", NULL, NULL, MADB_CS_UNSUPPORTED},
1251 {"50935", "EBCDIC Simplified Chinese Extended and Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1252 {"50936", "EBCDIC Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1253 {"50937", "EBCDIC US-Canada and Traditional Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1254 {"50939", "EBCDIC Japanese (Latin) Extended and Japanese", NULL, NULL, MADB_CS_UNSUPPORTED},
1255 {"51932", "EUC Japanese", "ujis", NULL, MADB_CS_EXACT},
1256 {"51936", "EUC Simplified Chinese; Chinese Simplified (EUC)", "gb2312", NULL, MADB_CS_EXACT},
1257 {"51949", "EUC Korean", "euckr", NULL, MADB_CS_EXACT},
1258 {"51950", "EUC Traditional Chinese", "big5", NULL, MADB_CS_EXACT},
1259 {"52936", "Chinese Simplified (HZ)", NULL, NULL, MADB_CS_UNSUPPORTED},
1260 {"54936", "Chinese Simplified (GB18030)", NULL, NULL, MADB_CS_UNSUPPORTED},
1261 {"57002", "ISCII Devanagari", NULL, NULL, MADB_CS_UNSUPPORTED},
1262 {"57003", "ISCII Bengali", NULL, NULL, MADB_CS_UNSUPPORTED},
1263 {"57004", "ISCII Tamil", NULL, NULL, MADB_CS_UNSUPPORTED},
1264 {"57005", "ISCII Telugu", NULL, NULL, MADB_CS_UNSUPPORTED},
1265 {"57006", "ISCII Assamese", NULL, NULL, MADB_CS_UNSUPPORTED},
1266 {"57007", "ISCII Oriya", NULL, NULL, MADB_CS_UNSUPPORTED},
1267 {"57008", "ISCII Kannada", NULL, NULL, MADB_CS_UNSUPPORTED},
1268 {"57009", "ISCII Malayalam", NULL, NULL, MADB_CS_UNSUPPORTED},
1269 {"57010", "ISCII Gujarati", NULL, NULL, MADB_CS_UNSUPPORTED},
1270 {"57011", "ISCII Punjabi", NULL, NULL, MADB_CS_UNSUPPORTED},
1271 {"65000", "utf-7 Unicode (UTF-7)", NULL, NULL, MADB_CS_UNSUPPORTED},
1272 {"65001", "utf-8 Unicode (UTF-8)", "utf8", NULL, MADB_CS_EXACT},
1273 /* non Windows */
1274 #else
1275 /* iconv encodings */
1276 {"ASCII", "US-ASCII", "ascii", "ASCII", MADB_CS_APPROX},
1277 {"US-ASCII", "US-ASCII", "ascii", "ASCII", MADB_CS_APPROX},
1278 {"Big5", "Chinese for Taiwan Multi-byte set", "big5", "BIG5", MADB_CS_EXACT},
1279 {"CP866", "IBM 866", "cp866", "CP866", MADB_CS_EXACT},
1280 {"IBM-1252", "Catalan Spain", "cp1252", "CP1252", MADB_CS_EXACT},
1281 {"ISCII-DEV", "Hindi", NULL, NULL, MADB_CS_UNSUPPORTED},
1282 {"ISO-8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1283 {"ISO8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1284 {"ISO_8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1285 {"ISO88591", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1286 {"ISO-8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1287 {"ISO8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1288 {"ISO_8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1289 {"ISO885913", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1290 {"ISO-8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1291 {"ISO8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1292 {"ISO_8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1293 {"ISO885915", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1294 {"ISO-8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1295 {"ISO8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1296 {"ISO_8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1297 {"ISO88592", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1298 {"ISO-8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1299 {"ISO8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1300 {"ISO_8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1301 {"ISO88597", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1302 {"ISO-8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1303 {"ISO8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1304 {"ISO_8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1305 {"ISO88598", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1306 {"ISO-8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1307 {"ISO8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1308 {"ISO_8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1309 {"ISO88599", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1310 {"ISO-8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1311 {"ISO8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1312 {"ISO_8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1313 {"ISO88594", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1314 {"ISO-8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1315 {"ISO8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1316 {"ISO_8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1317 {"ISO88595", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1318 {"KOI8-R", "KOI8-R", "koi8r", "KOI8R", MADB_CS_EXACT},
1319 {"koi8r", "KOI8-R", "koi8r", "KOI8R", MADB_CS_EXACT},
1320 {"KOI8-U", "KOI8-U", "koi8u", "KOI8U", MADB_CS_EXACT},
1321 {"koi8u", "KOI8-U", "koi8u", "KOI8U", MADB_CS_EXACT},
1322 {"koi8t", "KOI8-T", NULL, "KOI8-T", MADB_CS_UNSUPPORTED},
1323 {"KOI8-T", "KOI8-T", NULL, "KOI8-T", MADB_CS_UNSUPPORTED},
1324 {"SJIS", "SHIFT_JIS", "sjis", "SJIS", MADB_CS_EXACT},
1325 {"Shift-JIS", "SHIFT_JIS", "sjis", "SJIS", MADB_CS_EXACT},
1326 {"ansi1251", "Cyrillic", "cp1251", "CP1251", MADB_CS_EXACT},
1327 {"cp1251", "Cyrillic", "cp1251", "CP1251", MADB_CS_EXACT},
1328 {"armscii8", "Armenian", "armscii8", "ASMSCII-8", MADB_CS_EXACT},
1329 {"armscii-8", "Armenian", "armscii8", "ASMSCII-8", MADB_CS_EXACT},
1330 {"big5hkscs", "Big5-HKSCS", NULL, NULL, MADB_CS_UNSUPPORTED},
1331 {"cp1255", "Hebrew", "cp1255", "CP1255", MADB_CS_EXACT},
1332 {"eucCN", "GB-2312", "gb2312", "GB2312", MADB_CS_EXACT},
1333 {"eucJP", "UJIS", "ujis", "UJIS", MADB_CS_EXACT},
1334 {"eucKR", "EUC-KR", "euckr", "EUCKR", MADB_CS_EXACT},
1335 {"euctw", "EUC-TW", NULL, NULL, MADB_CS_UNSUPPORTED},
1336 {"gb18030", "GB 18030-2000", "gb18030", "GB18030", MADB_CS_UNSUPPORTED},
1337 {"gb2312", "GB2312", "gb2312", "GB2312", MADB_CS_EXACT},
1338 {"gbk", "GBK", "gbk", "GBK", MADB_CS_EXACT},
1339 {"georgianps", "Georgian", "geostd8", "GEORGIAN-PS", MADB_CS_EXACT},
1340 {"utf8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT},
1341 {"utf-8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT},
1342 #endif
1343 {NULL, NULL, NULL, NULL, 0}
1344 };
1345 /* }}} */
1346
1347 /* {{{ madb_get_os_character_set */
madb_get_os_character_set()1348 const char *madb_get_os_character_set()
1349 {
1350 unsigned int i= 0;
1351 char *p= NULL;
1352 #ifdef _WIN32
1353 char codepage[FN_REFLEN];
1354 snprintf(codepage, FN_REFLEN, "%u", GetConsoleCP() ? GetConsoleCP() : GetACP());
1355 p= codepage;
1356 #elif defined(HAVE_NL_LANGINFO) && defined(HAVE_SETLOCALE)
1357 if (setlocale(LC_CTYPE, ""))
1358 p= nl_langinfo(CODESET);
1359 #endif
1360 if (!p)
1361 return MADB_DEFAULT_CHARSET_NAME;
1362 while (MADB_OS_CHARSET[i].identifier)
1363 {
1364 if (MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED &&
1365 strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0)
1366 return MADB_OS_CHARSET[i].charset;
1367 i++;
1368 }
1369 return MADB_DEFAULT_CHARSET_NAME;
1370 }
1371 /* }}} */
1372
/* {{{ madb_get_windows_cp */
1374 #ifdef _WIN32
madb_get_windows_cp(const char * charset)1375 int madb_get_windows_cp(const char *charset)
1376 {
1377 unsigned int i= 0;
1378 while (MADB_OS_CHARSET[i].identifier)
1379 {
1380 if (MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED &&
1381 strcmp(MADB_OS_CHARSET[i].charset, charset) == 0)
1382 return atoi(MADB_OS_CHARSET[i].identifier);
1383 i++;
1384 }
1385 return -1;
1386 }
1387 #endif
1388 /* }}} */
1389
1390 #ifdef HAVE_ICONV
1391 /* {{{ map_charset_name
1392 Changing charset name into something iconv understands, if necessary.
1393 Another purpose it to avoid BOMs in result string, adding BE if necessary
1394 e.g.UTF16 does not work form iconv, while UTF-16 does.
1395 */
map_charset_name(const char * cs_name,my_bool target_cs,char * buffer,size_t buff_len)1396 static void map_charset_name(const char *cs_name, my_bool target_cs, char *buffer, size_t buff_len)
1397 {
1398 char digits[3], endianness[3]= "BE";
1399
1400 if (sscanf(cs_name, "UTF%2[0-9]%2[LBE]", digits, endianness))
1401 {
1402 /* We should have at least digits. Endianness we write either default(BE), or what we found in the string */
1403 snprintf(buffer, buff_len, "UTF-%s%s", digits, endianness);
1404 }
1405 else
1406 {
1407 /* Not our client - copy as is*/
1408 strncpy(buffer, cs_name, buff_len - 1);
1409 buffer[buff_len - 1]= '\0';
1410 }
1411
1412 if (target_cs)
1413 {
1414 strncat(buffer, "//TRANSLIT", buff_len - strlen(buffer));
1415 }
1416 }
1417 /* }}} */
1418 #endif
1419
1420 /* {{{ mariadb_convert_string
1421 Converts string from one charset to another, and writes converted string to given buffer
1422 @param[in] from
1423 @param[in/out] from_len
1424 @param[in] from_cs
1425 @param[out] to
1426 @param[in/out] to_len
1427 @param[in] to_cs
1428 @param[out] errorcode
1429
1430 @return -1 in case of error, bytes used in the "to" buffer, otherwise
1431 */
mariadb_convert_string(const char * from,size_t * from_len,MARIADB_CHARSET_INFO * from_cs,char * to,size_t * to_len,MARIADB_CHARSET_INFO * to_cs,int * errorcode)1432 size_t STDCALL mariadb_convert_string(const char *from __attribute__((unused)),
1433 size_t *from_len __attribute__((unused)),
1434 MARIADB_CHARSET_INFO *from_cs __attribute__((unused)),
1435 char *to __attribute__((unused)),
1436 size_t *to_len __attribute__((unused)),
1437 MARIADB_CHARSET_INFO *to_cs __attribute__((unused)), int *errorcode)
1438 {
1439 #ifndef HAVE_ICONV
1440 *errorcode= ENOTSUP;
1441 return -1;
1442 #else
1443 iconv_t conv= 0;
1444 size_t rc= -1;
1445 size_t save_len= *to_len;
1446 char to_encoding[128], from_encoding[128];
1447
1448 *errorcode= 0;
1449
1450 /* check if conversion is supported */
1451 if (!from_cs || !from_cs->encoding || !from_cs->encoding[0] ||
1452 !to_cs || !to_cs->encoding || !to_cs->encoding[0])
1453 {
1454 *errorcode= EINVAL;
1455 return rc;
1456 }
1457
1458 map_charset_name(to_cs->encoding, 1, to_encoding, sizeof(to_encoding));
1459 map_charset_name(from_cs->encoding, 0, from_encoding, sizeof(from_encoding));
1460
1461 if ((conv= iconv_open(to_encoding, from_encoding)) == (iconv_t)-1)
1462 {
1463 *errorcode= errno;
1464 goto error;
1465 }
1466 if ((rc= iconv(conv, IF_WIN(,IF_SOLARIS(,(char **)))&from, from_len, &to, to_len)) == (size_t)-1)
1467 {
1468 *errorcode= errno;
1469 goto error;
1470 }
1471 rc= save_len - *to_len;
1472 error:
1473 if (conv != (iconv_t)-1)
1474 iconv_close(conv);
1475 return rc;
1476 #endif
1477 }
1478 /* }}} */
1479
1480