1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. 2 3 This program is free software; you can redistribute it and/or modify 4 it under the terms of the GNU General Public License, version 2.0, 5 as published by the Free Software Foundation. 6 7 This program is also distributed with certain software (including 8 but not limited to OpenSSL) that is licensed under separate terms, 9 as designated in a particular file or component or in included license 10 documentation. The authors of MySQL hereby grant you an additional 11 permission to link the program and your derivative works with the 12 separately licensed software that they have included with MySQL. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License, version 2.0, for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program; if not, write to the Free Software 21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 22 23 /* 24 A better inplementation of the UNIX ctype(3) library. 25 */ 26 27 #ifndef _m_ctype_h 28 #define _m_ctype_h 29 30 #include "my_global.h" /* uint16, uchar */ 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 #define MY_CS_NAME_SIZE 32 37 #define MY_CS_CTYPE_TABLE_SIZE 257 38 #define MY_CS_TO_LOWER_TABLE_SIZE 256 39 #define MY_CS_TO_UPPER_TABLE_SIZE 256 40 #define MY_CS_SORT_ORDER_TABLE_SIZE 256 41 #define MY_CS_TO_UNI_TABLE_SIZE 256 42 43 #define CHARSET_DIR "charsets/" 44 45 #define my_wc_t ulong 46 47 #define MY_CS_REPLACEMENT_CHARACTER 0xFFFD 48 49 /* 50 On i386 we store Unicode->CS conversion tables for 51 some character sets using Big-endian order, 52 to copy two bytes at onces. 53 This gives some performance improvement. 54 */ 55 #ifdef __i386__ 56 #define MB2(x) (((x) >> 8) + (((x) & 0xFF) << 8)) 57 #define MY_PUT_MB2(s, code) { *((uint16*)(s))= (code); } 58 #else 59 #define MB2(x) (x) 60 #define MY_PUT_MB2(s, code) { (s)[0]= code >> 8; (s)[1]= code & 0xFF; } 61 #endif 62 63 64 65 typedef struct unicase_info_char_st 66 { 67 uint32 toupper; 68 uint32 tolower; 69 uint32 sort; 70 } MY_UNICASE_CHARACTER; 71 72 73 typedef struct unicase_info_st 74 { 75 my_wc_t maxchar; 76 const MY_UNICASE_CHARACTER **page; 77 } MY_UNICASE_INFO; 78 79 80 extern MY_UNICASE_INFO my_unicase_default; 81 extern MY_UNICASE_INFO my_unicase_turkish; 82 extern MY_UNICASE_INFO my_unicase_mysql500; 83 extern MY_UNICASE_INFO my_unicase_unicode520; 84 85 #define MY_UCA_MAX_CONTRACTION 6 86 #define MY_UCA_MAX_WEIGHT_SIZE 8 87 #define MY_UCA_WEIGHT_LEVELS 1 88 89 typedef struct my_contraction_t 90 { 91 my_wc_t ch[MY_UCA_MAX_CONTRACTION]; /* Character sequence */ 92 uint16 weight[MY_UCA_MAX_WEIGHT_SIZE];/* Its weight string, 0-terminated */ 93 my_bool with_context; 94 } MY_CONTRACTION; 95 96 97 98 typedef struct my_contraction_list_t 99 { 100 size_t nitems; /* Number of items in the list */ 101 MY_CONTRACTION *item; /* List of contractions */ 102 char *flags; /* Character flags, e.g. "is contraction head") */ 103 } MY_CONTRACTIONS; 104 105 106 my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc); 107 my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc); 108 uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c, 109 my_wc_t wc1, my_wc_t wc2); 110 111 112 /* Collation weights on a single level (e.g. primary, secondary, tertiarty) */ 113 typedef struct my_uca_level_info_st 114 { 115 my_wc_t maxchar; 116 uchar *lengths; 117 uint16 **weights; 118 MY_CONTRACTIONS contractions; 119 } MY_UCA_WEIGHT_LEVEL; 120 121 122 typedef struct uca_info_st 123 { 124 MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS]; 125 126 /* Logical positions */ 127 my_wc_t first_non_ignorable; 128 my_wc_t last_non_ignorable; 129 my_wc_t first_primary_ignorable; 130 my_wc_t last_primary_ignorable; 131 my_wc_t first_secondary_ignorable; 132 my_wc_t last_secondary_ignorable; 133 my_wc_t first_tertiary_ignorable; 134 my_wc_t last_tertiary_ignorable; 135 my_wc_t first_trailing; 136 my_wc_t last_trailing; 137 my_wc_t first_variable; 138 my_wc_t last_variable; 139 140 } MY_UCA_INFO; 141 142 143 144 extern MY_UCA_INFO my_uca_v400; 145 146 147 typedef struct uni_ctype_st 148 { 149 uchar pctype; 150 uchar *ctype; 151 } MY_UNI_CTYPE; 152 153 extern MY_UNI_CTYPE my_uni_ctype[256]; 154 155 /* wm_wc and wc_mb return codes */ 156 #define MY_CS_ILSEQ 0 /* Wrong by sequence: wb_wc */ 157 #define MY_CS_ILUNI 0 /* Cannot encode Unicode to charset: wc_mb */ 158 #define MY_CS_TOOSMALL -101 /* Need at least one byte: wc_mb and mb_wc */ 159 #define MY_CS_TOOSMALL2 -102 /* Need at least two bytes: wc_mb and mb_wc */ 160 #define MY_CS_TOOSMALL3 -103 /* Need at least three bytes: wc_mb and mb_wc */ 161 /* These following three are currently not really used */ 162 #define MY_CS_TOOSMALL4 -104 /* Need at least 4 bytes: wc_mb and mb_wc */ 163 #define MY_CS_TOOSMALL5 -105 /* Need at least 5 bytes: wc_mb and mb_wc */ 164 #define MY_CS_TOOSMALL6 -106 /* Need at least 6 bytes: wc_mb and mb_wc */ 165 /* A helper macros for "need at least n bytes" */ 166 #define MY_CS_TOOSMALLN(n) (-100-(n)) 167 168 #define MY_SEQ_INTTAIL 1 169 #define MY_SEQ_SPACES 2 170 171 /* My charsets_list flags */ 172 #define MY_CS_COMPILED 1 /* compiled-in sets */ 173 #define MY_CS_CONFIG 2 /* sets that have a *.conf file */ 174 #define MY_CS_INDEX 4 /* sets listed in the Index file */ 175 #define MY_CS_LOADED 8 /* sets that are currently loaded */ 176 #define MY_CS_BINSORT 16 /* if binary sort order */ 177 #define MY_CS_PRIMARY 32 /* if primary collation */ 178 #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */ 179 #define MY_CS_UNICODE 128 /* is a charset is BMP Unicode */ 180 #define MY_CS_READY 256 /* if a charset is initialized */ 181 #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/ 182 #define MY_CS_CSSORT 1024 /* if case sensitive sort order */ 183 #define MY_CS_HIDDEN 2048 /* don't display in SHOW */ 184 #define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */ 185 #define MY_CS_NONASCII 8192 /* if not ASCII-compatible */ 186 #define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */ 187 #define MY_CS_LOWER_SORT 32768 /* If use lower case as weight */ 188 #define MY_CHARSET_UNDEFINED 0 189 190 /* Character repertoire flags */ 191 #define MY_REPERTOIRE_ASCII 1 /* Pure ASCII U+0000..U+007F */ 192 #define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */ 193 #define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */ 194 195 /* Flags for strxfrm */ 196 #define MY_STRXFRM_LEVEL1 0x00000001 /* for primary weights */ 197 #define MY_STRXFRM_LEVEL2 0x00000002 /* for secondary weights */ 198 #define MY_STRXFRM_LEVEL3 0x00000004 /* for tertiary weights */ 199 #define MY_STRXFRM_LEVEL4 0x00000008 /* fourth level weights */ 200 #define MY_STRXFRM_LEVEL5 0x00000010 /* fifth level weights */ 201 #define MY_STRXFRM_LEVEL6 0x00000020 /* sixth level weights */ 202 #define MY_STRXFRM_LEVEL_ALL 0x0000003F /* Bit OR for the above six */ 203 #define MY_STRXFRM_NLEVELS 6 /* Number of possible levels*/ 204 205 #define MY_STRXFRM_PAD_WITH_SPACE 0x00000040 /* if pad result with spaces */ 206 #define MY_STRXFRM_PAD_TO_MAXLEN 0x00000080 /* if pad tail(for filesort) */ 207 208 #define MY_STRXFRM_DESC_LEVEL1 0x00000100 /* if desc order for level1 */ 209 #define MY_STRXFRM_DESC_LEVEL2 0x00000200 /* if desc order for level2 */ 210 #define MY_STRXFRM_DESC_LEVEL3 0x00000300 /* if desc order for level3 */ 211 #define MY_STRXFRM_DESC_LEVEL4 0x00000800 /* if desc order for level4 */ 212 #define MY_STRXFRM_DESC_LEVEL5 0x00001000 /* if desc order for level5 */ 213 #define MY_STRXFRM_DESC_LEVEL6 0x00002000 /* if desc order for level6 */ 214 #define MY_STRXFRM_DESC_SHIFT 8 215 216 #define MY_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions */ 217 #define MY_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions */ 218 219 #define MY_STRXFRM_REVERSE_LEVEL1 0x00010000 /* if reverse order for level1 */ 220 #define MY_STRXFRM_REVERSE_LEVEL2 0x00020000 /* if reverse order for level2 */ 221 #define MY_STRXFRM_REVERSE_LEVEL3 0x00040000 /* if reverse order for level3 */ 222 #define MY_STRXFRM_REVERSE_LEVEL4 0x00080000 /* if reverse order for level4 */ 223 #define MY_STRXFRM_REVERSE_LEVEL5 0x00100000 /* if reverse order for level5 */ 224 #define MY_STRXFRM_REVERSE_LEVEL6 0x00200000 /* if reverse order for level6 */ 225 #define MY_STRXFRM_REVERSE_SHIFT 16 226 227 228 typedef struct my_uni_idx_st 229 { 230 uint16 from; 231 uint16 to; 232 const uchar *tab; 233 } MY_UNI_IDX; 234 235 typedef struct 236 { 237 uint beg; 238 uint end; 239 uint mb_len; 240 } my_match_t; 241 242 struct charset_info_st; 243 244 typedef struct my_charset_loader_st 245 { 246 char error[128]; 247 void *(*once_alloc)(size_t); 248 void *(*mem_malloc)(size_t); 249 void *(*mem_realloc)(void *, size_t); 250 void (*mem_free)(void *); 251 void (*reporter)(enum loglevel, const char *format, ...); 252 int (*add_collation)(struct charset_info_st *cs); 253 } MY_CHARSET_LOADER; 254 255 256 extern int (*my_string_stack_guard)(int); 257 258 /* See strings/CHARSET_INFO.txt for information about this structure */ 259 typedef struct my_collation_handler_st 260 { 261 my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *); 262 /* Collation routines */ 263 int (*strnncoll)(const struct charset_info_st *, 264 const uchar *, size_t, const uchar *, size_t, my_bool); 265 int (*strnncollsp)(const struct charset_info_st *, 266 const uchar *, size_t, const uchar *, size_t, 267 my_bool diff_if_only_endspace_difference); 268 size_t (*strnxfrm)(const struct charset_info_st *, 269 uchar *dst, size_t dstlen, uint nweights, 270 const uchar *src, size_t srclen, uint flags); 271 size_t (*strnxfrmlen)(const struct charset_info_st *, size_t); 272 my_bool (*like_range)(const struct charset_info_st *, 273 const char *s, size_t s_length, 274 pchar w_prefix, pchar w_one, pchar w_many, 275 size_t res_length, 276 char *min_str, char *max_str, 277 size_t *min_len, size_t *max_len); 278 int (*wildcmp)(const struct charset_info_st *, 279 const char *str,const char *str_end, 280 const char *wildstr,const char *wildend, 281 int escape,int w_one, int w_many); 282 283 int (*strcasecmp)(const struct charset_info_st *, const char *, 284 const char *); 285 286 uint (*instr)(const struct charset_info_st *, 287 const char *b, size_t b_length, 288 const char *s, size_t s_length, 289 my_match_t *match, uint nmatch); 290 291 /* Hash calculation */ 292 void (*hash_sort)(const struct charset_info_st *cs, const uchar *key, 293 size_t len, ulong *nr1, ulong *nr2); 294 my_bool (*propagate)(const struct charset_info_st *cs, const uchar *str, 295 size_t len); 296 } MY_COLLATION_HANDLER; 297 298 extern MY_COLLATION_HANDLER my_collation_mb_bin_handler; 299 extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler; 300 extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; 301 extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler; 302 303 /* Some typedef to make it easy for C++ to make function pointers */ 304 typedef int (*my_charset_conv_mb_wc)(const struct charset_info_st *, 305 my_wc_t *, const uchar *, const uchar *); 306 typedef int (*my_charset_conv_wc_mb)(const struct charset_info_st *, my_wc_t, 307 uchar *, uchar *); 308 typedef size_t (*my_charset_conv_case)(const struct charset_info_st *, 309 char *, size_t, char *, size_t); 310 311 312 /* See strings/CHARSET_INFO.txt about information on this structure */ 313 typedef struct my_charset_handler_st 314 { 315 my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader); 316 /* Multibyte routines */ 317 uint (*ismbchar)(const struct charset_info_st *, const char *, 318 const char *); 319 uint (*mbcharlen)(const struct charset_info_st *, uint c); 320 size_t (*numchars)(const struct charset_info_st *, const char *b, 321 const char *e); 322 size_t (*charpos)(const struct charset_info_st *, const char *b, 323 const char *e, size_t pos); 324 size_t (*well_formed_len)(const struct charset_info_st *, 325 const char *b,const char *e, 326 size_t nchars, int *error); 327 size_t (*lengthsp)(const struct charset_info_st *, const char *ptr, 328 size_t length); 329 size_t (*numcells)(const struct charset_info_st *, const char *b, 330 const char *e); 331 332 /* Unicode conversion */ 333 my_charset_conv_mb_wc mb_wc; 334 my_charset_conv_wc_mb wc_mb; 335 336 /* CTYPE scanner */ 337 int (*ctype)(const struct charset_info_st *cs, int *ctype, 338 const uchar *s, const uchar *e); 339 340 /* Functions for case and sort conversion */ 341 size_t (*caseup_str)(const struct charset_info_st *, char *); 342 size_t (*casedn_str)(const struct charset_info_st *, char *); 343 344 my_charset_conv_case caseup; 345 my_charset_conv_case casedn; 346 347 /* Charset dependant snprintf() */ 348 size_t (*snprintf)(const struct charset_info_st *, char *to, size_t n, 349 const char *fmt, 350 ...) MY_ATTRIBUTE((format(printf, 4, 5))); 351 size_t (*long10_to_str)(const struct charset_info_st *, char *to, size_t n, 352 int radix, long int val); 353 size_t (*longlong10_to_str)(const struct charset_info_st *, char *to, 354 size_t n, int radix, longlong val); 355 356 void (*fill)(const struct charset_info_st *, char *to, size_t len, 357 int fill); 358 359 /* String-to-number conversion routines */ 360 long (*strntol)(const struct charset_info_st *, const char *s, 361 size_t l, int base, char **e, int *err); 362 ulong (*strntoul)(const struct charset_info_st *, const char *s, 363 size_t l, int base, char **e, int *err); 364 longlong (*strntoll)(const struct charset_info_st *, const char *s, 365 size_t l, int base, char **e, int *err); 366 ulonglong (*strntoull)(const struct charset_info_st *, const char *s, 367 size_t l, int base, char **e, int *err); 368 double (*strntod)(const struct charset_info_st *, char *s, 369 size_t l, char **e, int *err); 370 longlong (*strtoll10)(const struct charset_info_st *cs, 371 const char *nptr, char **endptr, int *error); 372 ulonglong (*strntoull10rnd)(const struct charset_info_st *cs, 373 const char *str, size_t length, 374 int unsigned_fl, 375 char **endptr, int *error); 376 size_t (*scan)(const struct charset_info_st *, const char *b, 377 const char *e, int sq); 378 } MY_CHARSET_HANDLER; 379 380 extern MY_CHARSET_HANDLER my_charset_8bit_handler; 381 extern MY_CHARSET_HANDLER my_charset_ascii_handler; 382 extern MY_CHARSET_HANDLER my_charset_ucs2_handler; 383 384 385 /* 386 We define this CHARSET_INFO_DEFINED here to prevent a repeat of the 387 typedef in hash.c, which will cause a compiler error. 388 */ 389 #define CHARSET_INFO_DEFINED 390 391 /* See strings/CHARSET_INFO.txt about information on this structure */ 392 typedef struct charset_info_st 393 { 394 uint number; 395 uint primary_number; 396 uint binary_number; 397 uint state; 398 const char *csname; 399 const char *name; 400 const char *comment; 401 const char *tailoring; 402 const uchar *ctype; 403 const uchar *to_lower; 404 const uchar *to_upper; 405 const uchar *sort_order; 406 MY_UCA_INFO *uca; /* This can be changed in apply_one_rule() */ 407 const uint16 *tab_to_uni; 408 const MY_UNI_IDX *tab_from_uni; 409 const MY_UNICASE_INFO *caseinfo; 410 const struct lex_state_maps_st *state_maps; /* parser internal data */ 411 const uchar *ident_map; /* parser internal data */ 412 uint strxfrm_multiply; 413 uchar caseup_multiply; 414 uchar casedn_multiply; 415 uint mbminlen; 416 uint mbmaxlen; 417 uint mbmaxlenlen; 418 my_wc_t min_sort_char; 419 my_wc_t max_sort_char; /* For LIKE optimization */ 420 uchar pad_char; 421 my_bool escape_with_backslash_is_dangerous; 422 uchar levels_for_compare; 423 uchar levels_for_order; 424 425 MY_CHARSET_HANDLER *cset; 426 MY_COLLATION_HANDLER *coll; 427 428 } CHARSET_INFO; 429 #define ILLEGAL_CHARSET_INFO_NUMBER (~0U) 430 431 432 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin; 433 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1; 434 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename; 435 436 extern CHARSET_INFO my_charset_big5_chinese_ci; 437 extern CHARSET_INFO my_charset_big5_bin; 438 extern CHARSET_INFO my_charset_cp932_japanese_ci; 439 extern CHARSET_INFO my_charset_cp932_bin; 440 extern CHARSET_INFO my_charset_cp1250_czech_ci; 441 extern CHARSET_INFO my_charset_eucjpms_japanese_ci; 442 extern CHARSET_INFO my_charset_eucjpms_bin; 443 extern CHARSET_INFO my_charset_euckr_korean_ci; 444 extern CHARSET_INFO my_charset_euckr_bin; 445 extern CHARSET_INFO my_charset_gb2312_chinese_ci; 446 extern CHARSET_INFO my_charset_gb2312_bin; 447 extern CHARSET_INFO my_charset_gbk_chinese_ci; 448 extern CHARSET_INFO my_charset_gbk_bin; 449 extern CHARSET_INFO my_charset_gb18030_chinese_ci; 450 extern CHARSET_INFO my_charset_gb18030_bin; 451 extern CHARSET_INFO my_charset_latin1_german2_ci; 452 extern CHARSET_INFO my_charset_latin1_bin; 453 extern CHARSET_INFO my_charset_latin2_czech_ci; 454 extern CHARSET_INFO my_charset_sjis_japanese_ci; 455 extern CHARSET_INFO my_charset_sjis_bin; 456 extern CHARSET_INFO my_charset_tis620_thai_ci; 457 extern CHARSET_INFO my_charset_tis620_bin; 458 extern CHARSET_INFO my_charset_ucs2_general_ci; 459 extern CHARSET_INFO my_charset_ucs2_bin; 460 extern CHARSET_INFO my_charset_ucs2_unicode_ci; 461 extern CHARSET_INFO my_charset_ucs2_general_mysql500_ci; 462 extern CHARSET_INFO my_charset_ujis_japanese_ci; 463 extern CHARSET_INFO my_charset_ujis_bin; 464 extern CHARSET_INFO my_charset_utf16_bin; 465 extern CHARSET_INFO my_charset_utf16_general_ci; 466 extern CHARSET_INFO my_charset_utf16_unicode_ci; 467 extern CHARSET_INFO my_charset_utf16le_bin; 468 extern CHARSET_INFO my_charset_utf16le_general_ci; 469 extern CHARSET_INFO my_charset_utf32_bin; 470 extern CHARSET_INFO my_charset_utf32_general_ci; 471 extern CHARSET_INFO my_charset_utf32_unicode_ci; 472 473 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_utf8_general_ci; 474 extern CHARSET_INFO my_charset_utf8_tolower_ci; 475 extern CHARSET_INFO my_charset_utf8_unicode_ci; 476 extern CHARSET_INFO my_charset_utf8_bin; 477 extern CHARSET_INFO my_charset_utf8_general_mysql500_ci; 478 extern CHARSET_INFO my_charset_utf8mb4_bin; 479 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_utf8mb4_general_ci; 480 extern CHARSET_INFO my_charset_utf8mb4_unicode_ci; 481 #define MY_UTF8MB3 "utf8" 482 #define MY_UTF8MB4 "utf8mb4" 483 484 485 /* declarations for simple charsets */ 486 extern size_t my_strnxfrm_simple(const CHARSET_INFO *, 487 uchar *dst, size_t dstlen, uint nweights, 488 const uchar *src, size_t srclen, uint flags); 489 size_t my_strnxfrmlen_simple(const CHARSET_INFO *, size_t); 490 extern int my_strnncoll_simple(const CHARSET_INFO *, const uchar *, size_t, 491 const uchar *, size_t, my_bool); 492 493 extern int my_strnncollsp_simple(const CHARSET_INFO *, const uchar *, size_t, 494 const uchar *, size_t, 495 my_bool diff_if_only_endspace_difference); 496 497 extern void my_hash_sort_simple(const CHARSET_INFO *cs, 498 const uchar *key, size_t len, 499 ulong *nr1, ulong *nr2); 500 501 extern size_t my_lengthsp_8bit(const CHARSET_INFO *cs, const char *ptr, 502 size_t length); 503 504 extern uint my_instr_simple(const struct charset_info_st *, 505 const char *b, size_t b_length, 506 const char *s, size_t s_length, 507 my_match_t *match, uint nmatch); 508 509 510 /* Functions for 8bit */ 511 extern size_t my_caseup_str_8bit(const CHARSET_INFO *, char *); 512 extern size_t my_casedn_str_8bit(const CHARSET_INFO *, char *); 513 extern size_t my_caseup_8bit(const CHARSET_INFO *, char *src, size_t srclen, 514 char *dst, size_t dstlen); 515 extern size_t my_casedn_8bit(const CHARSET_INFO *, char *src, size_t srclen, 516 char *dst, size_t dstlen); 517 518 extern int my_strcasecmp_8bit(const CHARSET_INFO * cs, const char *, 519 const char *); 520 521 int my_mb_wc_8bit(const CHARSET_INFO *cs,my_wc_t *wc, const uchar *s, 522 const uchar *e); 523 int my_wc_mb_8bit(const CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e); 524 525 int my_mb_ctype_8bit(const CHARSET_INFO *,int *, const uchar *,const uchar *); 526 int my_mb_ctype_mb(const CHARSET_INFO *,int *, const uchar *,const uchar *); 527 528 size_t my_scan_8bit(const CHARSET_INFO *cs, const char *b, const char *e, 529 int sq); 530 531 size_t my_snprintf_8bit(const struct charset_info_st *, char *to, size_t n, 532 const char *fmt, ...) 533 MY_ATTRIBUTE((format(printf, 4, 5))); 534 535 long my_strntol_8bit(const CHARSET_INFO *, const char *s, size_t l, 536 int base, char **e, int *err); 537 ulong my_strntoul_8bit(const CHARSET_INFO *, const char *s, size_t l, 538 int base, char **e, int *err); 539 longlong my_strntoll_8bit(const CHARSET_INFO *, const char *s, size_t l, 540 int base, char **e, int *err); 541 ulonglong my_strntoull_8bit(const CHARSET_INFO *, const char *s, size_t l, 542 int base, char **e, int *err); 543 double my_strntod_8bit(const CHARSET_INFO *, char *s, size_t l, char **e, 544 int *err); 545 size_t my_long10_to_str_8bit(const CHARSET_INFO *, char *to, size_t l, 546 int radix, long int val); 547 size_t my_longlong10_to_str_8bit(const CHARSET_INFO *, char *to, size_t l, 548 int radix, longlong val); 549 550 longlong my_strtoll10_8bit(const CHARSET_INFO *cs, 551 const char *nptr, char **endptr, int *error); 552 longlong my_strtoll10_ucs2(const CHARSET_INFO *cs, 553 const char *nptr, char **endptr, int *error); 554 555 ulonglong my_strntoull10rnd_8bit(const CHARSET_INFO *cs, 556 const char *str, size_t length, int 557 unsigned_fl, char **endptr, int *error); 558 ulonglong my_strntoull10rnd_ucs2(const CHARSET_INFO *cs, 559 const char *str, size_t length, 560 int unsigned_fl, char **endptr, int *error); 561 562 void my_fill_8bit(const CHARSET_INFO *cs, char* to, size_t l, int fill); 563 564 /* For 8-bit character set */ 565 my_bool my_like_range_simple(const CHARSET_INFO *cs, 566 const char *ptr, size_t ptr_length, 567 pbool escape, pbool w_one, pbool w_many, 568 size_t res_length, 569 char *min_str, char *max_str, 570 size_t *min_length, size_t *max_length); 571 572 /* For ASCII-based multi-byte character sets with mbminlen=1 */ 573 my_bool my_like_range_mb(const CHARSET_INFO *cs, 574 const char *ptr, size_t ptr_length, 575 pbool escape, pbool w_one, pbool w_many, 576 size_t res_length, 577 char *min_str, char *max_str, 578 size_t *min_length, size_t *max_length); 579 580 /* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */ 581 my_bool my_like_range_generic(const CHARSET_INFO *cs, 582 const char *ptr, size_t ptr_length, 583 pbool escape, pbool w_one, pbool w_many, 584 size_t res_length, 585 char *min_str, char *max_str, 586 size_t *min_length, size_t *max_length); 587 588 int my_wildcmp_8bit(const CHARSET_INFO *, 589 const char *str,const char *str_end, 590 const char *wildstr,const char *wildend, 591 int escape, int w_one, int w_many); 592 593 int my_wildcmp_bin(const CHARSET_INFO *, 594 const char *str,const char *str_end, 595 const char *wildstr,const char *wildend, 596 int escape, int w_one, int w_many); 597 598 size_t my_numchars_8bit(const CHARSET_INFO *, const char *b, const char *e); 599 size_t my_numcells_8bit(const CHARSET_INFO *, const char *b, const char *e); 600 size_t my_charpos_8bit(const CHARSET_INFO *, const char *b, const char *e, 601 size_t pos); 602 size_t my_well_formed_len_8bit(const CHARSET_INFO *, const char *b, 603 const char *e, size_t pos, int *error); 604 uint my_mbcharlen_8bit(const CHARSET_INFO *, uint c); 605 606 607 /* Functions for multibyte charsets */ 608 extern size_t my_caseup_str_mb(const CHARSET_INFO *, char *); 609 extern size_t my_casedn_str_mb(const CHARSET_INFO *, char *); 610 extern size_t my_caseup_mb(const CHARSET_INFO *, char *src, size_t srclen, 611 char *dst, size_t dstlen); 612 extern size_t my_casedn_mb(const CHARSET_INFO *, char *src, size_t srclen, 613 char *dst, size_t dstlen); 614 extern size_t my_caseup_mb_varlen(const CHARSET_INFO *, char *src, 615 size_t srclen, char *dst, size_t dstlen); 616 extern size_t my_casedn_mb_varlen(const CHARSET_INFO *, char *src, 617 size_t srclen, char *dst, size_t dstlen); 618 extern size_t my_caseup_ujis(const CHARSET_INFO *, char *src, size_t srclen, 619 char *dst, size_t dstlen); 620 extern size_t my_casedn_ujis(const CHARSET_INFO *, char *src, size_t srclen, 621 char *dst, size_t dstlen); 622 extern int my_strcasecmp_mb(const CHARSET_INFO * cs,const char *, 623 const char *); 624 625 int my_wildcmp_mb(const CHARSET_INFO *, 626 const char *str,const char *str_end, 627 const char *wildstr,const char *wildend, 628 int escape, int w_one, int w_many); 629 size_t my_numchars_mb(const CHARSET_INFO *, const char *b, const char *e); 630 size_t my_numcells_mb(const CHARSET_INFO *, const char *b, const char *e); 631 size_t my_charpos_mb(const CHARSET_INFO *, const char *b, const char *e, 632 size_t pos); 633 size_t my_well_formed_len_mb(const CHARSET_INFO *, const char *b, 634 const char *e, size_t pos, int *error); 635 uint my_instr_mb(const struct charset_info_st *, 636 const char *b, size_t b_length, 637 const char *s, size_t s_length, 638 my_match_t *match, uint nmatch); 639 640 int my_strnncoll_mb_bin(const CHARSET_INFO * cs, 641 const uchar *s, size_t slen, 642 const uchar *t, size_t tlen, 643 my_bool t_is_prefix); 644 645 int my_strnncollsp_mb_bin(const CHARSET_INFO *cs, 646 const uchar *a, size_t a_length, 647 const uchar *b, size_t b_length, 648 my_bool diff_if_only_endspace_difference); 649 650 int my_wildcmp_mb_bin(const CHARSET_INFO *cs, 651 const char *str,const char *str_end, 652 const char *wildstr,const char *wildend, 653 int escape, int w_one, int w_many); 654 655 int my_strcasecmp_mb_bin(const CHARSET_INFO * cs MY_ATTRIBUTE((unused)), 656 const char *s, const char *t); 657 658 void my_hash_sort_mb_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)), 659 const uchar *key, size_t len,ulong *nr1, ulong *nr2); 660 661 size_t my_strnxfrm_mb(const CHARSET_INFO *, 662 uchar *dst, size_t dstlen, uint nweights, 663 const uchar *src, size_t srclen, uint flags); 664 665 size_t my_strnxfrm_unicode(const CHARSET_INFO *, 666 uchar *dst, size_t dstlen, uint nweights, 667 const uchar *src, size_t srclen, uint flags); 668 669 size_t my_strnxfrm_unicode_full_bin(const CHARSET_INFO *, 670 uchar *dst, size_t dstlen, uint nweights, 671 const uchar *src, size_t srclen, uint flags); 672 size_t my_strnxfrmlen_unicode_full_bin(const CHARSET_INFO *, size_t); 673 674 int my_wildcmp_unicode(const CHARSET_INFO *cs, 675 const char *str, const char *str_end, 676 const char *wildstr, const char *wildend, 677 int escape, int w_one, int w_many, 678 const MY_UNICASE_INFO *weights); 679 680 extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader, 681 const char *buf, size_t buflen); 682 extern char *my_strchr(const CHARSET_INFO *cs, const char *str, 683 const char *end, pchar c); 684 extern size_t my_strcspn(const CHARSET_INFO *cs, const char *str, 685 const char *end, const char *reject, 686 size_t reject_length); 687 688 my_bool my_propagate_simple(const CHARSET_INFO *cs, const uchar *str, 689 size_t len); 690 my_bool my_propagate_complex(const CHARSET_INFO *cs, const uchar *str, 691 size_t len); 692 693 694 uint my_string_repertoire(const CHARSET_INFO *cs, const char *str, size_t len); 695 my_bool my_charset_is_ascii_based(const CHARSET_INFO *cs); 696 my_bool my_charset_is_8bit_pure_ascii(const CHARSET_INFO *cs); 697 uint my_charset_repertoire(const CHARSET_INFO *cs); 698 699 700 uint my_strxfrm_flag_normalize(uint flags, uint nlevels); 701 void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend, 702 uint flags, uint level); 703 size_t my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO *cs, 704 uchar *str, uchar *frmend, uchar *strend, 705 uint nweights, uint flags, uint level); 706 707 my_bool my_charset_is_ascii_compatible(const CHARSET_INFO *cs); 708 709 const MY_CONTRACTIONS *my_charset_get_contractions(const CHARSET_INFO *cs, 710 int level); 711 712 extern size_t my_vsnprintf_ex(const CHARSET_INFO *cs, char *to, size_t n, 713 const char* fmt, va_list ap); 714 715 size_t my_convert(char *to, size_t to_length, const CHARSET_INFO *to_cs, 716 const char *from, size_t from_length, 717 const CHARSET_INFO *from_cs, uint *errors); 718 719 uint my_mbcharlen_ptr(const CHARSET_INFO *cs, const char *s, const char *e); 720 721 #define _MY_U 01 /* Upper case */ 722 #define _MY_L 02 /* Lower case */ 723 #define _MY_NMR 04 /* Numeral (digit) */ 724 #define _MY_SPC 010 /* Spacing character */ 725 #define _MY_PNT 020 /* Punctuation */ 726 #define _MY_CTR 040 /* Control character */ 727 #define _MY_B 0100 /* Blank */ 728 #define _MY_X 0200 /* heXadecimal digit */ 729 730 731 #define my_isascii(c) (!((c) & ~0177)) 732 #define my_toascii(c) ((c) & 0177) 733 #define my_tocntrl(c) ((c) & 31) 734 #define my_toprint(c) ((c) | 64) 735 #define my_toupper(s,c) (char) ((s)->to_upper[(uchar) (c)]) 736 #define my_tolower(s,c) (char) ((s)->to_lower[(uchar) (c)]) 737 #define my_isalpha(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L)) 738 #define my_isupper(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_U) 739 #define my_islower(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_L) 740 #define my_isdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_NMR) 741 #define my_isxdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_X) 742 #define my_isalnum(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR)) 743 #define my_isspace(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_SPC) 744 #define my_ispunct(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_PNT) 745 #define my_isprint(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B)) 746 #define my_isgraph(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR)) 747 #define my_iscntrl(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_CTR) 748 749 /* Some macros that should be cleaned up a little */ 750 #define my_isvar(s,c) (my_isalnum(s,c) || (c) == '_') 751 #define my_isvar_start(s,c) (my_isalpha(s,c) || (c) == '_') 752 753 #define my_binary_compare(s) ((s)->state & MY_CS_BINSORT) 754 #define use_strnxfrm(s) ((s)->state & MY_CS_STRNXFRM) 755 #define my_strnxfrm(cs, d, dl, s, sl) \ 756 ((cs)->coll->strnxfrm((cs), (d), (dl), (dl), (s), (sl), MY_STRXFRM_PAD_WITH_SPACE)) 757 #define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0)) 758 #define my_like_range(s, a, b, c, d, e, f, g, h, i, j) \ 759 ((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j))) 760 #define my_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m))) 761 #define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b))) 762 #define my_charpos(cs, b, e, num) (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num)) 763 764 765 #define use_mb(s) ((s)->cset->ismbchar != NULL) 766 #define my_ismbchar(s, a, b) ((s)->cset->ismbchar((s), (a), (b))) 767 #define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a))) 768 /** 769 Get the length of gb18030 code by the given two leading bytes 770 771 @param[in] s charset_info 772 @param[in] a first byte of gb18030 code 773 @param[in] b second byte of gb18030 code 774 @return the length of gb18030 code starting with given two bytes, 775 the length would be 2 or 4 for valid gb18030 code, 776 or 0 for invalid gb18030 code 777 */ 778 #define my_mbcharlen_2(s, a, b) ((s)->cset->mbcharlen((s),((((a) & 0xFF) << 8) + ((b) & 0xFF)))) 779 /** 780 Get the maximum length of leading bytes needed to determine the length of a 781 multi-byte gb18030 code 782 783 @param[in] s charset_info 784 @return number of leading bytes we need, would be 2 for gb18030 785 and 1 for all other charsets 786 */ 787 #define my_mbmaxlenlen(s) ((s)->mbmaxlenlen) 788 /** 789 Judge if the given byte is a possible leading byte for a charset. 790 For gb18030 whose mbmaxlenlen is 2, we can't determine the length of 791 a multi-byte character by looking at the first byte only 792 793 @param[in] s charset_info 794 @param[in] i possible leading byte 795 @return true if it is, otherwise false 796 */ 797 #define my_ismb1st(s, i) \ 798 (my_mbcharlen((s), (i)) > 1 || \ 799 (my_mbmaxlenlen((s)) == 2 && my_mbcharlen((s), (i)) == 0)) 800 801 #define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a))) 802 #define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a))) 803 #define my_strntol(s, a, b, c, d, e) ((s)->cset->strntol((s),(a),(b),(c),(d),(e))) 804 #define my_strntoul(s, a, b, c, d, e) ((s)->cset->strntoul((s),(a),(b),(c),(d),(e))) 805 #define my_strntoll(s, a, b, c, d, e) ((s)->cset->strntoll((s),(a),(b),(c),(d),(e))) 806 #define my_strntoull(s, a, b, c,d, e) ((s)->cset->strntoull((s),(a),(b),(c),(d),(e))) 807 #define my_strntod(s, a, b, c, d) ((s)->cset->strntod((s),(a),(b),(c),(d))) 808 809 #ifdef __cplusplus 810 } 811 #endif 812 813 #endif /* _m_ctype_h */ 814