# NOTE(review): presumably skips the test when the server lacks gbk support;
# confirm against include/have_gbk.inc.
-- source include/have_gbk.inc

#
# Tests with the gbk character set
#
--disable_warnings
drop table if exists t1;
--enable_warnings

# These two user variables are set right before ctype_common.inc is sourced;
# presumably the include reads them to pick the charset/collation under test.
SET @test_character_set= 'gbk';
SET @test_collation= 'gbk_chinese_ci';
-- source include/ctype_common.inc

# Run the shared ctype includes first with the case-insensitive collation,
# then again with the binary one (ctype_ascii_order.inc is only run for
# gbk_chinese_ci).
SET NAMES gbk;
SET collation_connection='gbk_chinese_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
-- source include/ctype_like_escape.inc
-- source include/ctype_like_range_f1f2.inc
-- source include/ctype_ascii_order.inc
SET collation_connection='gbk_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
-- source include/ctype_like_escape.inc
-- source include/ctype_like_range_f1f2.inc

#
# Bug#11987 mysql will truncate the text when
# the text contains GBK char:"0xA3A0" and "0xA1"
#
SET NAMES gbk;
CREATE TABLE t1 (a text) character set gbk;
INSERT INTO t1 VALUES (0xA3A0),(0xA1A1);
# hex() makes any truncation of the stored bytes visible in the result.
SELECT hex(a) FROM t1 ORDER BY a;
DROP TABLE t1;

#
# Bug#15375: Unassigned multibyte codes are broken
# into parts when converting to Unicode.
# This query should return 0x003F0041. I.e. it should
# scan unassigned double-byte character 0xA140, convert
# it as QUESTION MARK 0x003F and then scan the next
# character, which is a single byte character 0x41.
#
select hex(convert(_gbk 0xA14041 using ucs2));

# End of 4.1 tests

#
# Bug#21620 ALTER TABLE affects other columns
#
create table t1 (c1 text not null, c2 text not null) character set gbk;
alter table t1 change c1 c1 mediumtext character set gbk not null;
# Only c1 was changed above; the output shows whether c2 kept its definition.
show create table t1;
drop table t1;

#
# Bug#35993: severe memory corruption and crash with multibyte conversion
#

CREATE TABLE t1(a MEDIUMTEXT CHARACTER SET gbk,
                b MEDIUMTEXT CHARACTER SET big5);
INSERT INTO t1 VALUES
  (REPEAT(0x1125,200000), REPEAT(0x1125,200000)), ('', ''), ('', '');

# The selected value goes into a user variable, so the 200000-repeat
# strings are not written to the result file.
SELECT a FROM t1 GROUP BY 1 LIMIT 1 INTO @nullll;
SELECT b FROM t1 GROUP BY 1 LIMIT 1 INTO @nullll;

DROP TABLES t1;

--echo End of 5.0 tests


--echo #
--echo # Start of 5.5 tests
--echo #

--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=gbk_chinese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (each is the cross join of the 16 hex digits with themselves)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all codes [80..FF][20..FF]
# Expected valid gbk codes [81..FE][40..7E,80..FE]
#
CREATE TABLE t1 AS
SELECT concat(head, tail) AS code, ' ' AS a
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
# Fill column a with the character for each hex code; IGNORE lets the
# statement continue past codes that are not valid gbk. The count below
# excludes rows where a compares equal to '?'.
UPDATE IGNORE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'?';
#
# Display all characters that have upper or lower case mapping.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
#
# Make sure all possible conversion happened
#
# Expect U+216A to U+216B ROMAN NUMERAL ELEVEN to ROMAN TWELVE
#
# (rows where lower-casing in gbk and lower-casing in utf8 disagree)
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
      HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect
# U+00E0 LATIN SMALL LETTER A WITH GRAVE
# U+00E1 LATIN SMALL LETTER A WITH ACUTE
# U+00E8 LATIN SMALL LETTER E WITH GRAVE
# U+00E9 LATIN SMALL LETTER E WITH ACUTE
# U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
# U+00EC LATIN SMALL LETTER I WITH GRAVE
# U+00ED LATIN SMALL LETTER I WITH ACUTE
# U+00F2 LATIN SMALL LETTER O WITH GRAVE
# U+00F3 LATIN SMALL LETTER O WITH ACUTE
# U+00F9 LATIN SMALL LETTER U WITH GRAVE
# U+00FA LATIN SMALL LETTER U WITH ACUTE
# U+00FC LATIN SMALL LETTER U WITH DIAERESIS
# U+0101 LATIN SMALL LETTER A WITH MACRON
# U+0113 LATIN SMALL LETTER E WITH MACRON
# U+011B LATIN SMALL LETTER E WITH CARON
# U+012B LATIN SMALL LETTER I WITH MACRON
# U+0144 LATIN SMALL LETTER N WITH ACUTE
# U+0148 LATIN SMALL LETTER N WITH CARON
# U+014D LATIN SMALL LETTER O WITH MACRON
# U+016B LATIN SMALL LETTER U WITH MACRON
# U+01CE LATIN SMALL LETTER A WITH CARON
# U+01D0 LATIN SMALL LETTER I WITH CARON
# U+01D2 LATIN SMALL LETTER O WITH CARON
# U+01D4 LATIN SMALL LETTER U WITH CARON
# U+01D6 LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
# U+01D8 LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
# U+01DA LATIN SMALL LETTER U WITH DIAERESIS AND CARON
# U+01DC LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
      HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;

DROP TABLE t1;




--echo #
--echo # End of 5.5 tests
--echo #


--echo #
--echo # Start of 5.6 tests
--echo #

--echo #
--echo # WL#3664 WEIGHT_STRING
--echo #

# The same set of weight_string includes is run twice: once with the
# connection collation left by SET NAMES gbk, once with gbk_bin.
set names gbk;
--source include/weight_string.inc
--source include/weight_string_l1.inc
--source include/weight_string_A1A1.inc
--source include/weight_string_8140.inc
--source include/weight_string_8EA1.inc

set collation_connection=gbk_bin;
--source include/weight_string.inc
--source include/weight_string_l1.inc
--source include/weight_string_A1A1.inc
--source include/weight_string_8140.inc
--source include/weight_string_8EA1.inc

--echo #
--echo # End of 5.6 tests
--echo #

--echo #
--echo # Start of 10.0 tests
--echo #

# NOTE(review): $ctype_unescape_combinations is presumably read by
# include/ctype_unescape.inc; confirm there what 'selected' means.
let $ctype_unescape_combinations=selected;
--source include/ctype_unescape.inc

--character_set gbk
SET NAMES gbk;
--source include/ctype_E05C.inc

SET NAMES utf8, character_set_connection=gbk;
--source include/ctype_mdev13118.inc

--echo #
--echo # MDEV-9886 Illegal mix of collations with a view comparing a field to a binary constant
--echo #

# The same comparison is run directly and through a view.
SET NAMES latin1;
CREATE TABLE t1 (a TEXT CHARACTER SET gbk);
INSERT INTO t1 VALUES (0xEE5D);
SELECT a<>0xEE5D AS a FROM t1;
CREATE VIEW v1 AS SELECT a<>0xEE5D AS a FROM t1;
SHOW CREATE VIEW v1;
SELECT * FROM v1;
DROP VIEW v1;
DROP TABLE t1;


--echo #
--echo # End of 10.0 tests
--echo #


--echo #
--echo # Start of 10.1 tests
--echo #

--echo #
--echo # MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
--echo #

# t1: one row per building-block byte sequence, labelled in `type` with the
# roles the test assigns to it (ascii / bad / head / tail / mb2 / unassigned).
CREATE TABLE t1 (
  id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
  b VARBINARY(16),
  type SET('ascii','bad','head','tail','mb2','unassigned')
);
INSERT INTO t1 (b, type) VALUES (0x40,   'ascii,tail');
INSERT INTO t1 (b, type) VALUES (0x80,   'tail');
INSERT INTO t1 (b, type) VALUES (0x81,   'head,tail');
INSERT INTO t1 (b, type) VALUES (0xFF,   'bad');
INSERT INTO t1 (b, type) VALUES (0xA140, 'mb2,unassigned');
INSERT INTO t1 (b, type) VALUES (0xA1A3, 'mb2');
INSERT INTO t1 (b, type) VALUES (0xFE40, 'mb2');
# t2: every ordered pair of t1 rows, with the two byte sequences concatenated
# and both type labels kept for filtering.
CREATE TABLE t2 AS SELECT
  CONCAT(t1.b,t2.b) AS b,
  t1.type AS type1,
  t2.type AS type2,
  CONCAT('[',t1.type,'][',t2.type,']') AS comment
FROM t1, t1 t2;

# t3: b keeps the raw bytes, c receives the same bytes in a gbk column.
# The queries below split rows into b=c and b<>c, i.e. stored as-is versus
# changed on insert. After each step the rows just tested are deleted from
# t2, so later steps only see the remaining combinations.
CREATE TABLE t3
(
  b VARBINARY(16),
  c VARCHAR(16) CHARACTER SET gbk,
  comment VARCHAR(128)
);
--echo #
--echo # A combination of two valid characters, should give no warnings
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE
  (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
  (FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2))
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;

--echo #
--echo # Sequences that start with a tail or a bad byte,
--echo # or end with a bad byte, all should be fixed.
--echo #
INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE type1='tail' OR type1='bad' OR type2='bad'
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;

--echo #
--echo # Sequences that start with an ASCII or an MB2 character,
--echo # followed by a non-ASCII tail, all should be fixed.
--echo #
INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
  AND (FIND_IN_SET('tail',type2) AND NOT FIND_IN_SET('ascii',type2))
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;

--echo #
--echo # Other sequences
--echo #
# Whatever two-part sequences remain in t2 after the deletions above.
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t3;
DROP TABLE t3;
DROP TABLE t2;

# Rebuild t2 as every ordered triple of t1 rows (bytes concatenated, all
# three type labels kept), then repeat the classification with three parts.
CREATE TABLE t2 AS SELECT
  CONCAT(t1.b,t2.b,t3.b) AS b,
  t1.type AS type1,
  t2.type AS type2,
  t3.type AS type3,
  CONCAT('[',t1.type,'][',t2.type,'][',t3.type,']') AS comment
FROM t1, t1 t2,t1 t3;
SELECT COUNT(*) FROM t2;

# b keeps the raw bytes, c receives the same bytes in a gbk column.
CREATE TABLE t3
(
  b VARBINARY(16),
  c VARCHAR(16) CHARACTER SET gbk,
  comment VARCHAR(128)
);

--echo #
--echo # A combination of three valid characters, should give no warnings
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE
  (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
  (FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2)) AND
  (FIND_IN_SET('ascii',type3) OR FIND_IN_SET('mb2',type3))
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Remove the rows just tested so later steps only see what is left.
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;

--echo #
--echo # Sequences that start with a tail or a bad byte,
--echo # or have a bad byte, all should be fixed.
--echo #
# Each step below follows the same pattern: copy a filtered subset of t2
# into t3 (raw bytes into b, the same bytes into the gbk column c), report
# rows stored unchanged (b=c) and rows that differ (b<>c), then delete the
# tested rows from t2 and empty t3.
INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE type1='tail' OR type1='bad' OR type2='bad' OR type3='bad'
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;

--echo #
--echo # Sequences that start with an ASCII or an MB2 character,
--echo # followed by a pure non-ASCII tail, all should be fixed.
--echo #
INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
  AND type2='tail'
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;

--echo #
--echo # Sequences that consist of two ASCII or MB2 characters,
--echo # followed by a pure non-ASCII tail, all should be fixed.
--echo #
INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1)) AND
      (FIND_IN_SET('mb2',type2) OR FIND_IN_SET('ascii',type2)) AND
      type3='tail'
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;


--echo #
--echo # Sequences that consist of two MB2 characters,
--echo # followed by a non-ASCII head or tail, all should be fixed.
--echo #
INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE FIND_IN_SET('mb2',type1) AND FIND_IN_SET('mb2',type2)
  AND NOT FIND_IN_SET('ascii',type3)
  AND NOT FIND_IN_SET('mb2',type3)
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Remove the rows just tested so later steps only see what is left.
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;


--echo #
--echo # Sequences that consist of head + tail + MB2 should go without warnings
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE FIND_IN_SET('head',type1)
  AND FIND_IN_SET('tail',type2)
  AND FIND_IN_SET('mb2',type3)
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;

--echo #
--echo # Sequences that consist of (ascii or mb2) + head + tail should go without warnings
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1))
  AND FIND_IN_SET('head',type2)
  AND FIND_IN_SET('tail',type3)
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;


# NOTE(review): the banner below is commented out in the script, so it does
# not appear in the recorded result; enabling it would require re-recording.
#--echo #
#--echo # Other sequences
#--echo #
# Whatever three-part sequences remain in t2 after the deletions above.
INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;

DROP TABLE t3;
DROP TABLE t2;
DROP TABLE t1;

--echo #
--echo # END OF MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
--echo #

--echo #
--echo # MDEV-7661 Unexpected result for: CAST(0xHHHH AS CHAR CHARACTER SET xxx) for incorrect byte sequences
--echo #
# sql_mode is cleared for the two CASTs and restored to its default afterwards.
set sql_mode='';
SELECT HEX(CAST(0xA341 AS CHAR CHARACTER SET gb2312));
SELECT HEX(CONVERT(CAST(0xA341 AS CHAR CHARACTER SET gb2312) USING utf8));
set sql_mode=default;


--echo #
--echo # End of 10.1 tests
--echo #

--echo #
--echo # Start of 10.2 tests
--echo #

--echo #
--echo # MDEV-9811 LOAD DATA INFILE does not work well with gbk in some cases
--echo #
# The same data file is loaded twice with '@' as the line terminator:
# once in full, once skipping the first line.
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET gbk);
LOAD DATA INFILE '../../std_data/loaddata/mdev8711.txt' INTO TABLE t1 CHARACTER SET gbk LINES TERMINATED BY '@';
SELECT HEX(a) FROM t1;
DELETE FROM t1;
LOAD DATA INFILE '../../std_data/loaddata/mdev8711.txt' INTO TABLE t1 CHARACTER SET gbk LINES TERMINATED BY '@' IGNORE 1 LINES;
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # MDEV-9711 NO PAD Collatons
--echo #
# NOTE(review): $coll and $coll_pad are presumably read by
# include/ctype_pad_all_engines.inc as the NO PAD collation and its
# padding counterpart; confirm against the include.
SET character_set_connection=gbk;
let $coll='gbk_chinese_nopad_ci';
let $coll_pad='gbk_chinese_ci';
--source include/ctype_pad_all_engines.inc

let $coll='gbk_nopad_bin';
let $coll_pad='gbk_bin';
--source include/ctype_pad_all_engines.inc

--echo #
--echo # End of 10.2 tests
--echo #


--echo #
--echo # Start of 10.5 tests
--echo #

--echo #
--echo # MDEV-22625 SIGSEGV in intern_find_sys_var (optimized builds)
--echo #

# NOTE(review): @seq is set right before the include and is presumably the
# gbk byte sequence include/ctype_ident_sys.inc uses; confirm there.
SET NAMES gbk;
SET @seq=_gbk 0xAAA1;
--source include/ctype_ident_sys.inc

--echo #
--echo # End of 10.5 tests
--echo #