1# 2# Compare a field to an utf8 string literal with illegal byte sequences 3# 4 5--echo # 6--echo # Start of ctype_utf8_ilseq.inc 7--echo # 8 9--eval CREATE TABLE t1 ENGINE=$ENGINE AS SELECT REPEAT(' ', 60) AS ch LIMIT 0; 10ALTER TABLE t1 11 ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, 12 ADD KEY(ch); 13SHOW CREATE TABLE t1; 14 15INSERT INTO t1 (ch) VALUES ('admin'),('admin1'); 16SELECT ch FROM t1 WHERE ch='admin'; 17EXPLAIN SELECT ch FROM t1 WHERE ch='admin'; 18SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='admin'; 19SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='admin'; 20DELETE FROM t1; 21INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????'); 22INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b'); 23INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z'); 24INSERT INTO t1 (ch) VALUES ('z'); 25# LATIN SMALL LETTER A + LATIN CAPITAL LETTER E WITH GRAVE 26INSERT INTO t1 (ch) VALUES (_utf8 0x61D080); 27# LATIN SMALL LETTER A + ARMENIAN SMALL LETTER REH 28INSERT INTO t1 (ch) VALUES (_utf8 0x61D680); 29 30SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a' ORDER BY ch; 31SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='ab' ORDER BY ch; 32--replace_column 9 # 33EXPLAIN 34SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a' ORDER BY ch; 35SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a' ORDER BY ch; 36--replace_column 9 # 37EXPLAIN 38SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='ab' ORDER BY ch; 39SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='ab' ORDER BY ch; 40 41SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a' ORDER BY ch; 42SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'ab' ORDER BY ch; 43--replace_column 9 # 44EXPLAIN 45SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a' ORDER BY ch; 46SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a' ORDER BY ch; 47--replace_column 9 # 48EXPLAIN 49SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'ab' ORDER BY ch; 50SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'ab' ORDER BY ch; 51 52SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a' ORDER BY ch; 53SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'ab' ORDER BY ch; 54--replace_column 9 # 55EXPLAIN 56SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a' ORDER BY ch; 57SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a' ORDER BY ch; 58--replace_column 9 # 59EXPLAIN 60SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'ab' ORDER BY ch; 61SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'ab' ORDER BY ch; 62 63ALTER TABLE t1 DROP KEY ch; 64 65--echo # 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence 66SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,''''); 67PREPARE stmt FROM @query; 68EXECUTE stmt; 69SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b'''); 70PREPARE stmt FROM @query; 71EXECUTE stmt; 72 73# 74# Non-equality comparison currently work differently depending on collation: 75# 76# - utf8_general_ci falls back to memcmp() on bad byte 77# - utf8_unicode_ci treats bad bytes greater than any valid character 78# 79# For example, these two characters: 80# _utf8 0xD080 (U+00C8 LATIN CAPITAL LETTER E WITH GRAVE) 81# _utf8 0xD680 (U+0580 ARMENIAN SMALL LETTER REH) 82# 83# will give different results (depending on collation) when compared 84# to an incomplete byte sequence 0xD1 (mb2head not followed by mb2tail). 85# 86# For utf8_general_ci the result depends on the valid side: 87# - 0xD080 is smaller than 0xD1, because 0xD0 < 0xD1 88# - 0xD680 is greater than 0xD1, because 0xD6 > 0xD1 89# 90# For utf8_unicode_ci the result does not depend on the valid side: 91# - 0xD080 is smaller than 0xD1, because 0xD1 is greater than any valid character 92# - 0xD680 is smaller than 0xD1, because 0xD1 is greater than any valid character 93# 94# utf8_general_ci should be eventually fixed to treat bad bytes greater 95# than any valid character, similar to utf8_unicode_ci. 96# 97 98SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,''' ORDER BY ch'); 99PREPARE stmt FROM @query; 100EXECUTE stmt; 101SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,''' ORDER BY ch'); 102PREPARE stmt FROM @query; 103EXECUTE stmt; 104 105--echo # 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence 106SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,''' ORDER BY ch'); 107PREPARE stmt FROM @query; 108EXECUTE stmt; 109SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b'' ORDER BY ch'); 110PREPARE stmt FROM @query; 111EXECUTE stmt; 112 113--echo # 0x8F is a bad byte sequence (an mb2tail without mb2head) 114SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,''' ORDER BY ch'); 115PREPARE stmt FROM @query; 116EXECUTE stmt; 117SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b'' ORDER BY ch'); 118PREPARE stmt FROM @query; 119EXECUTE stmt; 120 121--echo # 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times) 122SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,''' ORDER BY ch'); 123PREPARE stmt FROM @query; 124EXECUTE stmt; 125SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b'' ORDER BY ch'); 126PREPARE stmt FROM @query; 127EXECUTE stmt; 128 129DROP TABLE t1; 130 131--echo # 132--echo # End of ctype_utf8_ilseq.inc 133--echo # 134