1#
2# Compare a field to an utf8 string literal with illegal byte sequences
3#
4
5--echo #
6--echo # Start of ctype_utf8_ilseq.inc
7--echo #
8
9--eval CREATE TABLE t1 ENGINE=$ENGINE AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;
10ALTER TABLE t1
11  ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
12  ADD KEY(ch);
13SHOW CREATE TABLE t1;
14
15INSERT INTO t1 (ch) VALUES ('admin'),('admin1');
16SELECT ch FROM t1 WHERE ch='admin��';
17EXPLAIN SELECT ch FROM t1 WHERE ch='admin��';
18SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='admin��';
19SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='admin��';
20DELETE FROM t1;
21INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
22INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
23INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
24INSERT INTO t1 (ch) VALUES ('z');
25# LATIN SMALL LETTER A + LATIN CAPITAL LETTER E WITH GRAVE
26INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
27# LATIN SMALL LETTER A + ARMENIAN SMALL LETTER REH
28INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
29
30SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a��' ORDER BY ch;
31SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a��b' ORDER BY ch;
32--replace_column 9 #
33EXPLAIN
34SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a��' ORDER BY ch;
35SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a��' ORDER BY ch;
36--replace_column 9 #
37EXPLAIN
38SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a��b' ORDER BY ch;
39SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a��b' ORDER BY ch;
40
41SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a��' ORDER BY ch;
42SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a��b' ORDER BY ch;
43--replace_column 9 #
44EXPLAIN
45SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a��' ORDER BY ch;
46SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a��' ORDER BY ch;
47--replace_column 9 #
48EXPLAIN
49SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a��b' ORDER BY ch;
50SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a��b' ORDER BY ch;
51
52SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a��' ORDER BY ch;
53SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a��b' ORDER BY ch;
54--replace_column 9 #
55EXPLAIN
56SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a��' ORDER BY ch;
57SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a��' ORDER BY ch;
58--replace_column 9 #
59EXPLAIN
60SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a��b' ORDER BY ch;
61SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a��b' ORDER BY ch;
62
63ALTER TABLE t1 DROP KEY ch;
64
65--echo # 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
66SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
67PREPARE stmt FROM @query;
68EXECUTE stmt;
69SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
70PREPARE stmt FROM @query;
71EXECUTE stmt;
72
73#
74# Non-equality comparison currently work differently depending on collation:
75#
76# - utf8_general_ci falls back to memcmp() on bad byte
77# - utf8_unicode_ci treats bad bytes greater than any valid character
78#
79#  For example, these two characters:
80#    _utf8 0xD080 (U+00C8 LATIN CAPITAL LETTER E WITH GRAVE)
81#    _utf8 0xD680 (U+0580 ARMENIAN SMALL LETTER REH)
82#
83#  will give different results (depending on collation) when compared
84#  to an incomplete byte sequence 0xD1 (mb2head not followed by mb2tail).
85#
86# For utf8_general_ci the result depends on the valid side:
87# - 0xD080 is smaller than 0xD1, because 0xD0 < 0xD1
88# - 0xD680 is greater than 0xD1, because 0xD6 > 0xD1
89#
90# For utf8_unicode_ci the result does not depend on the valid side:
91# - 0xD080 is smaller than 0xD1, because 0xD1 is greater than any valid character
92# - 0xD680 is smaller than 0xD1, because 0xD1 is greater than any valid character
93#
94# utf8_general_ci should be eventually fixed to treat bad bytes greater
95# than any valid character, similar to utf8_unicode_ci.
96#
97
98SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,''' ORDER BY ch');
99PREPARE stmt FROM @query;
100EXECUTE stmt;
101SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,''' ORDER BY ch');
102PREPARE stmt FROM @query;
103EXECUTE stmt;
104
105--echo # 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
106SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,''' ORDER BY ch');
107PREPARE stmt FROM @query;
108EXECUTE stmt;
109SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b'' ORDER BY ch');
110PREPARE stmt FROM @query;
111EXECUTE stmt;
112
113--echo # 0x8F is a bad byte sequence (an mb2tail without mb2head)
114SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,''' ORDER BY ch');
115PREPARE stmt FROM @query;
116EXECUTE stmt;
117SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b'' ORDER BY ch');
118PREPARE stmt FROM @query;
119EXECUTE stmt;
120
121--echo # 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
122SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,''' ORDER BY ch');
123PREPARE stmt FROM @query;
124EXECUTE stmt;
125SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b'' ORDER BY ch');
126PREPARE stmt FROM @query;
127EXECUTE stmt;
128
129DROP TABLE t1;
130
131--echo #
132--echo # End of ctype_utf8_ilseq.inc
133--echo #
134