1-- source include/have_gbk.inc
2
3#
4# Tests with the gbk character set
5#
# Start from a clean slate: drop any leftover t1. Warnings are switched off
# around the DROP, presumably so that an "unknown table" note does not end up
# in the recorded output.
6--disable_warnings
7drop table if exists t1;
8--enable_warnings
9
# User variables set right before sourcing ctype_common.inc. Presumably the
# include reads them to pick the character set and collation under test
# (the include is not visible here -- confirm).
10SET @test_character_set= 'gbk';
11SET @test_collation= 'gbk_chinese_ci';
12-- source include/ctype_common.inc
13
# Run the shared ctype include files twice on a gbk connection: first with
# collation_connection gbk_chinese_ci, then with gbk_bin.
# ctype_ascii_order.inc is sourced only for gbk_chinese_ci.
14SET NAMES gbk;
15SET collation_connection='gbk_chinese_ci';
16-- source include/ctype_filesort.inc
17-- source include/ctype_innodb_like.inc
18-- source include/ctype_like_escape.inc
19-- source include/ctype_like_range_f1f2.inc
20-- source include/ctype_ascii_order.inc
21SET collation_connection='gbk_bin';
22-- source include/ctype_filesort.inc
23-- source include/ctype_innodb_like.inc
24-- source include/ctype_like_escape.inc
25-- source include/ctype_like_range_f1f2.inc
26
27#
28# Bug#11987 mysql will truncate the text when
29# the text contain GBK char:"0xA3A0" and "0xA1"
30#
# Store two double-byte values given as hex literals in a gbk TEXT column and
# read them back through hex(), so a shortened value would show up directly
# in the output.
31SET NAMES gbk;
32CREATE TABLE t1 (a text) character set gbk;
33INSERT INTO t1 VALUES (0xA3A0),(0xA1A1);
34SELECT hex(a) FROM t1 ORDER BY a;
35DROP TABLE t1;
36
37#
38# Bug#15375: Unassigned multibyte codes are broken
39# into parts when converting to Unicode.
40# This query should return 0x003F0041, i.e. it should
41# scan the unassigned double-byte character 0xA140, convert
42# it to QUESTION MARK 0x003F and then scan the next
43# character, which is the single-byte character 0x41.
44#
45select hex(convert(_gbk 0xA14041 using ucs2));
46
47# End of 4.1 tests
48
49#
50# Bug#21620 ALTER TABLE affects other columns
51#
# Only c1 is altered (text -> mediumtext). SHOW CREATE TABLE prints the full
# definition afterwards, which makes any unintended change to c2 visible.
52create table t1 (c1 text not null, c2 text not null) character set gbk;
53alter table t1 change c1 c1 mediumtext  character set gbk not null;
54show create table t1;
55drop table t1;
56
57#
58# Bug#35993: severe memory corruption and crash with multibyte conversion
59#
60
# One gbk column and one big5 column. The first row holds the two bytes
# 0x11 0x25 repeated 200000 times (400000 bytes) in each column, followed by
# two rows of empty strings.
61CREATE TABLE t1(a MEDIUMTEXT CHARACTER SET gbk,
62                b MEDIUMTEXT CHARACTER SET big5);
63INSERT INTO t1 VALUES
64  (REPEAT(0x1125,200000), REPEAT(0x1125,200000)), ('', ''), ('', '');
65
# GROUP BY 1 groups on the selected column itself. The single result row goes
# into a user variable, presumably so that the value is not printed.
66SELECT a FROM t1 GROUP BY 1 LIMIT 1 INTO @nullll;
67SELECT b FROM t1 GROUP BY 1 LIMIT 1 INTO @nullll;
68
69DROP TABLES t1;
70
71--echo End of 5.0 tests
72
73
74--echo #
75--echo # Start of 5.5 tests
76--echo #
77
78--echo #
79--echo # Testing WL#4583 Case conversion in Asian character sets
80--echo #
81#
82# Populate t1 with all hex digits
83#
# Client character set is utf8 while the connection collation is
# gbk_chinese_ci. t1 is first used as a helper holding the 16 hex digits.
84SET NAMES utf8;
85SET collation_connection=gbk_chinese_ci;
86CREATE TABLE t1 (b VARCHAR(2));
87INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
88INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
89#
90# Populate tables head and tail with values '00'-'FF'
91#
# A self cross join of the 16 digits yields 16*16 = 256 two-digit strings
# in each table.
92CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
93CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
94DROP TABLE t1;
95#
96# Populate table t1 with all codes [80..FF][20..FF]
97# Expected valid gbk codes [81..FE][40..7E,80..FE]
98#
# t1 is recreated here: "code" is the four-digit hex text of a candidate
# double-byte code, "a" starts out as a single space and is filled in by the
# UPDATE below.
99CREATE TABLE t1 AS
100SELECT concat(head, tail) AS code, ' ' AS a
101FROM head, tail
102WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '20' AND 'FF')
103ORDER BY head, tail;
104DROP TEMPORARY TABLE head, tail;
105SHOW CREATE TABLE t1;
# UPDATE IGNORE keeps going when unhex(code) is not storable in column a.
# The following count filters on '?', presumably the value such codes end up
# with -- confirm against the recorded result.
106UPDATE IGNORE t1 SET a=unhex(code) ORDER BY code;
107SELECT COUNT(*) FROM t1 WHERE a<>'?';
108#
109# Display all characters that have upper or lower case mapping.
110#
# Rows whose bytes change under upper() or lower(); both the hex form and the
# characters themselves are printed.
111SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
112#
113# Make sure all possible conversion happened
114#
115# Expect U+216A to U+216B ROMAN NUMERAL ELEVEN to ROMAN TWELVE
116#
# Lists the rows where lower-casing before the cast to utf8 gives different
# bytes than lower-casing after the cast.
117SELECT * FROM t1
118WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
119      HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
120#
121# Expect
122#       U+00E0 LATIN SMALL LETTER A WITH GRAVE
123#       U+00E1 LATIN SMALL LETTER A WITH ACUTE
124#       U+00E8 LATIN SMALL LETTER E WITH GRAVE
125#       U+00E9 LATIN SMALL LETTER E WITH ACUTE
126#       U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
127#       U+00EC LATIN SMALL LETTER I WITH GRAVE
128#       U+00ED LATIN SMALL LETTER I WITH ACUTE
129#       U+00F2 LATIN SMALL LETTER O WITH GRAVE
130#       U+00F3 LATIN SMALL LETTER O WITH ACUTE
131#       U+00F9 LATIN SMALL LETTER U WITH GRAVE
132#       U+00FA LATIN SMALL LETTER U WITH ACUTE
133#       U+00FC LATIN SMALL LETTER U WITH DIAERESIS
134#       U+0101 LATIN SMALL LETTER A WITH MACRON
135#       U+0113 LATIN SMALL LETTER E WITH MACRON
136#       U+011B LATIN SMALL LETTER E WITH CARON
137#       U+012B LATIN SMALL LETTER I WITH MACRON
138#       U+0144 LATIN SMALL LETTER N WITH ACUTE
139#       U+0148 LATIN SMALL LETTER N WITH CARON
140#       U+014D LATIN SMALL LETTER O WITH MACRON
141#       U+016B LATIN SMALL LETTER U WITH MACRON
142#       U+01CE LATIN SMALL LETTER A WITH CARON
143#       U+01D0 LATIN SMALL LETTER I WITH CARON
144#       U+01D2 LATIN SMALL LETTER O WITH CARON
145#       U+01D4 LATIN SMALL LETTER U WITH CARON
146#       U+01D6 LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
147#       U+01D8 LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
148#       U+01DA LATIN SMALL LETTER U WITH DIAERESIS AND CARON
149#       U+01DC LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
150#
# Same comparison for UPPER: lists the rows where upper-casing before the
# cast to utf8 gives different bytes than upper-casing after the cast.
151SELECT * FROM t1
152WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
153      HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
154
# End of the WL#4583 fixture.
155DROP TABLE t1;
156
157
158
159
160--echo #
161--echo # End of 5.5 tests
162--echo #
163
164
165--echo #
166--echo # Start of 5.6 tests
167--echo #
168
169--echo #
170--echo # WL#3664 WEIGHT_STRING
171--echo #
172
# The same five weight_string include files are run twice: first with the
# collation implied by "set names gbk", then with gbk_bin.
173set names gbk;
174--source include/weight_string.inc
175--source include/weight_string_l1.inc
176--source include/weight_string_A1A1.inc
177--source include/weight_string_8140.inc
178--source include/weight_string_8EA1.inc
179
180set collation_connection=gbk_bin;
181--source include/weight_string.inc
182--source include/weight_string_l1.inc
183--source include/weight_string_A1A1.inc
184--source include/weight_string_8140.inc
185--source include/weight_string_8EA1.inc
186
187--echo #
188--echo # End of 5.6 tests
189--echo #
190
191--echo #
192--echo # Start of 10.0 tests
193--echo #
194
# $ctype_unescape_combinations is set right before sourcing
# ctype_unescape.inc. Presumably the include reads it to choose which
# combinations to run (include not visible here -- confirm).
195let $ctype_unescape_combinations=selected;
196--source include/ctype_unescape.inc
197
# --character_set is a mysqltest command, not SQL. Presumably it makes
# mysqltest itself treat the following test text as gbk -- confirm against
# the mysqltest command reference. SET NAMES gbk switches the session.
198--character_set gbk
199SET NAMES gbk;
200--source include/ctype_E05C.inc
201
# Client and results in utf8, connection character set gbk.
202SET NAMES utf8, character_set_connection=gbk;
203--source include/ctype_mdev13118.inc
204
205--echo #
206--echo # MDEV-9886 Illegal mix of collations with a view comparing a field to a binary constant
207--echo #
208
# The session is latin1 while the column is gbk. The same comparison of the
# column with the hex constant 0xEE5D is evaluated directly and through a
# view. SHOW CREATE VIEW prints how the constant was stored in the view
# definition.
209SET NAMES latin1;
210CREATE TABLE t1 (a TEXT CHARACTER SET gbk);
211INSERT INTO t1 VALUES (0xEE5D);
212SELECT a<>0xEE5D AS a FROM t1;
213CREATE VIEW v1 AS SELECT a<>0xEE5D AS a FROM t1;
214SHOW CREATE VIEW v1;
215SELECT * FROM v1;
216DROP VIEW v1;
217DROP TABLE t1;
218
219
220--echo #
221--echo # End of 10.0 tests
222--echo #
223
224
225--echo #
226--echo # Start of 10.1 tests
227--echo #
228
229--echo #
230--echo # MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
231--echo #
232
# t1: one row per sample byte sequence. "type" is a SET of tags describing
# the role(s) of that sequence. The WHERE clauses of the sections below
# select on these tags.
233CREATE TABLE t1 (
234  id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
235  b VARBINARY(16),
236  type SET('ascii','bad','head','tail','mb2','unassigned')
237);
238INSERT INTO t1 (b, type) VALUES (0x40,   'ascii,tail');
239INSERT INTO t1 (b, type) VALUES (0x80,   'tail');
240INSERT INTO t1 (b, type) VALUES (0x81,   'head,tail');
241INSERT INTO t1 (b, type) VALUES (0xFF,   'bad');
242INSERT INTO t1 (b, type) VALUES (0xA140, 'mb2,unassigned');
243INSERT INTO t1 (b, type) VALUES (0xA1A3, 'mb2');
244INSERT INTO t1 (b, type) VALUES (0xFE40, 'mb2');
# t2: every ordered pair of t1 rows (7*7 = 49), with the concatenated bytes,
# the tags of both halves and a printable comment built from the tags.
245CREATE TABLE t2 AS SELECT
246  CONCAT(t1.b,t2.b) AS b,
247  t1.type AS type1,
248  t2.type AS type2,
249  CONCAT('[',t1.type,'][',t2.type,']') AS comment
250FROM t1, t1 t2;
251
# t3: target table. The sections below insert the same source value into both
# b (binary) and c (gbk), so the two columns can be compared afterwards.
252CREATE TABLE t3
253(
254  b VARBINARY(16),
255  c VARCHAR(16) CHARACTER SET gbk,
256  comment VARCHAR(128)
257);
258--echo #
259--echo # A combination of two valid characters, should give no warnings
260--echo #
# Pairs whose two halves are each tagged ascii or mb2. Plain INSERT, without
# IGNORE. The two SELECTs list the rows whose gbk value equals the source
# bytes (b=c) and the rows where it differs (b<>c).
261INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
262WHERE
263  (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
264  (FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2))
265ORDER BY b;
266SELECT COUNT(*) FROM t3;
267SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
268SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Remove what was just handled from t2, so that later sections only see the
# remainder, and empty t3 for the next section.
269DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
270DELETE FROM t3;
271
272--echo #
273--echo # Sequences that start with a tail or a bad byte,
274--echo # or end with a bad byte, all should be fixed.
275--echo #
# "=" compares the whole SET value: type1='tail' matches only the sample
# tagged exactly 'tail' (0x80), not 'ascii,tail' or 'head,tail'. 'bad' is the
# 0xFF sample. INSERT IGNORE is used because these rows are expected to be
# altered on the way into the gbk column.
276INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
277WHERE type1='tail' OR type1='bad' OR type2='bad'
278ORDER BY b;
279SELECT COUNT(*) FROM t3;
280SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
281SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
282DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
283DELETE FROM t3;
284
285--echo #
286--echo # Sequences that start with an ASCII or an MB2 character,
287--echo # followed by a non-ASCII tail, all should be fixed.
288--echo #
# Second half: tagged tail but not ascii. Of the t1 samples these are
# 0x80 ('tail') and 0x81 ('head,tail'). First half: tagged ascii or mb2.
289INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
290WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
291      AND (FIND_IN_SET('tail',type2) AND NOT FIND_IN_SET('ascii',type2))
292ORDER BY b;
293SELECT COUNT(*) FROM t3;
294SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
295SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
296DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
297DELETE FROM t3;
298
299--echo #
300--echo # Other sequences
301--echo #
# Whatever is still left in t2 (the earlier sections deleted what they
# handled). That remainder includes 0x81 followed by 0xA1A3: with the valid
# ranges [81..FE][40..7E,80..FE], 0x81A1 forms one character and 0xA3 is left
# over as a head byte without a tail. Like every other section that can meet
# ill-formed input, including the three-element "Other sequences" further
# down, this one therefore uses INSERT IGNORE, so the bad byte is reported as
# a warning instead of risking an error that aborts the test under a strict
# sql_mode.
# NOTE(review): the recorded result must be re-recorded after this change.
302INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
303SELECT COUNT(*) FROM t3;
304SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
305SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# End of the two-element part: t2 and t3 are rebuilt for three elements below.
306DELETE FROM t3;
307DROP TABLE t3;
308DROP TABLE t2;
309
# t2 is rebuilt as every ordered triple of t1 rows (7*7*7 = 343), with the
# concatenated bytes, the three tag sets and a printable comment.
# COUNT(*) prints the number of rows actually created.
310CREATE TABLE t2 AS SELECT
311  CONCAT(t1.b,t2.b,t3.b) AS b,
312  t1.type AS type1,
313  t2.type AS type2,
314  t3.type AS type3,
315  CONCAT('[',t1.type,'][',t2.type,'][',t3.type,']') AS comment
316FROM t1, t1 t2,t1 t3;
317SELECT COUNT(*) FROM t2;
318
# t3 is recreated with the same layout as before: b keeps the raw bytes,
# c receives the same value as gbk text.
319CREATE TABLE t3
320(
321  b VARBINARY(16),
322  c VARCHAR(16) CHARACTER SET gbk,
323  comment VARCHAR(128)
324);
325
326--echo #
327--echo # A combination of three valid characters, should give no warnings
328--echo #
# Triples in which every element is tagged ascii or mb2. Plain INSERT,
# without IGNORE.
329INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
330WHERE
331  (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
332  (FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2)) AND
333  (FIND_IN_SET('ascii',type3) OR FIND_IN_SET('mb2',type3))
334ORDER BY b;
335SELECT COUNT(*) FROM t3;
336SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
337SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
338DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
339DELETE FROM t3;
340
341--echo #
342--echo # Sequences that start with a tail or a bad byte,
343--echo # or have a bad byte, all should be fixed.
344--echo #
# "=" compares the whole SET value: 'tail' is only the 0x80 sample and 'bad'
# only the 0xFF sample. Selected: triples that start with 0x80 or 0xFF, or
# contain 0xFF in the second or third position.
345INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
346WHERE type1='tail' OR type1='bad' OR type2='bad' OR type3='bad'
347ORDER BY b;
348SELECT COUNT(*) FROM t3;
349SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
350SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
351DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
352DELETE FROM t3;
353
354--echo #
355--echo # Sequences that start with an ASCII or an MB2 character,
356--echo # followed by a pure non-ASCII tail, all should be fixed.
357--echo #
# First element tagged ascii or mb2. Second element is exactly 'tail', i.e.
# the 0x80 sample. The third element is not restricted.
358INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
359WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
360      AND type2='tail'
361ORDER BY b;
362SELECT COUNT(*) FROM t3;
363SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
364SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
365DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
366DELETE FROM t3;
367
368--echo #
369--echo # Sequences that consist of two ASCII or MB2 characters,
370--echo # followed by a pure non-ASCII tail, all should be fixed.
371--echo #
# First and second element tagged ascii or mb2. Third element is exactly
# 'tail', i.e. the 0x80 sample.
372INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
373WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1)) AND
374      (FIND_IN_SET('mb2',type2) OR FIND_IN_SET('ascii',type2)) AND
375      type3='tail'
376ORDER BY b;
377SELECT COUNT(*) FROM t3;
378SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
379SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
380DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
381DELETE FROM t3;
382
383
384--echo #
385--echo # Sequences that consist of two MB2 characters,
386--echo # followed by a non-ASCII head or tail, all should be fixed.
387--echo #
# First and second element tagged mb2. Third element tagged neither ascii nor
# mb2. Among the t1 samples only 0x80, 0x81 and 0xFF carry such tags. Rows
# already deleted from t2 by earlier sections are no longer available here.
388INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
389WHERE FIND_IN_SET('mb2',type1) AND FIND_IN_SET('mb2',type2)
390      AND NOT FIND_IN_SET('ascii',type3)
391      AND NOT FIND_IN_SET('mb2',type3)
392ORDER BY b;
393SELECT COUNT(*) FROM t3;
394SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
395SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
396DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
397DELETE FROM t3;
398
399
400--echo #
401--echo # Sequences that consist of head + tail + MB2 should go without warnings
402--echo #
# First element tagged head (only the 0x81 sample), second tagged tail
# (0x40, 0x80 or 0x81), third tagged mb2. Plain INSERT, without IGNORE.
403INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
404WHERE FIND_IN_SET('head',type1)
405  AND FIND_IN_SET('tail',type2)
406  AND FIND_IN_SET('mb2',type3)
407ORDER BY b;
408SELECT COUNT(*) FROM t3;
409SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
410SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
411DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
412DELETE FROM t3;
413
414--echo #
415--echo # Sequences that consist of (ascii or mb2) + head + tail should go without warnings
416--echo #
# First element tagged ascii or mb2, second tagged head (only the 0x81
# sample), third tagged tail (0x40, 0x80 or 0x81). Plain INSERT, without
# IGNORE.
417INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
418WHERE (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1))
419  AND FIND_IN_SET('head',type2)
420  AND FIND_IN_SET('tail',type3)
421ORDER BY b;
422SELECT COUNT(*) FROM t3;
423SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
424SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
# Drop the handled rows from t2 and empty t3 for the next section.
425DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
426DELETE FROM t3;
427
428
429#--echo #
430#--echo # Other sequences
431#--echo #
# Everything still left in t2 after the sections above. No WHERE clause, and
# INSERT IGNORE because the remainder is not limited to well-formed input.
432INSERT IGNORE INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
433SELECT COUNT(*) FROM t3;
434SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
435SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
436
# Remove all MDEV-6566 fixture tables.
437DROP TABLE t3;
438DROP TABLE t2;
439DROP TABLE t1;
440
441--echo #
442--echo # END OF MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
443--echo #
444
445--echo #
446--echo # MDEV-7661 Unexpected result for: CAST(0xHHHH AS CHAR CHARACTER SET xxx) for incorrect byte sequences
447--echo #
# sql_mode is emptied for the two casts and restored to the default
# afterwards.
# NOTE(review): both casts use character set gb2312 although this file tests
# gbk and only include/have_gbk.inc is sourced at the top -- confirm that
# gb2312 is intended here and is guaranteed to be available.
448set sql_mode='';
449SELECT HEX(CAST(0xA341 AS CHAR CHARACTER SET gb2312));
450SELECT HEX(CONVERT(CAST(0xA341 AS CHAR CHARACTER SET gb2312) USING utf8));
451set sql_mode=default;
452
453
454--echo #
455--echo # End of 10.1 tests
456--echo #
457
458--echo #
459--echo # Start of 10.2 tests
460--echo #
461
462--echo #
463--echo # MDEV-9811 LOAD DATA INFILE does not work well with gbk in some cases
464--echo #
# The same data file is loaded twice into a gbk column with '@' as the line
# terminator: once in full and once with IGNORE 1 LINES. HEX(a) shows the
# stored bytes after each load.
# NOTE(review): the data file is named mdev8711.txt while this section is
# labelled MDEV-9811 -- confirm the file is the intended one.
465CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET gbk);
466LOAD DATA INFILE '../../std_data/loaddata/mdev8711.txt' INTO TABLE t1 CHARACTER SET gbk LINES TERMINATED BY '@';
467SELECT HEX(a) FROM t1;
468DELETE FROM t1;
469LOAD DATA INFILE '../../std_data/loaddata/mdev8711.txt' INTO TABLE t1 CHARACTER SET gbk LINES TERMINATED BY '@' IGNORE 1 LINES;
470SELECT HEX(a) FROM t1;
471DROP TABLE t1;
472--echo #
473--echo # MDEV-9711 NO PAD Collatons
474--echo #
# ctype_pad_all_engines.inc is sourced twice, each time with a pair of
# mysqltest variables: $coll names a NOPAD collation, $coll_pad the matching
# padding collation. The variable values include the single quotes.
# Presumably the include reads both variables (not visible here -- confirm).
475SET character_set_connection=gbk;
476let $coll='gbk_chinese_nopad_ci';
477let $coll_pad='gbk_chinese_ci';
478--source include/ctype_pad_all_engines.inc
479
480let $coll='gbk_nopad_bin';
481let $coll_pad='gbk_bin';
482--source include/ctype_pad_all_engines.inc
483
484--echo #
485--echo # End of 10.2 tests
486--echo #
487
488
489--echo #
490--echo # Start of 10.5 tests
491--echo #
492
493--echo #
494--echo # MDEV-22625 SIGSEGV in intern_find_sys_var (optimized builds)
495--echo #
496
# @seq holds the double-byte gbk value 0xAAA1 and is set right before
# sourcing ctype_ident_sys.inc. Presumably the include uses @seq as its input
# (not visible here -- confirm).
497SET NAMES gbk;
498SET @seq=_gbk 0xAAA1;
499--source include/ctype_ident_sys.inc
500
501--echo #
502--echo # End of 10.5 tests
503--echo #
504