#
# WL#2673 Unicode collation algorithm new version
#
# Exercises case folding, UCA sort weights and LIKE matching for characters
# that were added in Unicode 5.0.0, 5.1.0 and 5.2.0.
#
# NOTE(review): t1.c gets its character set and collation from the string
# expression in the CREATE TABLE ... AS SELECT below, so the session
# character set/collation is presumably chosen by the caller before this
# script runs - confirm against the test that sources it.
#
# Statement text is intentionally left untouched: mysqltest echoes each
# statement into the recorded result by default, so only comments may change
# without re-recording.
#

# LIMIT 0 copies no rows, so t1 starts out empty with a single column c.
# SHOW CREATE TABLE records the column definition that was derived.
CREATE TABLE t1 AS SELECT repeat('a', 10) as c LIMIT 0;
SHOW CREATE TABLE t1;

#
# Unicode-5.0.0 characters
#
# Every value is written with the _utf32 introducer, i.e. the hex literal is
# the UTF-32 encoding of one code point.
#

# Latin Extended-B and IPA Extensions
INSERT INTO t1 VALUES (_utf32 0x0180),(_utf32 0x023A);
INSERT INTO t1 VALUES (_utf32 0x023B),(_utf32 0x023C);
INSERT INTO t1 VALUES (_utf32 0x023D),(_utf32 0x023E);
INSERT INTO t1 VALUES (_utf32 0x0241),(_utf32 0x0242);
INSERT INTO t1 VALUES (_utf32 0x0243),(_utf32 0x0244);
INSERT INTO t1 VALUES (_utf32 0x0245),(_utf32 0x0246);
INSERT INTO t1 VALUES (_utf32 0x0247),(_utf32 0x0248);
INSERT INTO t1 VALUES (_utf32 0x0249),(_utf32 0x024A);
INSERT INTO t1 VALUES (_utf32 0x024B),(_utf32 0x024C);
INSERT INTO t1 VALUES (_utf32 0x024D),(_utf32 0x024E);
INSERT INTO t1 VALUES (_utf32 0x024F),(_utf32 0x026B);
INSERT INTO t1 VALUES (_utf32 0x027D),(_utf32 0x0289);
INSERT INTO t1 VALUES (_utf32 0x028C);

# Greek and Coptic
INSERT INTO t1 VALUES (_utf32 0x037B), (_utf32 0x037C);
INSERT INTO t1 VALUES (_utf32 0x037D), (_utf32 0x03FD);
INSERT INTO t1 VALUES (_utf32 0x03FE), (_utf32 0x03FF);

# Cyrillic
INSERT INTO t1 VALUES (_utf32 0x04C0), (_utf32 0x04CF);
INSERT INTO t1 VALUES (_utf32 0x04F6), (_utf32 0x04F7);
INSERT INTO t1 VALUES (_utf32 0x04FA), (_utf32 0x04FB);
INSERT INTO t1 VALUES (_utf32 0x04FC), (_utf32 0x04FD);
INSERT INTO t1 VALUES (_utf32 0x04FE), (_utf32 0x04FF);
INSERT INTO t1 VALUES (_utf32 0x0510), (_utf32 0x0511);
INSERT INTO t1 VALUES (_utf32 0x0512), (_utf32 0x0513);

# Georgian, Georgian Supplement
INSERT INTO t1 VALUES (_utf32 0x10A0), (_utf32 0x10A1);
INSERT INTO t1 VALUES (_utf32 0x10A2), (_utf32 0x10A3);
INSERT INTO t1 VALUES (_utf32 0x10A4), (_utf32 0x10A5);
INSERT INTO t1 VALUES (_utf32 0x10A6), (_utf32 0x10A7);
INSERT INTO t1 VALUES (_utf32 0x2D00), (_utf32 0x2D01);
INSERT INTO t1 VALUES (_utf32 0x2D02), (_utf32 0x2D03);
INSERT INTO t1 VALUES (_utf32 0x2D04), (_utf32 0x2D05);
INSERT INTO t1 VALUES (_utf32 0x2D06), (_utf32 0x2D07);

# Phonetic Extensions
INSERT INTO t1 VALUES (_utf32 0x1D7D);

# Letterlike Symbols
INSERT INTO t1 VALUES (_utf32 0x2132),(_utf32 0x214E);

# Number Forms
INSERT INTO t1 VALUES (_utf32 0x2183),(_utf32 0x2184);

# Coptic
INSERT INTO t1 VALUES (_utf32 0x2C80), (_utf32 0x2C81);
INSERT INTO t1 VALUES (_utf32 0x2C82), (_utf32 0x2C83);
INSERT INTO t1 VALUES (_utf32 0x2C84), (_utf32 0x2C85);
INSERT INTO t1 VALUES (_utf32 0x2C86), (_utf32 0x2C87);
INSERT INTO t1 VALUES (_utf32 0x2C88), (_utf32 0x2C89);
INSERT INTO t1 VALUES (_utf32 0x2C8A), (_utf32 0x2C8B);
INSERT INTO t1 VALUES (_utf32 0x2C8C), (_utf32 0x2C8D);
INSERT INTO t1 VALUES (_utf32 0x2C8E), (_utf32 0x2C8F);

# Latin Extended-C
INSERT INTO t1 VALUES (_utf32 0x2C60), (_utf32 0x2C61);
INSERT INTO t1 VALUES (_utf32 0x2C62), (_utf32 0x2C63);
INSERT INTO t1 VALUES (_utf32 0x2C64), (_utf32 0x2C65);
INSERT INTO t1 VALUES (_utf32 0x2C66), (_utf32 0x2C67);
INSERT INTO t1 VALUES (_utf32 0x2C68), (_utf32 0x2C69);
INSERT INTO t1 VALUES (_utf32 0x2C6A), (_utf32 0x2C6B);
INSERT INTO t1 VALUES (_utf32 0x2C6C), (_utf32 0x2C75);
INSERT INTO t1 VALUES (_utf32 0x2C76);

# Glagolitic
INSERT INTO t1 VALUES (_utf32 0x2C00), (_utf32 0x2C01);
INSERT INTO t1 VALUES (_utf32 0x2C02), (_utf32 0x2C03);
INSERT INTO t1 VALUES (_utf32 0x2C04), (_utf32 0x2C05);
INSERT INTO t1 VALUES (_utf32 0x2C06), (_utf32 0x2C07);
INSERT INTO t1 VALUES (_utf32 0x2C30), (_utf32 0x2C31);
INSERT INTO t1 VALUES (_utf32 0x2C32), (_utf32 0x2C33);
INSERT INTO t1 VALUES (_utf32 0x2C34), (_utf32 0x2C35);
INSERT INTO t1 VALUES (_utf32 0x2C36), (_utf32 0x2C37);

# Deseret (supplementary plane: code points above U+FFFF)
INSERT INTO t1 VALUES (_utf32 0x10400), (_utf32 0x10401);
INSERT INTO t1 VALUES (_utf32 0x10402), (_utf32 0x10403);
INSERT INTO t1 VALUES (_utf32 0x10404), (_utf32 0x10405);
INSERT INTO t1 VALUES (_utf32 0x10406), (_utf32 0x10407);
INSERT INTO t1 VALUES (_utf32 0x10428), (_utf32 0x10429);
INSERT INTO t1 VALUES (_utf32 0x1042A), (_utf32 0x1042B);
INSERT INTO t1 VALUES (_utf32 0x1042C), (_utf32 0x1042D);
INSERT INTO t1 VALUES (_utf32 0x1042E), (_utf32 0x1042F);


#
# Unicode 5.1.0 characters
#

INSERT INTO t1 VALUES (_utf32 0x0370); # GREEK CAPITAL LETTER HETA
INSERT INTO t1 VALUES (_utf32 0x0371); # GREEK SMALL LETTER HETA
INSERT INTO t1 VALUES (_utf32 0x0372); # GREEK CAPITAL LETTER ARCHAIC SAMPI
INSERT INTO t1 VALUES (_utf32 0x0373); # GREEK SMALL LETTER ARCHAIC SAMPI

INSERT INTO t1 VALUES (_utf32 0x0514); # CYRILLIC CAPITAL LETTER LHA
INSERT INTO t1 VALUES (_utf32 0x0515); # CYRILLIC SMALL LETTER LHA
INSERT INTO t1 VALUES (_utf32 0x0516); # CYRILLIC CAPITAL LETTER RHA
INSERT INTO t1 VALUES (_utf32 0x0517); # CYRILLIC SMALL LETTER RHA

INSERT INTO t1 VALUES (_utf32 0xA640); # CYRILLIC CAPITAL LETTER ZEMLYA
INSERT INTO t1 VALUES (_utf32 0xA641); # CYRILLIC SMALL LETTER ZEMLYA
INSERT INTO t1 VALUES (_utf32 0xA642); # CYRILLIC CAPITAL LETTER DZELO
INSERT INTO t1 VALUES (_utf32 0xA643); # CYRILLIC SMALL LETTER DZELO

INSERT INTO t1 VALUES (_utf32 0xA722); # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
INSERT INTO t1 VALUES (_utf32 0xA723); # LATIN SMALL LETTER EGYPTOLOGICAL ALEF
INSERT INTO t1 VALUES (_utf32 0xA724); # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
INSERT INTO t1 VALUES (_utf32 0xA725); # LATIN SMALL LETTER EGYPTOLOGICAL AIN

INSERT INTO t1 VALUES (_utf32 0xA726); # LATIN CAPITAL LETTER HENG
INSERT INTO t1 VALUES (_utf32 0xA727); # LATIN SMALL LETTER HENG
INSERT INTO t1 VALUES (_utf32 0xA728); # LATIN CAPITAL LETTER TZ
INSERT INTO t1 VALUES (_utf32 0xA729); # LATIN SMALL LETTER TZ
INSERT INTO t1 VALUES (_utf32 0xA72A); # LATIN CAPITAL LETTER TRESILLO
INSERT INTO t1 VALUES (_utf32 0xA72B); # LATIN SMALL LETTER TRESILLO

#
# Unicode 5.2.0 characters
#
# The trailing comments are the semicolon-separated UnicodeData.txt records
# for each code point (name, general category, ..., case mappings).
#

INSERT INTO t1 VALUES (_utf32 0x2CEB); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI;Lu;0;L;;;;;N;;;;2CEC;
INSERT INTO t1 VALUES (_utf32 0x2CEC); # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI;Ll;0;L;;;;;N;;;2CEB;;2CEB
INSERT INTO t1 VALUES (_utf32 0x2CED); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA;Lu;0;L;;;;;N;;;;2CEE;
INSERT INTO t1 VALUES (_utf32 0x2CEE); # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA;Ll;0;L;;;;;N;;;2CED;;2CED

#
# Check case folding and UCA weights
#
# For every row: the stored bytes, the lower- and upper-cased bytes and the
# collation weight string. BINARY c is the tie-breaker so that rows which
# compare equal under the collation still come out in a fixed order.
#
SELECT hex(c), hex(lower(c)), hex(upper(c)), hex(weight_string(c)), c
FROM t1 ORDER BY c, BINARY c;


#
# Check that LIKE works fine with and without index.
# This test makes sure that cs->min_sort_char and cs->max_sort_char
# are set properly
# Also check that LIKE is case insensitive for supplementary characters
#
# The extra rows all start with the letter a, followed by nothing, U+FFFF,
# U+10FFFF and U+10400 respectively, so the prefix pattern has to cover the
# whole range of possible second characters.
# U+10400 and U+10428 are the capital and small forms of the same Deseret
# letter, so each of the two exact-match LIKE patterns is expected to find
# both rows when matching is case insensitive.
#
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0xFFFF));
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10FFFF));
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10400));
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c;
# Same three queries again, now with an index on c. The EXPLAIN records the
# plan used for the prefix LIKE once the index exists.
ALTER TABLE t1 ADD KEY(c);
EXPLAIN SELECT hex(c) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c;

DROP TABLE t1;