1 2BEGIN { 3 unless ("A" eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate " . 5 "cannot stringify a Unicode code point\n"; 6 exit 0; 7 } 8 if ($ENV{PERL_CORE}) { 9 chdir('t') if -d 't'; 10 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 11 } 12} 13 14use strict; 15use warnings; 16BEGIN { $| = 1; print "1..58\n"; } 17my $count = 0; 18sub ok ($;$) { 19 my $p = my $r = shift; 20 if (@_) { 21 my $x = shift; 22 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 23 } 24 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 25} 26 27use Unicode::Collate; 28 29ok(1); 30 31######################### 32 33# a standard collator (3.1.1) 34my $Collator = Unicode::Collate->new( 35 level => 1, 36 table => 'keys.txt', 37 normalization => undef, 38 39 entry => <<'ENTRIES', 40326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA 41326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA 423270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA 433271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA 443272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA 453273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA 463274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA 473275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A 483276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA 493277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA 503278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA 513279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA 52327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA 53327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA 54ENTRIES 55); 56 57my $hangul = Unicode::Collate->new( 58 level => 1, 59 table => 'keys.txt', 60 normalization => undef, 61 hangul_terminator => 16, 62 63 entry => <<'ENTRIES', 64326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA 65326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA 663270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA 673271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA 683272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA 693273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA 703274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA 713275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A 723276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA 733277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA 743278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA 753279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA 76327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA 77327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA 78ENTRIES 79); 80 81ok(ref $hangul, "Unicode::Collate"); 82 83######################### 84 85# LVX vs LVV: /GAA/ vs /GA/.latinA 86ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 87ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 88 89# LVX vs LVV: /GAA/ vs /GA/.hiraganaA 90ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 91ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 92 93# LVX vs LVV: /GAA/ vs /GA/.hanja 94ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 95ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 96 97# LVL vs LVT: /GA/./G/ vs /GAG/ 98ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 99ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 100 101# LVT vs LVX: /GAG/ vs /GA/.latinA 102ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 103ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 104 105# LVT vs LVX: /GAG/ vs /GA/.hiraganaA 106ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 107ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 108 109# LVT vs LVX: /GAG/ vs /GA/.hanja 110ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 111ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 112 113# LV vs Syl(LV): /GA/ vs /[GA]/ 114ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); 115ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); 116 117# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ 118ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 119ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 120 121# LVT vs Syl(LVT): /GAG/ vs /[GAG]/ 122ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 123ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 124 125# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 126ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 127ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 128 129# Syl(LVT) vs : /GAG/ vs /[GAG]/ 130ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 131ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 132 133######################### 134 135my $hangcirc = Unicode::Collate->new( 136 level => 1, 137 table => 'keys.txt', 138 normalization => undef, 139 hangul_terminator => 16, 140 141 entry => <<'ENTRIES', 142326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E][.10.0.0.0] # c.h.s. GA 143326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F][.10.0.0.0] # c.h.s. NA 1443270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270][.10.0.0.0] # c.h.s. DA 1453271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271][.10.0.0.0] # c.h.s. RA 1463272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272][.10.0.0.0] # c.h.s. MA 1473273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273][.10.0.0.0] # c.h.s. BA 1483274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274][.10.0.0.0] # c.h.s. SA 1493275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275][.10.0.0.0] # c.h.s. A 1503276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276][.10.0.0.0] # c.h.s. JA 1513277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277][.10.0.0.0] # c.h.s. CA 1523278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278][.10.0.0.0] # c.h.s. KA 1533279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279][.10.0.0.0] # c.h.s. TA 154327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A][.10.0.0.0] # c.h.s. PA 155327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B][.10.0.0.0] # c.h.s. HA 156ENTRIES 157); 158 159# LV vs Circled Syl(LV): /GA/ vs /(GA)/ 160ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}")); 161ok($hangul ->gt("\x{1100}\x{1161}", "\x{326E}")); 162ok($hangcirc->eq("\x{1100}\x{1161}", "\x{326E}")); 163 164# LV vs Circled Syl(LV): followed by latin A 165ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A")); 166ok($hangul ->lt("\x{1100}\x{1161}A", "\x{326E}A")); 167ok($hangcirc->eq("\x{1100}\x{1161}A", "\x{326E}A")); 168 169# LV vs Circled Syl(LV): followed by hiragana A 170ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 171ok($hangul ->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 172ok($hangcirc->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 173 174# LVT vs LVX: /GAG/ vs /GA/.hanja 175ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 176ok($hangul ->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 177ok($hangcirc->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 178 179######################### 180 181# checks contraction in LVT: 182# weights of these contractions may be non-sense. 183 184my $hangcont = Unicode::Collate->new( 185 level => 1, 186 table => 'keys.txt', 187 normalization => undef, 188 hangul_terminator => 16, 189 190 entry => <<'ENTRIES', 1911100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A 1921161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK 193ENTRIES 194); 195 196# cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/ 197ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); 198ok($hangcont->eq("\x{1100}\x{1161}", "\x{AC00}")); 199 200# cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/ 201ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 202ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 203 204# cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/ 205ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 206ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 207 208# L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/ 209ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}")); 210ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}")); 211 212# L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/ 213ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}")); 214ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}")); 215 216##### 217 218$Collator->change(hangul_terminator => 16); 219 220ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 221ok($Collator->gt("\x{1100}\x{1161}", "\x{326E}")); 222ok($Collator->lt("\x{1100}\x{1161}A", "\x{326E}A")); 223ok($Collator->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 224ok($Collator->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 225 226$Collator->change(hangul_terminator => 0); 227 228ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 229ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}")); 230ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A")); 231ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 232ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 233 234