1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17use strict; 18use warnings; 19BEGIN { $| = 1; print "1..58\n"; } 20my $count = 0; 21sub ok ($;$) { 22 my $p = my $r = shift; 23 if (@_) { 24 my $x = shift; 25 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 26 } 27 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 28} 29 30use Unicode::Collate; 31 32ok(1); 33 34######################### 35 36# a standard collator (3.1.1) 37my $Collator = Unicode::Collate->new( 38 level => 1, 39 table => 'keys.txt', 40 normalization => undef, 41 42 entry => <<'ENTRIES', 43326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA 44326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA 453270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA 463271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA 473272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA 483273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA 493274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA 503275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A 513276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA 523277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA 533278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA 543279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA 55327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA 56327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA 57ENTRIES 58); 59 60my $hangul = Unicode::Collate->new( 61 level => 1, 62 table => 'keys.txt', 63 normalization => undef, 64 hangul_terminator => 16, 65 66 entry => <<'ENTRIES', 67326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA 68326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA 693270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA 703271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA 713272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA 723273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA 733274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA 743275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A 753276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA 763277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA 773278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA 783279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA 79327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA 80327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA 81ENTRIES 82); 83 84ok(ref $hangul, "Unicode::Collate"); 85 86######################### 87 88# LVX vs LVV: /GAA/ vs /GA/.latinA 89ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 90ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 91 92# LVX vs LVV: /GAA/ vs /GA/.hiraganaA 93ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 94ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 95 96# LVX vs LVV: /GAA/ vs /GA/.hanja 97ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 98ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 99 100# LVL vs LVT: /GA/./G/ vs /GAG/ 101ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 102ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 103 104# LVT vs LVX: /GAG/ vs /GA/.latinA 105ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 106ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 107 108# LVT vs LVX: /GAG/ vs /GA/.hiraganaA 109ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 110ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 111 112# LVT vs LVX: /GAG/ vs /GA/.hanja 113ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 114ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 115 116# LV vs Syl(LV): /GA/ vs /[GA]/ 117ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); 118ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); 119 120# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ 121ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 122ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 123 124# LVT vs Syl(LVT): /GAG/ vs /[GAG]/ 125ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 126ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 127 128# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 129ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 130ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 131 132# Syl(LVT) vs : /GAG/ vs /[GAG]/ 133ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 134ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 135 136######################### 137 138my $hangcirc = Unicode::Collate->new( 139 level => 1, 140 table => 'keys.txt', 141 normalization => undef, 142 hangul_terminator => 16, 143 144 entry => <<'ENTRIES', 145326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E][.10.0.0.0] # c.h.s. GA 146326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F][.10.0.0.0] # c.h.s. NA 1473270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270][.10.0.0.0] # c.h.s. DA 1483271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271][.10.0.0.0] # c.h.s. RA 1493272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272][.10.0.0.0] # c.h.s. MA 1503273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273][.10.0.0.0] # c.h.s. BA 1513274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274][.10.0.0.0] # c.h.s. SA 1523275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275][.10.0.0.0] # c.h.s. A 1533276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276][.10.0.0.0] # c.h.s. JA 1543277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277][.10.0.0.0] # c.h.s. CA 1553278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278][.10.0.0.0] # c.h.s. KA 1563279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279][.10.0.0.0] # c.h.s. TA 157327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A][.10.0.0.0] # c.h.s. PA 158327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B][.10.0.0.0] # c.h.s. HA 159ENTRIES 160); 161 162# LV vs Circled Syl(LV): /GA/ vs /(GA)/ 163ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}")); 164ok($hangul ->gt("\x{1100}\x{1161}", "\x{326E}")); 165ok($hangcirc->eq("\x{1100}\x{1161}", "\x{326E}")); 166 167# LV vs Circled Syl(LV): followed by latin A 168ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A")); 169ok($hangul ->lt("\x{1100}\x{1161}A", "\x{326E}A")); 170ok($hangcirc->eq("\x{1100}\x{1161}A", "\x{326E}A")); 171 172# LV vs Circled Syl(LV): followed by hiragana A 173ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 174ok($hangul ->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 175ok($hangcirc->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 176 177# LVT vs LVX: /GAG/ vs /GA/.hanja 178ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 179ok($hangul ->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 180ok($hangcirc->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 181 182######################### 183 184# checks contraction in LVT: 185# weights of these contractions may be non-sense. 186 187my $hangcont = Unicode::Collate->new( 188 level => 1, 189 table => 'keys.txt', 190 normalization => undef, 191 hangul_terminator => 16, 192 193 entry => <<'ENTRIES', 1941100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A 1951161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK 196ENTRIES 197); 198 199# cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/ 200ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); 201ok($hangcont->eq("\x{1100}\x{1161}", "\x{AC00}")); 202 203# cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/ 204ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 205ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 206 207# cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/ 208ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 209ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 210 211# L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/ 212ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}")); 213ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}")); 214 215# L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/ 216ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}")); 217ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}")); 218 219##### 220 221$Collator->change(hangul_terminator => 16); 222 223ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 224ok($Collator->gt("\x{1100}\x{1161}", "\x{326E}")); 225ok($Collator->lt("\x{1100}\x{1161}A", "\x{326E}A")); 226ok($Collator->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 227ok($Collator->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 228 229$Collator->change(hangul_terminator => 0); 230 231ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 232ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}")); 233ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A")); 234ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}")); 235ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}")); 236 237