# Tests for Unicode::Collate's handling of Hangul: conjoining jamo sequences,
# precomposed syllables, circled Hangul syllables supplied through 'entry',
# and the effect of the 'hangul_terminator' option.

BEGIN {
    # Skip the whole file when pack('U') cannot produce a plain character.
    if ("A" ne pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # When run from the perl core tree, work inside t/ against ../lib.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 58 };

use strict;
use warnings;
use Unicode::Collate;

#########################

ok(1);

# Leading primary weight and short name for each circled Hangul syllable
# entry, listed in code point order starting at U+326E.
my @CIRCLED_SYLLABLES = (
    [ '1831', 'GA' ],
    [ '1833', 'NA' ],
    [ '1834', 'DA' ],
    [ '1836', 'RA' ],
    [ '1837', 'MA' ],
    [ '1838', 'BA' ],
    [ '183A', 'SA' ],
    [ '183C', 'A'  ],
    [ '183D', 'JA' ],
    [ '183F', 'CA' ],
    [ '1840', 'KA' ],
    [ '1841', 'TA' ],
    [ '1842', 'PA' ],
    [ '1843', 'HA' ],
);

# Builds the 'entry' text for U+326E..U+327B.  Each line maps one circled
# syllable to its leading weight followed by the weight 188D; $extra is
# appended verbatim after those two collation elements (pass '' for none).
sub _circled_entries {
    my ($extra) = @_;
    my $code = 0x326E;
    my $text = '';
    for my $row (@CIRCLED_SYLLABLES) {
        my ($lead, $name) = @{$row};
        my $hex = sprintf '%04X', $code;
        $text .= sprintf "%s ; [.%s.0020.0006.%s][.188D.0020.0006.%s]%s # c.h.s. %s\n",
            $hex, $lead, $hex, $hex, $extra, $name;
        $code++;
    }
    return $text;
}

# Constructor arguments shared by every collator in this file.
my @BASE_ARGS = (
    level         => 1,
    table         => 'keys.txt',
    normalization => undef,
);

# a standard collator (3.1.1)
my $Collator = Unicode::Collate->new(
    @BASE_ARGS,
    entry => _circled_entries(''),
);

# Same entries, but with hangul_terminator enabled.
my $hangul = Unicode::Collate->new(
    @BASE_ARGS,
    hangul_terminator => 16,
    entry => _circled_entries(''),
);

ok(ref $hangul, "Unicode::Collate");

#########################

# Building blocks for the strings compared below.
my $lead_g   = "\x{1100}";          # leading consonant G
my $lead_d   = "\x{1103}";          # leading consonant D
my $vowel_a  = "\x{1161}";          # vowel A
my $trail_g  = "\x{11A8}";          # trailing consonant G
my $trail_gg = "\x{11A9}";          # trailing consonant GG

my $ga  = $lead_g . $vowel_a;       # /GA/  as L+V jamo
my $gaa = $ga . $vowel_a;           # /GAA/ as L+V+V jamo
my $gag = $ga . $trail_g;           # /GAG/ as L+V+T jamo
my $dag = $lead_d . $vowel_a . $trail_g;    # /DAG/ as L+V+T jamo

my $latin_a    = 'A';
my $hiragana_a = "\x{3042}";
my $hanja      = "\x{4E00}";

my $syl_ga   = "\x{AC00}";          # precomposed [GA]
my $syl_gag  = "\x{AC01}";          # precomposed [GAG]
my $syl_gagg = "\x{AC02}";          # precomposed [GAGG]
my $syl_da   = "\x{B2E4}";          # precomposed [DA]
my $syl_dag  = "\x{B2E5}";          # precomposed [DAG]

my $circled_ga = "\x{326E}";        # circled (GA)

# LVV vs LVX: /GAA/ vs /GA/.latinA
ok($Collator->gt($gaa, $ga . $latin_a));
ok($hangul  ->gt($gaa, $ga . $latin_a));

# LVV vs LVX: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt($gaa, $ga . $hiragana_a));
ok($hangul  ->gt($gaa, $ga . $hiragana_a));

# LVV vs LVX: /GAA/ vs /GA/.hanja
ok($Collator->lt($gaa, $ga . $hanja));
ok($hangul  ->gt($gaa, $ga . $hanja));

# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt($ga . $lead_g, $gag));
ok($hangul  ->lt($ga . $lead_g, $gag));

# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt($gag, $ga . $latin_a));
ok($hangul  ->gt($gag, $ga . $latin_a));

# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt($gag, $ga . $hiragana_a));
ok($hangul  ->gt($gag, $ga . $hiragana_a));

# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt($gag, $ga . $hanja));
ok($hangul  ->gt($gag, $ga . $hanja));

# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq($ga, $syl_ga));
ok($hangul  ->eq($ga, $syl_ga));

# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq($gag, $syl_ga . $trail_g));
ok($hangul  ->eq($gag, $syl_ga . $trail_g));

# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq($gag, $syl_gag));
ok($hangul  ->eq($gag, $syl_gag));

# LVT vs Syl(LVT) with the double trailing consonant: /GAGG/ vs /[GAGG]/
ok($Collator->eq($ga . $trail_gg, $syl_gagg));
ok($hangul  ->eq($ga . $trail_gg, $syl_gagg));

# LVT vs Syl(LVT) once more: /GAG/ vs /[GAG]/
# (repeats the check above; kept so the planned test count stays at 58)
ok($Collator->eq($gag, $syl_gag));
ok($hangul  ->eq($gag, $syl_gag));

#########################

# Like $hangul, but every circled-syllable entry carries an extra
# collation element [.10.0.0.0] after its two weights.
my $hangcirc = Unicode::Collate->new(
    @BASE_ARGS,
    hangul_terminator => 16,
    entry => _circled_entries('[.10.0.0.0]'),
);

# LV vs Circled Syl(LV): /GA/ vs /(GA)/
ok($Collator->eq($ga, $circled_ga));
ok($hangul  ->gt($ga, $circled_ga));
ok($hangcirc->eq($ga, $circled_ga));

# LV vs Circled Syl(LV): followed by latin A
ok($Collator->eq($ga . $latin_a, $circled_ga . $latin_a));
ok($hangul  ->lt($ga . $latin_a, $circled_ga . $latin_a));
ok($hangcirc->eq($ga . $latin_a, $circled_ga . $latin_a));

# LV vs Circled Syl(LV): followed by hiragana A
ok($Collator->eq($ga . $hiragana_a, $circled_ga . $hiragana_a));
ok($hangul  ->lt($ga . $hiragana_a, $circled_ga . $hiragana_a));
ok($hangcirc->eq($ga . $hiragana_a, $circled_ga . $hiragana_a));

# LV vs Circled Syl(LV): followed by hanja
ok($Collator->eq($ga . $hanja, $circled_ga . $hanja));
ok($hangul  ->lt($ga . $hanja, $circled_ga . $hanja));
ok($hangcirc->eq($ga . $hanja, $circled_ga . $hanja));

#########################

# checks contraction in LVT:
# weights of these contractions may be nonsense.

my $hangcont = Unicode::Collate->new(
    @BASE_ARGS,
    hangul_terminator => 16,
    entry => <<'ENTRIES',
1100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A
1161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK
ENTRIES
);

# cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/
ok($Collator->eq($ga, $syl_ga));
ok($hangcont->eq($ga, $syl_ga));

# cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/
ok($Collator->eq($gag, $syl_ga . $trail_g));
ok($hangcont->eq($gag, $syl_ga . $trail_g));

# cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/
ok($Collator->eq($gag, $syl_gag));
ok($hangcont->eq($gag, $syl_gag));

# L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/
ok($Collator->eq($dag, $syl_da . $trail_g));
ok($hangcont->eq($dag, $syl_da . $trail_g));

# L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/
ok($Collator->eq($dag, $syl_dag));
ok($hangcont->eq($dag, $syl_dag));

#####

# Turning the terminator on via change() must make $Collator give the
# same results that $hangul gave above.
$Collator->change(hangul_terminator => 16);

ok($Collator->gt($gag, $ga . $hanja));
ok($Collator->gt($ga, $circled_ga));
ok($Collator->lt($ga . $latin_a, $circled_ga . $latin_a));
ok($Collator->lt($ga . $hiragana_a, $circled_ga . $hiragana_a));
ok($Collator->lt($ga . $hanja, $circled_ga . $hanja));

# Turning it off again must restore the original results.
$Collator->change(hangul_terminator => 0);

ok($Collator->lt($gag, $ga . $hanja));
ok($Collator->eq($ga, $circled_ga));
ok($Collator->eq($ga . $latin_a, $circled_ga . $latin_a));
ok($Collator->eq($ga . $hiragana_a, $circled_ga . $hiragana_a));
ok($Collator->eq($ga . $hanja, $circled_ga . $hanja));

1;
__END__