1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17use strict; 18use warnings; 19BEGIN { $| = 1; print "1..72\n"; } 20my $count = 0; 21sub ok ($;$) { 22 my $p = my $r = shift; 23 if (@_) { 24 my $x = shift; 25 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 26 } 27 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 28} 29 30use Unicode::Collate; 31 32ok(1); 33 34######################### 35 36# a standard collator (3.1.1) 37my $Collator = Unicode::Collate->new( 38 table => 'keys.txt', 39 normalization => undef, 40); 41 42 43# a collator for hangul sorting, 44# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html 45# http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf 46my $hangul = Unicode::Collate->new( 47 level => 3, 48 table => undef, 49 normalization => undef, 50 51 entry => <<'ENTRIES', 520061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 530041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 54#1161 ; [.1800.0020.0002] # <comment> initial jungseong A 55#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA 561100 ; [.1831.0020.0002] # choseong KIYEOK 571100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A 581100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA 591101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK 601101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A 611101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA 621102 ; [.1833.0020.0002] # choseong NIEUN 631102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A 641102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA 653042 ; [.1921.0020.000E] # HIRAGANA LETTER A 6611A8 ; [.FE10.0020.0002] # jongseong KIYEOK 6711A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK 681161 ; [.FE20.0020.0002] # jungseong A <non-initial> 691163 ; [.FE21.0020.0002] # jungseong YA <non-initial> 70ENTRIES 71); 72 73ok(ref $hangul, "Unicode::Collate"); 74 75my $trailwt = Unicode::Collate->new( 76 level => 3, 77 table => undef, 78 normalization => undef, 79 hangul_terminator => 16, 80 81 entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong 820061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 830041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 8411A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK 8511A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 861161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A 871163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA 881100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK 891101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 901102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN 913042 ; [.1921.0020.000E] # HIRAGANA LETTER A 92ENTRIES 93); 94 95######################### 96 97# L(simp)L(simp) vs L(comp): /GGA/ 98ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 99ok($hangul ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 100ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 101 102# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/ 103ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 104ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 105ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 106 107# T(simp)T(simp) vs T(comp): /AGG/ 108ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 109ok($hangul ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 110ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 111 112# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/ 113ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 114ok($hangul ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 115ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 116 117# LV vs LLV: /GA/ vs /GNA/ 118ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 119ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 120ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 121 122# LVX vs LVV: /GAA/ vs /GA/.latinA 123ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 124ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 125ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 126 127# LVX vs LVV: /GAA/ vs /GA/.hiraganaA 128ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 129ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 130ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 131 132# LVX vs LVV: /GAA/ vs /GA/.hanja 133ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 134ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 135ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 136 137# LVL vs LVT: /GA/./G/ vs /GAG/ 138ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 139ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 140ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 141 142# LVT vs LVX: /GAG/ vs /GA/.latinA 143ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 144ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 145ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 146 147# LVT vs LVX: /GAG/ vs /GA/.hiraganaA 148ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 149ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 150ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 151 152# LVT vs LVX: /GAG/ vs /GA/.hanja 153ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 154ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 155ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 156 157# LVT vs LVV: /GAG/ vs /GAA/ 158ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 159ok($hangul ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 160ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 161 162# LVL vs LVV: /GA/./G/ vs /GAA/ 163ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 164ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 165ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 166 167# LV vs Syl(LV): /GA/ vs /[GA]/ 168ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); 169ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); 170ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}")); 171 172# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ 173ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 174ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 175ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 176 177# LVT vs Syl(LVT): /GAG/ vs /[GAG]/ 178ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 179ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 180ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 181 182# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 183ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 184ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 185ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 186 187# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/ 188ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 189ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 190ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 191 192# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/ 193ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 194ok($hangul ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 195ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 196 197######################### 198 199# checks contraction in LVT: 200# weights of these contractions may be non-sense. 201 202my $hangcont = Unicode::Collate->new( 203 level => 3, 204 table => undef, 205 normalization => undef, 206 entry => <<'ENTRIES', 2071100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK 2081101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 2091161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A 2101162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE 2111163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA 21211A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK 21311A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 2141161 11A9 ; [.0000.0000.0000] # A-GG <contraction> 2151100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39 216ENTRIES 217); 218 219# contracted into VT 220ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); 221ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); 222 223# not contracted into LVT but into VT 224ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); 225ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); 226 227# contracted into LVT 228ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); 229ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); 230 231# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 232ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 233ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 234 235# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/ 236ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 237ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 238 2391; 240__END__ 241