1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17use strict;
18use warnings;
19BEGIN { $| = 1; print "1..72\n"; }
20my $count = 0;
21sub ok ($;$) {
22    my $p = my $r = shift;
23    if (@_) {
24	my $x = shift;
25	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
26    }
27    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
28}
29
30use Unicode::Collate;
31
32ok(1);
33
34#########################
35
36# a standard collator (3.1.1)
37my $Collator = Unicode::Collate->new(
38  table => 'keys.txt',
39  normalization => undef,
40);
41
42
43# a collator for hangul sorting,
44# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
45#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
46my $hangul = Unicode::Collate->new(
47  level => 3,
48  table => undef,
49  normalization => undef,
50
51  entry => <<'ENTRIES',
520061      ; [.0A15.0020.0002] # LATIN SMALL LETTER A
530041      ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
54#1161     ; [.1800.0020.0002] # <comment> initial jungseong A
55#1163     ; [.1801.0020.0002] # <comment> initial jungseong YA
561100      ; [.1831.0020.0002] # choseong KIYEOK
571100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
581100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
591101      ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
601101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
611101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
621102      ; [.1833.0020.0002] # choseong NIEUN
631102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
641102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
653042      ; [.1921.0020.000E] # HIRAGANA LETTER A
6611A8      ; [.FE10.0020.0002] # jongseong KIYEOK
6711A9      ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
681161      ; [.FE20.0020.0002] # jungseong A <non-initial>
691163      ; [.FE21.0020.0002] # jungseong YA <non-initial>
70ENTRIES
71);
72
73ok(ref $hangul, "Unicode::Collate");
74
75my $trailwt = Unicode::Collate->new(
76  level => 3,
77  table => undef,
78  normalization => undef,
79  hangul_terminator => 16,
80
81  entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
820061  ; [.0A15.0020.0002] # LATIN SMALL LETTER A
830041  ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
8411A8  ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
8511A9  ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
861161  ; [.1831.0020.0002] # HANGUL JUNGSEONG A
871163  ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
881100  ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
891101  ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
901102  ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
913042  ; [.1921.0020.000E] # HIRAGANA LETTER A
92ENTRIES
93);
94
95#########################
96
97# L(simp)L(simp) vs L(comp): /GGA/
98ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
99ok($hangul  ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
100ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
101
102# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
103ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
104ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
105ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
106
107# T(simp)T(simp) vs T(comp): /AGG/
108ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
109ok($hangul  ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
110ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
111
112# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
113ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
114ok($hangul  ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
115ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
116
117# LV vs LLV: /GA/ vs /GNA/
118ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
119ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
120ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
121
122# LVX vs LVV: /GAA/ vs /GA/.latinA
123ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
124ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
125ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
126
127# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
128ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
129ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
130ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
131
132# LVX vs LVV: /GAA/ vs /GA/.hanja
133ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
134ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
135ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
136
137# LVL vs LVT: /GA/./G/ vs /GAG/
138ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
139ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
140ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
141
142# LVT vs LVX: /GAG/ vs /GA/.latinA
143ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
144ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
145ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
146
147# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
148ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
149ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
150ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
151
152# LVT vs LVX: /GAG/ vs /GA/.hanja
153ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
154ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
155ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
156
157# LVT vs LVV: /GAG/ vs /GAA/
158ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
159ok($hangul  ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
160ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
161
162# LVL vs LVV: /GA/./G/ vs /GAA/
163ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
164ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
165ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
166
167# LV vs Syl(LV): /GA/ vs /[GA]/
168ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
169ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
170ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));
171
172# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
173ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
174ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
175ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
176
177# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
178ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
179ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
180ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
181
182# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
183ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
184ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
185ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
186
187# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
188ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
189ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
190ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
191
192# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
193ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
194ok($hangul  ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
195ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
196
197#########################
198
199# checks contraction in LVT:
200# weights of these contractions may be non-sense.
201
202my $hangcont = Unicode::Collate->new(
203  level => 3,
204  table => undef,
205  normalization => undef,
206  entry => <<'ENTRIES',
2071100  ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
2081101  ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
2091161  ; [.188D.0020.0002] # HANGUL JUNGSEONG A
2101162  ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
2111163  ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
21211A8  ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
21311A9  ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
2141161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
2151100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
216ENTRIES
217);
218
219# contracted into VT
220ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
221ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
222
223# not contracted into LVT but into VT
224ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
225ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
226
227# contracted into LVT
228ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
229ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
230
231# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
232ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
233ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
234
235# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
236ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
237ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
238
2391;
240__END__
241