1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17use strict;
18use warnings;
19BEGIN { $| = 1; print "1..58\n"; }
20my $count = 0;
21sub ok ($;$) {
22    my $p = my $r = shift;
23    if (@_) {
24	my $x = shift;
25	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
26    }
27    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
28}
29
30use Unicode::Collate;
31
32ok(1);
33
34#########################
35
36# a standard collator (3.1.1)
37my $Collator = Unicode::Collate->new(
38  level => 1,
39  table => 'keys.txt',
40  normalization => undef,
41
42  entry => <<'ENTRIES',
43326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
44326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
453270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
463271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
473272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
483273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
493274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
503275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
513276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
523277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
533278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
543279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
55327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
56327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
57ENTRIES
58);
59
60my $hangul = Unicode::Collate->new(
61  level => 1,
62  table => 'keys.txt',
63  normalization => undef,
64  hangul_terminator => 16,
65
66  entry => <<'ENTRIES',
67326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
68326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
693270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
703271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
713272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
723273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
733274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
743275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
753276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
763277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
773278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
783279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
79327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
80327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
81ENTRIES
82);
83
84ok(ref $hangul, "Unicode::Collate");
85
86#########################
87
88# LVX vs LVV: /GAA/ vs /GA/.latinA
89ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
90ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
91
92# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
93ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
94ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
95
96# LVX vs LVV: /GAA/ vs /GA/.hanja
97ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
98ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
99
100# LVL vs LVT: /GA/./G/ vs /GAG/
101ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
102ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
103
104# LVT vs LVX: /GAG/ vs /GA/.latinA
105ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
106ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
107
108# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
109ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
110ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
111
112# LVT vs LVX: /GAG/ vs /GA/.hanja
113ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
114ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
115
116# LV vs Syl(LV): /GA/ vs /[GA]/
117ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
118ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
119
120# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
121ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
122ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
123
124# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
125ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
126ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
127
128# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
129ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
130ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
131
132# Syl(LVT) vs : /GAG/ vs /[GAG]/
133ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
134ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
135
136#########################
137
138my $hangcirc = Unicode::Collate->new(
139  level => 1,
140  table => 'keys.txt',
141  normalization => undef,
142  hangul_terminator => 16,
143
144  entry => <<'ENTRIES',
145326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E][.10.0.0.0] # c.h.s. GA
146326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F][.10.0.0.0] # c.h.s. NA
1473270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270][.10.0.0.0] # c.h.s. DA
1483271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271][.10.0.0.0] # c.h.s. RA
1493272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272][.10.0.0.0] # c.h.s. MA
1503273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273][.10.0.0.0] # c.h.s. BA
1513274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274][.10.0.0.0] # c.h.s. SA
1523275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275][.10.0.0.0] # c.h.s. A
1533276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276][.10.0.0.0] # c.h.s. JA
1543277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277][.10.0.0.0] # c.h.s. CA
1553278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278][.10.0.0.0] # c.h.s. KA
1563279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279][.10.0.0.0] # c.h.s. TA
157327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A][.10.0.0.0] # c.h.s. PA
158327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B][.10.0.0.0] # c.h.s. HA
159ENTRIES
160);
161
162# LV vs Circled Syl(LV): /GA/ vs /(GA)/
163ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
164ok($hangul  ->gt("\x{1100}\x{1161}", "\x{326E}"));
165ok($hangcirc->eq("\x{1100}\x{1161}", "\x{326E}"));
166
167# LV vs Circled Syl(LV): followed by latin A
168ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
169ok($hangul  ->lt("\x{1100}\x{1161}A", "\x{326E}A"));
170ok($hangcirc->eq("\x{1100}\x{1161}A", "\x{326E}A"));
171
172# LV vs Circled Syl(LV): followed by hiragana A
173ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
174ok($hangul  ->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
175ok($hangcirc->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
176
177# LVT vs LVX: /GAG/ vs /GA/.hanja
178ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
179ok($hangul  ->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
180ok($hangcirc->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
181
182#########################
183
184# checks contraction in LVT:
185# weights of these contractions may be non-sense.
186
187my $hangcont = Unicode::Collate->new(
188  level => 1,
189  table => 'keys.txt',
190  normalization => undef,
191  hangul_terminator => 16,
192
193  entry => <<'ENTRIES',
1941100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A
1951161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK
196ENTRIES
197);
198
199# cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/
200ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
201ok($hangcont->eq("\x{1100}\x{1161}", "\x{AC00}"));
202
203# cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/
204ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
205ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
206
207# cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/
208ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
209ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
210
211# L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/
212ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
213ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
214
215# L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/
216ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
217ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
218
219#####
220
221$Collator->change(hangul_terminator => 16);
222
223ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
224ok($Collator->gt("\x{1100}\x{1161}", "\x{326E}"));
225ok($Collator->lt("\x{1100}\x{1161}A", "\x{326E}A"));
226ok($Collator->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
227ok($Collator->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
228
229$Collator->change(hangul_terminator => 0);
230
231ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
232ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
233ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
234ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
235ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
236
237