1
2BEGIN {
3    unless ("A" eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate " .
5	    "cannot stringify a Unicode code point\n";
6	exit 0;
7    }
8    if ($ENV{PERL_CORE}) {
9	chdir('t') if -d 't';
10	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11    }
12}
13
14use strict;
15use warnings;
16BEGIN { $| = 1; print "1..58\n"; }
17my $count = 0;
18sub ok ($;$) {
19    my $p = my $r = shift;
20    if (@_) {
21	my $x = shift;
22	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
23    }
24    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
25}
26
27use Unicode::Collate;
28
29ok(1);
30
31#########################
32
33# a standard collator (3.1.1)
34my $Collator = Unicode::Collate->new(
35  level => 1,
36  table => 'keys.txt',
37  normalization => undef,
38
39  entry => <<'ENTRIES',
40326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
41326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
423270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
433271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
443272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
453273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
463274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
473275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
483276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
493277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
503278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
513279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
52327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
53327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
54ENTRIES
55);
56
57my $hangul = Unicode::Collate->new(
58  level => 1,
59  table => 'keys.txt',
60  normalization => undef,
61  hangul_terminator => 16,
62
63  entry => <<'ENTRIES',
64326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
65326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
663270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
673271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
683272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
693273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
703274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
713275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
723276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
733277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
743278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
753279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
76327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
77327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
78ENTRIES
79);
80
81ok(ref $hangul, "Unicode::Collate");
82
83#########################
84
85# LVX vs LVV: /GAA/ vs /GA/.latinA
86ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
87ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
88
89# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
90ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
91ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
92
93# LVX vs LVV: /GAA/ vs /GA/.hanja
94ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
95ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
96
97# LVL vs LVT: /GA/./G/ vs /GAG/
98ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
99ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
100
101# LVT vs LVX: /GAG/ vs /GA/.latinA
102ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
103ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
104
105# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
106ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
107ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
108
109# LVT vs LVX: /GAG/ vs /GA/.hanja
110ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
111ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
112
113# LV vs Syl(LV): /GA/ vs /[GA]/
114ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
115ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
116
117# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
118ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
119ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
120
121# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
122ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
123ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
124
125# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
126ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
127ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
128
129# Syl(LVT) vs : /GAG/ vs /[GAG]/
130ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
131ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
132
133#########################
134
135my $hangcirc = Unicode::Collate->new(
136  level => 1,
137  table => 'keys.txt',
138  normalization => undef,
139  hangul_terminator => 16,
140
141  entry => <<'ENTRIES',
142326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E][.10.0.0.0] # c.h.s. GA
143326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F][.10.0.0.0] # c.h.s. NA
1443270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270][.10.0.0.0] # c.h.s. DA
1453271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271][.10.0.0.0] # c.h.s. RA
1463272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272][.10.0.0.0] # c.h.s. MA
1473273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273][.10.0.0.0] # c.h.s. BA
1483274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274][.10.0.0.0] # c.h.s. SA
1493275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275][.10.0.0.0] # c.h.s. A
1503276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276][.10.0.0.0] # c.h.s. JA
1513277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277][.10.0.0.0] # c.h.s. CA
1523278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278][.10.0.0.0] # c.h.s. KA
1533279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279][.10.0.0.0] # c.h.s. TA
154327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A][.10.0.0.0] # c.h.s. PA
155327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B][.10.0.0.0] # c.h.s. HA
156ENTRIES
157);
158
159# LV vs Circled Syl(LV): /GA/ vs /(GA)/
160ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
161ok($hangul  ->gt("\x{1100}\x{1161}", "\x{326E}"));
162ok($hangcirc->eq("\x{1100}\x{1161}", "\x{326E}"));
163
164# LV vs Circled Syl(LV): followed by latin A
165ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
166ok($hangul  ->lt("\x{1100}\x{1161}A", "\x{326E}A"));
167ok($hangcirc->eq("\x{1100}\x{1161}A", "\x{326E}A"));
168
169# LV vs Circled Syl(LV): followed by hiragana A
170ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
171ok($hangul  ->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
172ok($hangcirc->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
173
174# LVT vs LVX: /GAG/ vs /GA/.hanja
175ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
176ok($hangul  ->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
177ok($hangcirc->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
178
179#########################
180
181# checks contraction in LVT:
182# weights of these contractions may be non-sense.
183
184my $hangcont = Unicode::Collate->new(
185  level => 1,
186  table => 'keys.txt',
187  normalization => undef,
188  hangul_terminator => 16,
189
190  entry => <<'ENTRIES',
1911100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A
1921161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK
193ENTRIES
194);
195
196# cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/
197ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
198ok($hangcont->eq("\x{1100}\x{1161}", "\x{AC00}"));
199
200# cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/
201ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
202ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
203
204# cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/
205ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
206ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
207
208# L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/
209ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
210ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
211
212# L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/
213ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
214ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
215
216#####
217
218$Collator->change(hangul_terminator => 16);
219
220ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
221ok($Collator->gt("\x{1100}\x{1161}", "\x{326E}"));
222ok($Collator->lt("\x{1100}\x{1161}A", "\x{326E}A"));
223ok($Collator->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
224ok($Collator->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
225
226$Collator->change(hangul_terminator => 0);
227
228ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
229ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
230ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
231ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
232ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
233
234