1
2BEGIN {
3    if ($ENV{PERL_CORE}) {
4	chdir('t') if -d 't';
5	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
6    }
7}
8
9use strict;
10use warnings;
11BEGIN { $| = 1; print "1..118\n"; }
12my $count = 0;
13sub ok ($;$) {
14    my $p = my $r = shift;
15    if (@_) {
16	my $x = shift;
17	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
18    }
19    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
20}
21
22use Unicode::Collate;
23
24ok(1);
25
26sub _pack_U   { Unicode::Collate::pack_U(@_) }
27sub _unpack_U { Unicode::Collate::unpack_U(@_) }
28
29#########################
30
31our $kjeEntry = <<'ENTRIES';
320301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
330334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
34043A  ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
35041A  ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
36045C  ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
37043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
38040C  ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
39041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
40ENTRIES
41
42our $aaEntry = <<'ENTRIES';
430304  ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
44030A  ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
450327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
46031A  ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
470061  ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
480041  ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
49007A  ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
50005A  ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
5100E5  ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
5200C5  ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
530061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
540041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
55ENTRIES
56
57#########################
58
59my $kjeNoN = Unicode::Collate->new(
60    level => 1,
61    table => undef,
62    normalization => undef,
63    entry => $kjeEntry,
64);
65
66ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
67ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
68ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
69ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
70
71# 5
72
73our %sortkeys;
74
75$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}");
76$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}");
77$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}");
78
79eval { require Unicode::Normalize };
80if (!$@) {
81    my $kjeNFD = Unicode::Collate->new(
82	level => 1,
83	table => undef,
84	entry => $kjeEntry,
85    );
86
87ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}"));
88ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}"));
89ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}"));
90ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
91# 9
92
93    my $aaNFD = Unicode::Collate->new(
94	level => 1,
95	table => undef,
96	entry => $aaEntry,
97    );
98
99ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
100ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
101ok($aaNFD->eq(_pack_U(0xE5), "A\x{30A}\x{304}"));
102ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
103ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
104ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
105ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
106ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
107# 17
108
109    my $aaPre = Unicode::Collate->new(
110	level => 1,
111	normalization => "prenormalized",
112	table => undef,
113	entry => $aaEntry,
114    );
115
116ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
117ok($aaPre->eq("A", "A\x{304}\x{30A}"));
118ok($aaPre->eq(_pack_U(0xE5), "A\x{30A}\x{304}"));
119ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
120ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
121ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
122ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
123ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
124# 25
125} else {
126    ok(1) for 1..20;
127}
128
129# again: loading Unicode::Normalize should not affect $kjeNoN.
130ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
131ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
132ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
133ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
134
135ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}"));
136ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"));
137ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"));
138
139# 32
140
141my $aaNoN = Unicode::Collate->new(
142    level => 1,
143    table => undef,
144    entry => $aaEntry,
145    normalization => undef,
146);
147
148ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
149ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
150ok($aaNoN->eq(_pack_U(0xE5), "A\x{30A}\x{304}"));
151ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
152ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
153ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
154ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
155ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
156
157# 40
158
159# suppress contractions (not affected)
160
161my $kjeSup = Unicode::Collate->new(
162    level => 1,
163    table => undef,
164    normalization => undef,
165    entry => $kjeEntry,
166    suppress => [0x400..0x45F],
167);
168
169ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}"));
170ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}"));
171ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}"));
172ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}"));
173
174# 44
175
176our $tibetanEntry = <<'ENTRIES';
1770000           ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
1780FB2           ; [.205B.0020.0002.0FB2] # TIBETAN SUBJOINED LETTER RA
1790FB3           ; [.205E.0020.0002.0FB3] # TIBETAN SUBJOINED LETTER LA
1800F71           ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA
1810F72           ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I
1820F73           ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1830F71 0F72      ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1840F80           ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I
1850F81           ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1860F71 0F80      ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1870F74           ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U
1880F75           ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1890F71 0F74      ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1900F76           ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1910FB2 0F80      ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1920F77           ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1930FB2 0F81      ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1940FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1950F78           ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1960FB3 0F80      ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1970F79           ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
1980FB3 0F81      ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
1990FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
200ENTRIES
201
202# ccc(0F71) = 129
203# ccc(0F80) = 130
204# 0F76 = 0FB2 0F80
205# 0F78 = 0FB3 0F80
206# 0F81 = 0F71 0F80
207# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71
208# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71
209
210eval { require Unicode::Normalize };
211if (!$@) {
212    my $tibNFD = Unicode::Collate->new(
213	table => undef,
214	entry => $tibetanEntry,
215	UCA_Version => 24,
216    );
217
218    # VOCALIC RR
219    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}"));
220    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}"));
221    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}"));
222    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}"));
223    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}"));
224    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}"));
225    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}"));
226    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}"));
227    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}"));
228    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}"));
229    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}"));
230    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}"));
231    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}"));
232    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}"));
233# 58
234
235    # VOCALIC LL
236    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}"));
237    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}"));
238    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}"));
239    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}"));
240    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}"));
241    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}"));
242    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}"));
243    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}"));
244    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}"));
245    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}"));
246    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}"));
247    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}"));
248    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}"));
249    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}"));
250# 72
251
252    my $a1 = "\x{FB2}\x{334}\x{F81}";
253    my $b1 = "\x{F77}\0\x{334}";
254    my $a2 = "\x{FB2}\x{334}\x{F81}";
255    my $b2 = "\x{FB2}\x{F80}\0\x{334}\x{F71}";
256
257    for my $v (qw/20 22 24 26 28/) {
258	my $tib = Unicode::Collate->new(
259	    table => undef,
260	    entry => $tibetanEntry,
261	    UCA_Version => $v,
262	);
263	my $long = 22 <= $v && $v <= 24;
264	ok($tib->cmp($a1, $b1), $long ? 0 : -1);
265	ok($tib->cmp($a2, $b2), $long ? 1 : 0);
266
267	$tib->change(long_contraction => 0);
268	ok($tib->cmp($a1, $b1), -1);
269	ok($tib->cmp($a2, $b2),  0);
270
271	$tib->change(long_contraction => 1);
272	ok($tib->cmp($a1, $b1), 0);
273	ok($tib->cmp($a2, $b2), 1);
274    }
275# 102
276
277    # UCA_Version => 22
278    ok($tibNFD->cmp($a1, $b1), 0);
279    ok($tibNFD->cmp($a2, $b2), 1);
280
281    $tibNFD->change(UCA_Version => 26); # not affect long_contraction
282    ok($tibNFD->cmp($a1, $b1), 0);
283    ok($tibNFD->cmp($a2, $b2), 1);
284# 106
285
286    my $discontNFD = Unicode::Collate->new(
287	table => undef,
288	UCA_Version => 22,
289	entry => <<'ENTRIES',
2900000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
2910301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
2920300  ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT
2930327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA
2940334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
2950041  ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
2960041 0327 0301 ; [.0102.0020.0008.0041]
2970041 0300 ; [.0103.0020.0008.0041]
298ENTRIES
299    );
300
301    ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
302    ok($discontNFD->eq("A\x{327}\x{300}",        "A\x{300}\0\x{327}"));
303
304    $discontNFD->change(long_contraction => 0);
305    ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
306    ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\0\x{327}\x{301}\x{334}"));
307    ok($discontNFD->eq("A\x{327}\x{300}",        "A\x{300}\0\x{327}"));
308
309    $discontNFD->change(level => 1);
310    ok($discontNFD->gt("A\x{327}\x{300}", "A\x{327}\0\x{300}"));
311
312    # discontiguous
313    ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
314    ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{300}"));
315    ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A"));
316
317    # contiguous
318    ok($discontNFD->eq("A\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
319    ok($discontNFD->lt("A\x{327}\x{301}", "A\x{300}"));
320    ok($discontNFD->gt("A\x{327}\x{301}", "A"));
321} else {
322    ok(1) for 1..74;
323}
324# 118
325