1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17use strict;
18use warnings;
19BEGIN { $| = 1; print "1..74\n"; }
20my $count = 0;
21sub ok ($;$) {
22    my $p = my $r = shift;
23    if (@_) {
24	my $x = shift;
25	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
26    }
27    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
28}
29
30use Unicode::Collate;
31
32ok(1);
33
34#########################
35
36our $kjeEntry = <<'ENTRIES';
370301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
380334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
39043A  ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
40041A  ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
41045C  ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
42043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
43040C  ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
44041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
45ENTRIES
46
47our $aaEntry = <<'ENTRIES';
480304  ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
49030A  ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
500327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
51031A  ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
520061  ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
530041  ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
54007A  ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
55005A  ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
5600E5  ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
5700C5  ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
580061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
590041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
60ENTRIES
61
62#########################
63
64my $kjeNoN = Unicode::Collate->new(
65    level => 1,
66    table => undef,
67    normalization => undef,
68    entry => $kjeEntry,
69);
70
71ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
72ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
73ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
74ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
75
76# 5
77
78our %sortkeys;
79
80$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}");
81$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}");
82$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}");
83
84eval { require Unicode::Normalize };
85if (!$@) {
86    my $kjeNFD = Unicode::Collate->new(
87	level => 1,
88	table => undef,
89	entry => $kjeEntry,
90    );
91
92ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}"));
93ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}"));
94ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}"));
95ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
96# 9
97
98    my $aaNFD = Unicode::Collate->new(
99	level => 1,
100	table => undef,
101	entry => $aaEntry,
102    );
103
104ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
105ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
106ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
107ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
108ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
109ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
110ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
111ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
112# 17
113
114    my $aaPre = Unicode::Collate->new(
115	level => 1,
116	normalization => "prenormalized",
117	table => undef,
118	entry => $aaEntry,
119    );
120
121ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
122ok($aaPre->eq("A", "A\x{304}\x{30A}"));
123ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
124ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
125ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
126ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
127ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
128ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
129# 25
130} else {
131    ok(1) for 1..20;
132}
133
134# again: loading Unicode::Normalize should not affect $kjeNoN.
135ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
136ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
137ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
138ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
139
140ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}"));
141ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"));
142ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"));
143
144# 32
145
146my $aaNoN = Unicode::Collate->new(
147    level => 1,
148    table => undef,
149    entry => $aaEntry,
150    normalization => undef,
151);
152
153ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
154ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
155ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
156ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
157ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
158ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
159ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
160ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
161
162# 40
163
164# suppress contractions (not affected)
165
166my $kjeSup = Unicode::Collate->new(
167    level => 1,
168    table => undef,
169    normalization => undef,
170    entry => $kjeEntry,
171    suppress => [0x400..0x45F],
172);
173
174ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}"));
175ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}"));
176ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}"));
177ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}"));
178
179# 44
180
181our $tibetanEntry = <<'ENTRIES';
1820000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
1830F71           ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA
1840F72           ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I
1850F73           ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1860F71 0F72      ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1870F80           ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I
1880F81           ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1890F71 0F80      ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1900F74           ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U
1910F75           ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1920F71 0F74      ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1930F76           ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1940FB2 0F80      ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1950F77           ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1960FB2 0F81      ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1970FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1980F78           ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1990FB3 0F80      ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
2000F79           ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
2010FB3 0F81      ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
2020FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
203ENTRIES
204
205# ccc(0F71) = 129
206# ccc(0F80) = 130
207# 0F76 = 0FB2 0F80
208# 0F78 = 0FB3 0F80
209# 0F81 = 0F71 0F80
210# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71
211# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71
212
213eval { require Unicode::Normalize };
214if (!$@) {
215    my $tibNFD = Unicode::Collate->new(
216	table => undef,
217	entry => $tibetanEntry,
218    );
219
220    # VOCALIC RR
221    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}"));
222    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}"));
223    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}"));
224    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}"));
225    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}"));
226    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}"));
227    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}"));
228    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}"));
229    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}"));
230    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}"));
231    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}"));
232    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}"));
233    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}"));
234    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}"));
235# 58
236
237    # VOCALIC LL
238    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}"));
239    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}"));
240    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}"));
241    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}"));
242    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}"));
243    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}"));
244    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}"));
245    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}"));
246    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}"));
247    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}"));
248    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}"));
249    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}"));
250    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}"));
251    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}"));
252# 72
253
254    my $discontNFD = Unicode::Collate->new(
255	table => undef,
256	entry => <<'ENTRIES',
2570000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
2580301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
2590300  ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT
2600327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA
2610334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
2620041  ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
2630041 0327 0301 ; [.0102.0020.0008.0041]
2640041 0300 ; [.0103.0020.0008.0041]
265ENTRIES
266    );
267
268    ok($discontNFD->eq("A\x{327}\x{301}\0\x{334}", "A\x{334}\x{327}\x{301}"));
269    ok($discontNFD->eq("A\x{300}\0\x{327}",        "A\x{327}\x{300}"));
270} else {
271    ok(1) for 1..30;
272}
273# 74
274