1
2BEGIN {
3    unless (5.008 <= $]) {
4	print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n";
5	print $@;
6	exit;
7    }
8    if ($ENV{PERL_CORE}) {
9	chdir('t') if -d 't';
10	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11    }
12}
13
14use strict;
15use warnings;
16BEGIN { $| = 1; print "1..176\n"; } # 81 + 5 x @Versions
17my $count = 0;
18sub ok ($;$) {
19    my $p = my $r = shift;
20    if (@_) {
21	my $x = shift;
22	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
23    }
24    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
25}
26
27use Unicode::Collate;
28
29ok(1);
30
31sub _pack_U   { Unicode::Collate::pack_U(@_) }
32sub _unpack_U { Unicode::Collate::unpack_U(@_) }
33
34#########################
35
36no warnings 'utf8';
37
38# NULL is tailorable but illegal code points are not.
39# illegal code points should be always ingored
40# (cf. UCA, 7.1.1 Illegal code points).
41
42my $entry = <<'ENTRIES';
430000  ; [.0020.0000.0000.0000] # [0000] NULL
440001  ; [.0021.0000.0000.0001] # [0001] START OF HEADING
45FFFE  ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
46FFFF  ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
47D800  ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
48DFFF  ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
49FDD0  ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
50FDEF  ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
510002  ; [.0030.0000.0000.0002] # [0002] START OF TEXT
5210FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
53110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
540041  ; [.1000.0020.0008.0041] # latin A
550041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
560041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
57ENTRIES
58
59##################
60
61my $illeg = Unicode::Collate->new(
62  entry => $entry,
63  level => 1,
64  table => undef,
65  normalization => undef,
66  UCA_Version => 20,
67);
68
69# 2..12
70ok($illeg->lt("", "\x00"));
71ok($illeg->lt("", "\x01"));
72ok($illeg->eq("", "\x{FFFE}"));
73ok($illeg->eq("", "\x{FFFF}"));
74ok($illeg->eq("", "\x{D800}"));
75ok($illeg->eq("", "\x{DFFF}"));
76ok($illeg->eq("", "\x{FDD0}"));
77ok($illeg->eq("", "\x{FDEF}"));
78ok($illeg->lt("", "\x02"));
79ok($illeg->eq("", "\x{10FFFF}"));
80ok($illeg->eq("", "\x{110000}"));
81
82# 13..22
83ok($illeg->lt("\x00", "\x01"));
84ok($illeg->lt("\x01", "\x02"));
85ok($illeg->ne("\0", "\x{D800}"));
86ok($illeg->ne("\0", "\x{DFFF}"));
87ok($illeg->ne("\0", "\x{FDD0}"));
88ok($illeg->ne("\0", "\x{FDEF}"));
89ok($illeg->ne("\0", "\x{FFFE}"));
90ok($illeg->ne("\0", "\x{FFFF}"));
91ok($illeg->ne("\0", "\x{10FFFF}"));
92ok($illeg->ne("\0", "\x{110000}"));
93
94# 23..26
95ok($illeg->eq("A",   "A\x{FFFF}"));
96ok($illeg->gt("A\0", "A\x{FFFF}"));
97ok($illeg->lt("A",  "A\0"));
98ok($illeg->lt("AA", "A\0"));
99
100##################
101
102my $nonch = Unicode::Collate->new(
103  entry => $entry,
104  level => 1,
105  table => undef,
106  normalization => undef,
107  UCA_Version => 22,
108);
109
110# 27..37
111ok($nonch->lt("", "\x00"));
112ok($nonch->lt("", "\x01"));
113ok($nonch->lt("", "\x{FFFE}"));
114ok($nonch->lt("", "\x{FFFF}"));
115ok($nonch->lt("", "\x{D800}"));
116ok($nonch->lt("", "\x{DFFF}"));
117ok($nonch->lt("", "\x{FDD0}"));
118ok($nonch->lt("", "\x{FDEF}"));
119ok($nonch->lt("", "\x02"));
120ok($nonch->lt("", "\x{10FFFF}"));
121ok($nonch->lt("", "\x{110000}"));
122
123# 38..47
124ok($nonch->lt("\x00",     "\x01"));
125ok($nonch->lt("\x01",     "\x{FFFE}"));
126ok($nonch->lt("\x{FFFE}", "\x{FFFF}"));
127ok($nonch->lt("\x{FFFF}", "\x{D800}"));
128ok($nonch->lt("\x{D800}", "\x{DFFF}"));
129ok($nonch->lt("\x{DFFF}", "\x{FDD0}"));
130ok($nonch->lt("\x{FDD0}", "\x{FDEF}"));
131ok($nonch->lt("\x{FDEF}", "\x02"));
132ok($nonch->lt("\x02",     "\x{10FFFF}"));
133ok($nonch->lt("\x{10FFFF}", "\x{110000}"));
134
135# 48..51
136ok($nonch->lt("A",   "A\x{FFFF}"));
137ok($nonch->lt("A\0", "A\x{FFFF}"));
138ok($nonch->lt("A",  "A\0"));
139ok($nonch->lt("AA", "A\0"));
140
141##################
142
143my $Collator = Unicode::Collate->new(
144  table => 'keys.txt',
145  level => 1,
146  normalization => undef,
147  UCA_Version => 8,
148);
149
150my @ret = (
151    "Pe\x{300}\x{301}",
152    "Pe\x{300}\0\0\x{301}",
153    "Pe\x{DA00}\x{301}\x{DFFF}",
154    "Pe\x{FFFF}\x{301}",
155    "Pe\x{110000}\x{301}",
156    "Pe\x{300}\x{d801}\x{301}",
157    "Pe\x{300}\x{ffff}\x{301}",
158    "Pe\x{300}\x{110000}\x{301}",
159    "Pe\x{D9ab}\x{DFFF}",
160    "Pe\x{FFFF}",
161    "Pe\x{110000}",
162    "Pe\x{300}\x{D800}\x{DFFF}",
163    "Pe\x{300}\x{FFFF}",
164    "Pe\x{300}\x{110000}",
165);
166
167# 52..65
168for my $ret (@ret) {
169    my $str = $ret."rl";
170    my($match) = $Collator->match($str, "pe");
171    ok($match eq $ret);
172}
173
174##################
175
176my $out = Unicode::Collate->new(
177    level => 1,
178    table => undef,
179    normalization => undef,
180    overrideOut => sub { 0xFFFD },
181);
182
183my @Versions = ( 8,  9, 11, 14, 16, 18, 20, 22, 24, 26,
184		28, 30, 32, 34, 36, 38, 40, 41, 43);
185
186for my $v (@Versions) {
187    $out->change(UCA_Version => $v);
188    ok($out->cmp('',           "\x{10FFFF}") == ($v >= 22 ? -1 : 0));
189    ok($out->cmp('',           "\x{110000}") == ($v >= 22 ? -1 : 0));
190    ok($out->cmp('ABC',        "\x{110000}") == ($v >= 22 ? -1 : 1));
191    ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1));
192    ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ?  0 : 0));
193}
194
195# x+66..x+77
196ok($out->lt('ABC',      "\x{123456}"));
197ok($out->lt("\x{FFFD}", "\x{123456}"));
198
199$out->change(overrideOut => sub {()});
200
201ok($out->eq('',         "\x{123456}"));
202ok($out->gt('ABC',      "\x{123456}"));
203ok($out->gt("\x{FFFD}", "\x{123456}"));
204
205$out->change(overrideOut => undef);
206ok($out->lt('',         "\x{123456}"));
207ok($out->eq("\x{FFFD}", "\x{123456}"));
208
209$out->change(overrideOut => sub { 0xFFFD });
210
211ok($out->lt('',         "\x{123456}"));
212ok($out->lt('ABC',      "\x{123456}"));
213ok($out->lt("\x{FFFD}", "\x{123456}"));
214
215$out->change(overrideOut => 0);
216ok($out->lt('',         "\x{123456}"));
217ok($out->eq("\x{FFFD}", "\x{123456}"));
218
219$out->change(overrideOut => sub { undef });
220ok($out->lt('',         "\x{123456}"));
221ok($out->eq("\x{FFFD}", "\x{123456}"));
222ok($out->eq("\x{FFFD}", "\x{21FFFFF}"));
223ok($out->eq("\x{FFFD}", "\x{2200000}"));
224
225