1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17use strict;
18use warnings;
19BEGIN { $| = 1; print "1..65\n"; }
20my $count = 0;
21sub ok ($;$) {
22    my $p = my $r = shift;
23    if (@_) {
24	my $x = shift;
25	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
26    }
27    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
28}
29
30use Unicode::Collate;
31
32ok(1);
33
34#########################
35
36##### 2..31
37
38{
39    my $all_undef_8 = Unicode::Collate->new(
40	table => undef,
41	normalization => undef,
42	overrideCJK => undef,
43	overrideHangul => undef,
44	UCA_Version => 8,
45    );
46    # All in the Unicode code point order.
47    # No hangul decomposition.
48
49    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
50    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
51    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
52    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
53    ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
54    ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
55    # U+ABFF: not assigned
56
57    # a hangul syllable is decomposed into jamo.
58    $all_undef_8->change(overrideHangul => 0);
59    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
60    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
61    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
62    ok($all_undef_8->gt("\x{4E00}", "\x{AC00}"));
63    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
64    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
65
66    # CJK defined < Jamo undefined
67    $all_undef_8->change(overrideCJK => 0);
68    ok($all_undef_8->gt("\x{1100}", "\x{3402}"));
69    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
70    ok($all_undef_8->gt("\x{4DFF}", "\x{4E00}"));
71    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
72    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
73    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
74
75    # CJK undefined > Jamo undefined
76    $all_undef_8->change(overrideCJK => undef);
77    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
78    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
79    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
80    ok($all_undef_8->gt("\x{4E00}", "\x{AC00}"));
81    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
82    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
83
84    $all_undef_8->change(overrideHangul => undef);
85    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
86    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
87    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
88    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
89    ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
90    ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
91}
92
93##### 32..38
94
95{
96    my $all_undef_9 = Unicode::Collate->new(
97	table => undef,
98	normalization => undef,
99	overrideCJK => undef,
100	overrideHangul => undef,
101	UCA_Version => 9,
102    );
103    # CJK Ideo. < CJK ext A/B < Others.
104    # No hangul decomposition.
105
106    ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
107    ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
108    ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
109    ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
110    ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}"));
111    # U+ABFF: not assigned
112
113    # a hangul syllable is decomposed into jamo.
114    $all_undef_9->change(overrideHangul => 0);
115    ok($all_undef_9->eq("\x{AC00}", "\x{1100}\x{1161}"));
116    ok($all_undef_9->lt("\x{AC00}", "\x{ABFF}"));
117}
118
119##### 39..46
120
121{
122    my $ignoreHangul = Unicode::Collate->new(
123	table => undef,
124	normalization => undef,
125	overrideHangul => sub {()},
126	entry => 'AE00 ; [.0100.0020.0002.AE00]  # Hangul GEUL',
127    );
128    # All Hangul Syllables except U+AE00 are ignored.
129
130    ok($ignoreHangul->eq("\x{AC00}", ""));
131    ok($ignoreHangul->lt("\x{AC00}", "\0"));
132    ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
133    ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
134    ok($ignoreHangul->eq("Pe\x{AC00}rl", "Perl"));
135    ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl"));
136    # 'r' is unassigned.
137
138    $ignoreHangul->change(overrideHangul => 0);
139    ok($ignoreHangul->eq("\x{AC00}", "\x{1100}\x{1161}"));
140
141    $ignoreHangul->change(overrideHangul => undef);
142    ok($ignoreHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
143}
144
145##### 47..51
146
147{
148    my $undefHangul = Unicode::Collate->new(
149	table => undef,
150	normalization => undef,
151	overrideHangul => sub {
152	    my $u = shift;
153	    return $u == 0xAE00 ? 0x100 : undef;
154	}
155    );
156    # All Hangul Syllables except U+AE00 are undefined.
157
158    ok($undefHangul->lt("\x{AE00}", "r"));
159    ok($undefHangul->gt("\x{AC00}", "r"));
160    ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
161    ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
162    ok($undefHangul->lt("\x{AC00}", "\x{B000}"));
163}
164
165##### 52..55
166
167{
168    my $undefCJK = Unicode::Collate->new(
169	table => undef,
170	normalization => undef,
171	overrideCJK => sub {
172	    my $u = shift;
173	    return $u == 0x4E00 ? 0x100 : undef;
174	}
175    );
176    # All CJK Ideographs except U+4E00 are undefined.
177
178    ok($undefCJK->lt("\x{4E00}", "r"));
179    ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned
180    ok($undefCJK->lt("Pe\x{4E00}rl", "Perl"));
181    ok($undefCJK->lt("\x{5000}", "\x{6000}"));
182}
183
184##### 56..60
185
186{
187    my $cpHangul = Unicode::Collate->new(
188	table => undef,
189	normalization => undef,
190	overrideHangul => sub { shift }
191    );
192
193    ok($cpHangul->lt("\x{AC00}", "\x{AC01}"));
194    ok($cpHangul->lt("\x{AC01}", "\x{D7A3}"));
195    ok($cpHangul->lt("\x{D7A3}", "r"));
196    ok($cpHangul->lt("r", "\x{D7A4}"));
197    ok($cpHangul->lt("\x{D7A3}", "\x{4E00}"));
198}
199
200##### 61..65
201
202{
203    my $arrayHangul = Unicode::Collate->new(
204	table => undef,
205	normalization => undef,
206	overrideHangul => sub {
207	    my $u = shift;
208	    return [$u, 0x20, 0x2, $u];
209	}
210    );
211
212    ok($arrayHangul->lt("\x{AC00}", "\x{AC01}"));
213    ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}"));
214    ok($arrayHangul->lt("\x{D7A3}", "r"));
215    ok($arrayHangul->lt("r", "\x{D7A4}"));
216    ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}"));
217}
218
219