1
2BEGIN {
3    if ($ENV{PERL_CORE}) {
4	chdir('t') if -d 't';
5	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
6    }
7}
8
9use strict;
10use warnings;
11BEGIN { $| = 1; print "1..65\n"; }
12my $count = 0;
13sub ok ($;$) {
14    my $p = my $r = shift;
15    if (@_) {
16	my $x = shift;
17	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
18    }
19    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
20}
21
22use Unicode::Collate;
23
24ok(1);
25
26sub _pack_U   { Unicode::Collate::pack_U(@_) }
27sub _unpack_U { Unicode::Collate::unpack_U(@_) }
28
29#########################
30
31##### 2..31
32
33{
34    my $all_undef_8 = Unicode::Collate->new(
35	table => undef,
36	normalization => undef,
37	overrideCJK => undef,
38	overrideHangul => undef,
39	UCA_Version => 8,
40    );
41    # All in the Unicode code point order.
42    # No hangul decomposition.
43
44    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
45    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
46    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
47    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
48    ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
49    ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
50    # U+ABFF: not assigned
51
52    # a hangul syllable is decomposed into jamo.
53    $all_undef_8->change(overrideHangul => 0);
54    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
55    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
56    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
57    ok($all_undef_8->gt("\x{4E00}", "\x{AC00}"));
58    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
59    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
60
61    # CJK defined < Jamo undefined
62    $all_undef_8->change(overrideCJK => 0);
63    ok($all_undef_8->gt("\x{1100}", "\x{3402}"));
64    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
65    ok($all_undef_8->gt("\x{4DFF}", "\x{4E00}"));
66    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
67    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
68    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
69
70    # CJK undefined > Jamo undefined
71    $all_undef_8->change(overrideCJK => undef);
72    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
73    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
74    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
75    ok($all_undef_8->gt("\x{4E00}", "\x{AC00}"));
76    ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}"));
77    ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}"));
78
79    $all_undef_8->change(overrideHangul => undef);
80    ok($all_undef_8->lt("\x{1100}", "\x{3402}"));
81    ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
82    ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
83    ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
84    ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
85    ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
86}
87
88##### 32..38
89
90{
91    my $all_undef_9 = Unicode::Collate->new(
92	table => undef,
93	normalization => undef,
94	overrideCJK => undef,
95	overrideHangul => undef,
96	UCA_Version => 9,
97    );
98    # CJK Ideo. < CJK ext A/B < Others.
99    # No hangul decomposition.
100
101    ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
102    ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
103    ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
104    ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
105    ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}"));
106    # U+ABFF: not assigned
107
108    # a hangul syllable is decomposed into jamo.
109    $all_undef_9->change(overrideHangul => 0);
110    ok($all_undef_9->eq("\x{AC00}", "\x{1100}\x{1161}"));
111    ok($all_undef_9->lt("\x{AC00}", "\x{ABFF}"));
112}
113
114##### 39..46
115
116{
117    my $ignoreHangul = Unicode::Collate->new(
118	table => undef,
119	normalization => undef,
120	overrideHangul => sub {()},
121	entry => 'AE00 ; [.0100.0020.0002.AE00]  # Hangul GEUL',
122    );
123    # All Hangul Syllables except U+AE00 are ignored.
124
125    ok($ignoreHangul->eq("\x{AC00}", ""));
126    ok($ignoreHangul->lt("\x{AC00}", "\0"));
127    ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
128    ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
129    ok($ignoreHangul->eq("Pe\x{AC00}rl", "Perl"));
130    ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl"));
131    # 'r' is unassigned.
132
133    $ignoreHangul->change(overrideHangul => 0);
134    ok($ignoreHangul->eq("\x{AC00}", "\x{1100}\x{1161}"));
135
136    $ignoreHangul->change(overrideHangul => undef);
137    ok($ignoreHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
138}
139
140##### 47..51
141
142{
143    my $undefHangul = Unicode::Collate->new(
144	table => undef,
145	normalization => undef,
146	overrideHangul => sub {
147	    my $u = shift;
148	    return $u == 0xAE00 ? 0x100 : undef;
149	}
150    );
151    # All Hangul Syllables except U+AE00 are undefined.
152
153    ok($undefHangul->lt("\x{AE00}", "r"));
154    ok($undefHangul->gt("\x{AC00}", "r"));
155    ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
156    ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
157    ok($undefHangul->lt("\x{AC00}", "\x{B000}"));
158}
159
160##### 52..55
161
162{
163    my $undefCJK = Unicode::Collate->new(
164	table => undef,
165	normalization => undef,
166	overrideCJK => sub {
167	    my $u = shift;
168	    return $u == 0x4E00 ? 0x100 : undef;
169	}
170    );
171    # All CJK Ideographs except U+4E00 are undefined.
172
173    ok($undefCJK->lt("\x{4E00}", "r"));
174    ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned
175    ok($undefCJK->lt("Pe\x{4E00}rl", "Perl"));
176    ok($undefCJK->lt("\x{5000}", "\x{6000}"));
177}
178
179##### 56..60
180
181{
182    my $cpHangul = Unicode::Collate->new(
183	table => undef,
184	normalization => undef,
185	overrideHangul => sub { shift }
186    );
187
188    ok($cpHangul->lt("\x{AC00}", "\x{AC01}"));
189    ok($cpHangul->lt("\x{AC01}", "\x{D7A3}"));
190    ok($cpHangul->lt("\x{D7A3}", "r"));
191    ok($cpHangul->lt("r", "\x{D7A4}"));
192    ok($cpHangul->lt("\x{D7A3}", "\x{4E00}"));
193}
194
195##### 61..65
196
197{
198    my $arrayHangul = Unicode::Collate->new(
199	table => undef,
200	normalization => undef,
201	overrideHangul => sub {
202	    my $u = shift;
203	    return [$u, 0x20, 0x2, $u];
204	}
205    );
206
207    ok($arrayHangul->lt("\x{AC00}", "\x{AC01}"));
208    ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}"));
209    ok($arrayHangul->lt("\x{D7A3}", "r"));
210    ok($arrayHangul->lt("r", "\x{D7A4}"));
211    ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}"));
212}
213
214