1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17use strict;
18use warnings;
19BEGIN { $| = 1; print "1..32\n"; }
20my $count = 0;
21sub ok ($;$) {
22    my $p = my $r = shift;
23    if (@_) {
24	my $x = shift;
25	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
26    }
27    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
28}
29
30use Unicode::Collate;
31
32ok(1);
33
34#########################
35
36{
37    # Table is undefined, then no entry is defined.
38    my $undef_table = Unicode::Collate->new(
39	table => undef,
40	normalization => undef,
41	level => 1,
42    );
43
44    # in the Unicode code point order
45    ok($undef_table->lt('', 'A'));
46    ok($undef_table->lt('ABC', 'B'));
47
48    # Hangul should be decomposed (even w/o Unicode::Normalize).
49    ok($undef_table->lt("Perl", "\x{AC00}"));
50    ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
51    ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
52    ok($undef_table->lt("\x{AE00}", "\x{3042}"));
53
54    # U+AC00: Hangul GA
55    # U+AE00: Hangul GEUL
56    # U+3042: Hiragana A
57
58    # Weight for CJK Ideographs is defined, though.
59    ok($undef_table->lt("", "\x{4E00}"));
60    ok($undef_table->lt("\x{4E8C}","ABC"));
61    ok($undef_table->lt("\x{4E00}","\x{3042}"));
62    ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
63
64# 11
65
66    # U+4E00: Ideograph "ONE"
67    # U+4E8C: Ideograph "TWO"
68
69    for my $v ('', 8, 9, 11, 14) {
70	$undef_table->change(UCA_Version => $v) if $v;
71	ok($undef_table->lt("\x{4E00}","\0"));
72    }
73}
74
75# 16
76
77{
78    my $onlyABC = Unicode::Collate->new(
79	table => undef,
80	normalization => undef,
81	entry => << 'ENTRIES',
820061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
830041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
840062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
850042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
860063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
870043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
88ENTRIES
89    );
90    ok(
91	join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
92	join(':',                 qw/ A aB Ab ABA BAC cAc cc / ),
93    );
94}
95
96# 17
97
98{
99    my $few_entries = Unicode::Collate->new(
100	entry => <<'ENTRIES',
1010050 ; [.0101.0020.0002.0050]  # P
1020045 ; [.0102.0020.0002.0045]  # E
1030052 ; [.0103.0020.0002.0052]  # R
104004C ; [.0104.0020.0002.004C]  # L
1051100 ; [.0105.0020.0002.1100]  # Hangul Jamo initial G
1061175 ; [.0106.0020.0002.1175]  # Hangul Jamo middle I
1075B57 ; [.0107.0020.0002.5B57]  # CJK Ideograph "Letter"
108ENTRIES
109	table => undef,
110	normalization => undef,
111    );
112    # defined before undefined
113    my $sortABC = join '',
114	$few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");
115
116    ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");
117
118    ok($few_entries->lt('E', 'D'));
119    ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
120    ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
121
122    # Hangul must be decomposed.
123    ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
124}
125
126# 22
127
128{
129    my $highestNUL = Unicode::Collate->new(
130	table => undef,
131	normalization => undef,
132	level => 1,
133	entry => '0000 ; [.FFFE.0020.0005.0000]',
134    );
135
136    for my $v ('', 8, 9, 11, 14) {
137	$highestNUL->change(UCA_Version => $v) if $v;
138	ok($highestNUL->lt("abc\x{4E00}", "abc\0"));
139	ok($highestNUL->lt("abc\x{E0000}","abc\0"));
140    }
141}
142
143# 32
144