1
2BEGIN {
3    if ($ENV{PERL_CORE}) {
4	chdir('t') if -d 't';
5	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
6    }
7}
8
9use strict;
10use warnings;
11BEGIN { $| = 1; print "1..41\n"; }
12my $count = 0;
13sub ok ($;$) {
14    my $p = my $r = shift;
15    if (@_) {
16	my $x = shift;
17	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
18    }
19    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
20}
21
22use Unicode::Collate;
23
24ok(1);
25
26sub _pack_U   { Unicode::Collate::pack_U(@_) }
27sub _unpack_U { Unicode::Collate::unpack_U(@_) }
28
29#########################
30
31my $trad = Unicode::Collate->new(
32  table => 'keys.txt',
33  normalization => undef,
34  ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
35  level => 3,
36  entry => << 'ENTRIES',
37 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
38 0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
39 0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
40ENTRIES
41);
42# 0063  ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
43# 0064  ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
44
45##### 2..3
46
47ok(
48  join(':', $trad->sort( qw/ acha aca ada acia acka / ) ),
49  join(':',              qw/ aca acia acka acha ada / ),
50);
51
52ok(
53  join(':', $trad->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ),
54  join(':',              qw/ ACA ACIA ACKA ACHA ADA / ),
55);
56
57##### 4..7
58
59ok($trad->gt("ocho", "oc\cAho")); # UCA v14
60ok($trad->gt("ocho", "oc\0\cA\0\cBho"));  # UCA v14
61ok($trad->eq("-", ""));
62ok($trad->gt("ocho", "oc-ho"));
63
64##### 8..11
65
66$trad->change(UCA_Version => 9);
67
68ok($trad->eq("ocho", "oc\cAho")); # UCA v9
69ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9
70ok($trad->eq("-", ""));
71ok($trad->gt("ocho", "oc-ho"));
72
73##### 12..15
74
75$trad->change(UCA_Version => 8);
76
77ok($trad->gt("ocho", "oc\cAho"));
78ok($trad->gt("ocho", "oc\0\cA\0\cBho"));
79ok($trad->eq("-", ""));
80ok($trad->gt("ocho", "oc-ho"));
81
82
83##### 16..19
84
85$trad->change(UCA_Version => 9);
86
87my $hiragana = "\x{3042}\x{3044}";
88my $katakana = "\x{30A2}\x{30A4}";
89
90# HIRAGANA and KATAKANA are ignorable via ignoreName
91ok($trad->eq($hiragana, ""));
92ok($trad->eq("", $katakana));
93ok($trad->eq($hiragana, $katakana));
94ok($trad->eq($katakana, $hiragana));
95
96
97##### 20..31
98
99# According to Conformance Test (UCA_Version == 9 or 11),
100# a L3-ignorable is treated as a completely ignorable.
101
102my $L3ignorable = Unicode::Collate->new(
103  alternate => 'Non-ignorable',
104  level => 3,
105  table => undef,
106  normalization => undef,
107  UCA_Version => 9,
108  entry => <<'ENTRIES',
1090000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
1100001  ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
1110591  ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
1121D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
1130021  ; [*024B.0020.0002.0021] # EXCLAMATION MARK
11409BE  ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
11509C7  ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
11609CB  ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
11709C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
1181D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
1191D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
1201D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
1211D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
122ENTRIES
123);
124
125ok($L3ignorable->lt("\cA", "!"));
126ok($L3ignorable->lt("\x{591}", "!"));
127ok($L3ignorable->eq("\cA", "\x{591}"));
128ok($L3ignorable->eq("\x{9C7}\x{9BE}A", "\x{9C7}\cA\x{9BE}A"));
129ok($L3ignorable->eq("\x{9C7}\x{9BE}A", "\x{9C7}\x{591}\x{9BE}A"));
130ok($L3ignorable->eq("\x{9C7}\x{9BE}A", "\x{9C7}\x{1D165}\x{9BE}A"));
131ok($L3ignorable->eq("\x{9C7}\x{9BE}A", "\x{9CB}A"));
132ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}"));
133ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}"));
134ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}"));
135ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}"));
136ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}"));
137
138##### 32..41
139
140my $c = Unicode::Collate->new(
141  table => 'keys.txt',
142  normalization => undef,
143  level => 1,
144  UCA_Version => 14,
145  entry => << 'ENTRIES',
146034F  ; [.0000.0000.0000.034F] # COMBINING GRAPHEME JOINER
1470063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
1480043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
1490043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
150ENTRIES
151);
152# 0063  ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
153# 0064  ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
154
155ok($c->gt("ocho", "oc\x00\x00ho"));
156ok($c->gt("ocho", "oc\cAho"));
157ok($c->gt("ocho", "oc\x{34F}ho"));
158ok($c->gt("ocio", "oc\x{34F}ho"));
159ok($c->lt("ocgo", "oc\x{34F}ho"));
160ok($c->lt("oceo", "oc\x{34F}ho"));
161
162ok($c->viewSortKey("ocho"),         "[0B4B 0A3F 0B4B | | |]");
163ok($c->viewSortKey("oc\x00\x00ho"), "[0B4B 0A3D 0AB9 0B4B | | |]");
164ok($c->viewSortKey("oc\cAho"),      "[0B4B 0A3D 0AB9 0B4B | | |]");
165ok($c->viewSortKey("oc\x{34F}ho"),  "[0B4B 0A3D 0AB9 0B4B | | |]");
166
167
168