1 2BEGIN { 3 if ($ENV{PERL_CORE}) { 4 chdir('t') if -d 't'; 5 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 6 } 7} 8 9use strict; 10use warnings; 11BEGIN { $| = 1; print "1..118\n"; } 12my $count = 0; 13sub ok ($;$) { 14 my $p = my $r = shift; 15 if (@_) { 16 my $x = shift; 17 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 18 } 19 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 20} 21 22use Unicode::Collate; 23 24ok(1); 25 26sub _pack_U { Unicode::Collate::pack_U(@_) } 27sub _unpack_U { Unicode::Collate::unpack_U(@_) } 28 29######################### 30 31our $kjeEntry = <<'ENTRIES'; 320301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT 330334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY 34043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA 35041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA 36045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE 37043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE 38040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE 39041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE 40ENTRIES 41 42our $aaEntry = <<'ENTRIES'; 430304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230) 44030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230) 450327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202) 46031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232) 470061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A 480041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A 49007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z 50005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z 5100E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM 5200C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM 530061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE 540041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE 55ENTRIES 56 57######################### 58 59my $kjeNoN = Unicode::Collate->new( 60 level => 1, 61 table => undef, 62 normalization => undef, 63 entry => $kjeEntry, 64); 65 66ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); 67ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); 68ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); 69ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 70 71# 5 72 73our %sortkeys; 74 75$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}"); 76$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"); 77$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"); 78 79eval { require Unicode::Normalize }; 80if (!$@) { 81 my $kjeNFD = Unicode::Collate->new( 82 level => 1, 83 table => undef, 84 entry => $kjeEntry, 85 ); 86 87ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}")); 88ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}")); 89ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}")); 90ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 91# 9 92 93 my $aaNFD = Unicode::Collate->new( 94 level => 1, 95 table => undef, 96 entry => $aaEntry, 97 ); 98 99ok($aaNFD->lt("Z", "A\x{30A}\x{304}")); 100ok($aaNFD->eq("A", "A\x{304}\x{30A}")); 101ok($aaNFD->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); 102ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}")); 103ok($aaNFD->lt("Z", "A\x{327}\x{30A}")); 104ok($aaNFD->lt("Z", "A\x{30A}\x{327}")); 105ok($aaNFD->lt("Z", "A\x{31A}\x{30A}")); 106ok($aaNFD->lt("Z", "A\x{30A}\x{31A}")); 107# 17 108 109 my $aaPre = Unicode::Collate->new( 110 level => 1, 111 normalization => "prenormalized", 112 table => undef, 113 entry => $aaEntry, 114 ); 115 116ok($aaPre->lt("Z", "A\x{30A}\x{304}")); 117ok($aaPre->eq("A", "A\x{304}\x{30A}")); 118ok($aaPre->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); 119ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}")); 120ok($aaPre->lt("Z", "A\x{327}\x{30A}")); 121ok($aaPre->lt("Z", "A\x{30A}\x{327}")); 122ok($aaPre->lt("Z", "A\x{31A}\x{30A}")); 123ok($aaPre->lt("Z", "A\x{30A}\x{31A}")); 124# 25 125} else { 126 ok(1) for 1..20; 127} 128 129# again: loading Unicode::Normalize should not affect $kjeNoN. 130ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); 131ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); 132ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); 133ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 134 135ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}")); 136ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}")); 137ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}")); 138 139# 32 140 141my $aaNoN = Unicode::Collate->new( 142 level => 1, 143 table => undef, 144 entry => $aaEntry, 145 normalization => undef, 146); 147 148ok($aaNoN->lt("Z", "A\x{30A}\x{304}")); 149ok($aaNoN->eq("A", "A\x{304}\x{30A}")); 150ok($aaNoN->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); 151ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}")); 152ok($aaNoN->eq("A", "A\x{327}\x{30A}")); 153ok($aaNoN->lt("Z", "A\x{30A}\x{327}")); 154ok($aaNoN->eq("A", "A\x{31A}\x{30A}")); 155ok($aaNoN->lt("Z", "A\x{30A}\x{31A}")); 156 157# 40 158 159# suppress contractions (not affected) 160 161my $kjeSup = Unicode::Collate->new( 162 level => 1, 163 table => undef, 164 normalization => undef, 165 entry => $kjeEntry, 166 suppress => [0x400..0x45F], 167); 168 169ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}")); 170ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}")); 171ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}")); 172ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}")); 173 174# 44 175 176our $tibetanEntry = <<'ENTRIES'; 1770000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) 1780FB2 ; [.205B.0020.0002.0FB2] # TIBETAN SUBJOINED LETTER RA 1790FB3 ; [.205E.0020.0002.0FB3] # TIBETAN SUBJOINED LETTER LA 1800F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA 1810F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I 1820F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II 1830F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II 1840F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I 1850F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II 1860F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II 1870F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U 1880F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU 1890F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU 1900F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R 1910FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R 1920F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 1930FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 1940FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 1950F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L 1960FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L 1970F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 1980FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 1990FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 200ENTRIES 201 202# ccc(0F71) = 129 203# ccc(0F80) = 130 204# 0F76 = 0FB2 0F80 205# 0F78 = 0FB3 0F80 206# 0F81 = 0F71 0F80 207# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71 208# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71 209 210eval { require Unicode::Normalize }; 211if (!$@) { 212 my $tibNFD = Unicode::Collate->new( 213 table => undef, 214 entry => $tibetanEntry, 215 UCA_Version => 24, 216 ); 217 218 # VOCALIC RR 219 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}")); 220 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}")); 221 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}")); 222 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}")); 223 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}")); 224 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}")); 225 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}")); 226 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}")); 227 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}")); 228 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}")); 229 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}")); 230 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}")); 231 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}")); 232 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}")); 233# 58 234 235 # VOCALIC LL 236 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}")); 237 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}")); 238 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}")); 239 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}")); 240 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}")); 241 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}")); 242 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}")); 243 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}")); 244 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}")); 245 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}")); 246 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}")); 247 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}")); 248 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}")); 249 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}")); 250# 72 251 252 my $a1 = "\x{FB2}\x{334}\x{F81}"; 253 my $b1 = "\x{F77}\0\x{334}"; 254 my $a2 = "\x{FB2}\x{334}\x{F81}"; 255 my $b2 = "\x{FB2}\x{F80}\0\x{334}\x{F71}"; 256 257 for my $v (qw/20 22 24 26 28/) { 258 my $tib = Unicode::Collate->new( 259 table => undef, 260 entry => $tibetanEntry, 261 UCA_Version => $v, 262 ); 263 my $long = 22 <= $v && $v <= 24; 264 ok($tib->cmp($a1, $b1), $long ? 0 : -1); 265 ok($tib->cmp($a2, $b2), $long ? 1 : 0); 266 267 $tib->change(long_contraction => 0); 268 ok($tib->cmp($a1, $b1), -1); 269 ok($tib->cmp($a2, $b2), 0); 270 271 $tib->change(long_contraction => 1); 272 ok($tib->cmp($a1, $b1), 0); 273 ok($tib->cmp($a2, $b2), 1); 274 } 275# 102 276 277 # UCA_Version => 22 278 ok($tibNFD->cmp($a1, $b1), 0); 279 ok($tibNFD->cmp($a2, $b2), 1); 280 281 $tibNFD->change(UCA_Version => 26); # not affect long_contraction 282 ok($tibNFD->cmp($a1, $b1), 0); 283 ok($tibNFD->cmp($a2, $b2), 1); 284# 106 285 286 my $discontNFD = Unicode::Collate->new( 287 table => undef, 288 UCA_Version => 22, 289 entry => <<'ENTRIES', 2900000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) 2910301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT 2920300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT 2930327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA 2940334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY 2950041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A 2960041 0327 0301 ; [.0102.0020.0008.0041] 2970041 0300 ; [.0103.0020.0008.0041] 298ENTRIES 299 ); 300 301 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); 302 ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}")); 303 304 $discontNFD->change(long_contraction => 0); 305 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); 306 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\0\x{327}\x{301}\x{334}")); 307 ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}")); 308 309 $discontNFD->change(level => 1); 310 ok($discontNFD->gt("A\x{327}\x{300}", "A\x{327}\0\x{300}")); 311 312 # discontiguous 313 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); 314 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{300}")); 315 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A")); 316 317 # contiguous 318 ok($discontNFD->eq("A\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); 319 ok($discontNFD->lt("A\x{327}\x{301}", "A\x{300}")); 320 ok($discontNFD->gt("A\x{327}\x{301}", "A")); 321} else { 322 ok(1) for 1..74; 323} 324# 118 325