1b39c5158Smillert 2b39c5158SmillertBEGIN { 3b39c5158Smillert if ($ENV{PERL_CORE}) { 4b39c5158Smillert chdir('t') if -d 't'; 5b39c5158Smillert @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 6b39c5158Smillert } 7b39c5158Smillert} 8b39c5158Smillert 9b39c5158Smillertuse strict; 10b39c5158Smillertuse warnings; 1191f110e0Safresh1BEGIN { $| = 1; print "1..96\n"; } 12898184e3Ssthenmy $count = 0; 13898184e3Ssthensub ok ($;$) { 14898184e3Ssthen my $p = my $r = shift; 15898184e3Ssthen if (@_) { 16898184e3Ssthen my $x = shift; 17898184e3Ssthen $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 18898184e3Ssthen } 19898184e3Ssthen print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 20898184e3Ssthen} 21898184e3Ssthen 22b39c5158Smillertuse Unicode::Collate; 23b39c5158Smillert 24b39c5158Smillertok(1); 25b39c5158Smillert 26b39c5158Smillertsub _pack_U { Unicode::Collate::pack_U(@_) } 27b39c5158Smillertsub _unpack_U { Unicode::Collate::unpack_U(@_) } 28b39c5158Smillert 29*256a93a4Safresh1######################### 30*256a93a4Safresh1 31b39c5158Smillertmy $A_acute = _pack_U(0xC1); 32b39c5158Smillertmy $a_acute = _pack_U(0xE1); 33b39c5158Smillertmy $acute = _pack_U(0x0301); 34b39c5158Smillert 35b39c5158Smillertmy $hiragana = "\x{3042}\x{3044}"; 36b39c5158Smillertmy $katakana = "\x{30A2}\x{30A4}"; 37b39c5158Smillert 3891f110e0Safresh1# 1 39b39c5158Smillert 40b39c5158Smillertmy $Collator = Unicode::Collate->new( 41b39c5158Smillert table => 'keys.txt', 42b39c5158Smillert normalization => undef, 43b39c5158Smillert); 44b39c5158Smillert 45b39c5158Smillertok(ref $Collator, "Unicode::Collate"); 46b39c5158Smillert 47b39c5158Smillertok($Collator->cmp("", ""), 0); 48b39c5158Smillertok($Collator->eq("", "")); 49b39c5158Smillertok($Collator->cmp("", "perl"), -1); 50b39c5158Smillert 51b39c5158Smillertok( 52b39c5158Smillert join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ), 53b39c5158Smillert join(':', qw/ aca acha acia acka ada / ), 54b39c5158Smillert); 55b39c5158Smillert 56b39c5158Smillertok( 57b39c5158Smillert join(':', $Collator->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ), 58b39c5158Smillert join(':', qw/ ACA ACHA ACIA ACKA ADA / ), 59b39c5158Smillert); 60b39c5158Smillert 6191f110e0Safresh1# 7 62b39c5158Smillert 63b39c5158Smillertok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1) 64b39c5158Smillertok($Collator->cmp($a_acute, $A_acute), -1); 65b39c5158Smillertok($Collator->eq("A\cA$acute", $A_acute)); # UCA v9. \cA is invariant. 66b39c5158Smillert 67b39c5158Smillertmy %old_level = $Collator->change(level => 1); 68b39c5158Smillertok($Collator->eq("A$acute", $A_acute)); 69b39c5158Smillertok($Collator->eq("A", $A_acute)); 70b39c5158Smillert 71b39c5158Smillertok($Collator->change(level => 2)->eq($a_acute, $A_acute)); 72b39c5158Smillertok($Collator->lt("A", $A_acute)); 73b39c5158Smillert 74b39c5158Smillertok($Collator->change(%old_level)->lt("A", $A_acute)); 75b39c5158Smillertok($Collator->lt("A", $A_acute)); 76b39c5158Smillertok($Collator->lt("A", $a_acute)); 77b39c5158Smillertok($Collator->lt($a_acute, $A_acute)); 78b39c5158Smillert 7991f110e0Safresh1# 18 80b39c5158Smillert 81b39c5158Smillert$Collator->change(level => 2); 82b39c5158Smillert 83b39c5158Smillertok($Collator->{level}, 2); 84b39c5158Smillert 85b39c5158Smillertok( $Collator->cmp("ABC","abc"), 0); 86b39c5158Smillertok( $Collator->eq("ABC","abc") ); 87b39c5158Smillertok( $Collator->le("ABC","abc") ); 88b39c5158Smillertok( $Collator->cmp($hiragana, $katakana), 0); 89b39c5158Smillertok( $Collator->eq($hiragana, $katakana) ); 90b39c5158Smillertok( $Collator->ge($hiragana, $katakana) ); 91b39c5158Smillert 9291f110e0Safresh1# 25 93b39c5158Smillert 94b39c5158Smillert# hangul 95b39c5158Smillertok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") ); 96b39c5158Smillertok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") ); 97b39c5158Smillertok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") ); 98b39c5158Smillertok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") ); 99b39c5158Smillertok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") ); 100b39c5158Smillertok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana 101b39c5158Smillert 10291f110e0Safresh1# 31 103b39c5158Smillert 104b39c5158Smillert$Collator->change(%old_level, katakana_before_hiragana => 1); 105b39c5158Smillert 106b39c5158Smillertok($Collator->{level}, 4); 107b39c5158Smillert 108b39c5158Smillertok( $Collator->cmp("abc", "ABC"), -1); 109b39c5158Smillertok( $Collator->ne("abc", "ABC") ); 110b39c5158Smillertok( $Collator->lt("abc", "ABC") ); 111b39c5158Smillertok( $Collator->le("abc", "ABC") ); 112b39c5158Smillertok( $Collator->cmp($hiragana, $katakana), 1); 113b39c5158Smillertok( $Collator->ne($hiragana, $katakana) ); 114b39c5158Smillertok( $Collator->gt($hiragana, $katakana) ); 115b39c5158Smillertok( $Collator->ge($hiragana, $katakana) ); 116b39c5158Smillert 11791f110e0Safresh1# 40 118b39c5158Smillert 119b39c5158Smillert$Collator->change(upper_before_lower => 1); 120b39c5158Smillert 121b39c5158Smillertok( $Collator->cmp("abc", "ABC"), 1); 122b39c5158Smillertok( $Collator->ge("abc", "ABC"), 1); 123b39c5158Smillertok( $Collator->gt("abc", "ABC"), 1); 124b39c5158Smillertok( $Collator->cmp($hiragana, $katakana), 1); 125b39c5158Smillertok( $Collator->ge($hiragana, $katakana), 1); 126b39c5158Smillertok( $Collator->gt($hiragana, $katakana), 1); 127b39c5158Smillert 12891f110e0Safresh1# 46 129b39c5158Smillert 130b39c5158Smillert$Collator->change(katakana_before_hiragana => 0); 131b39c5158Smillert 132b39c5158Smillertok( $Collator->cmp("abc", "ABC"), 1); 133b39c5158Smillertok( $Collator->cmp($hiragana, $katakana), -1); 134b39c5158Smillert 13591f110e0Safresh1# 48 136b39c5158Smillert 137b39c5158Smillert$Collator->change(upper_before_lower => 0); 138b39c5158Smillert 139b39c5158Smillertok( $Collator->cmp("abc", "ABC"), -1); 140b39c5158Smillertok( $Collator->le("abc", "ABC") ); 141b39c5158Smillertok( $Collator->cmp($hiragana, $katakana), -1); 142b39c5158Smillertok( $Collator->lt($hiragana, $katakana) ); 143b39c5158Smillert 14491f110e0Safresh1# 52 14591f110e0Safresh1 146898184e3Ssthen{ 147b39c5158Smillert my $ignoreAE = Unicode::Collate->new( 148b39c5158Smillert table => 'keys.txt', 149b39c5158Smillert normalization => undef, 150b39c5158Smillert ignoreChar => qr/^[aAeE]$/, 151b39c5158Smillert ); 152b39c5158Smillert ok($ignoreAE->eq("element","lament")); 153b39c5158Smillert ok($ignoreAE->eq("Perl","ePrl")); 154898184e3Ssthen} 155b39c5158Smillert 15691f110e0Safresh1# 54 157b39c5158Smillert 158898184e3Ssthen{ 159b39c5158Smillert my $undefAE = Unicode::Collate->new( 160b39c5158Smillert table => 'keys.txt', 161b39c5158Smillert normalization => undef, 162b39c5158Smillert undefChar => qr/^[aAeE]$/, 163b39c5158Smillert ); 164b39c5158Smillert ok($undefAE ->gt("edge","fog")); 165b39c5158Smillert ok($Collator->lt("edge","fog")); 166b39c5158Smillert ok($undefAE ->gt("lake","like")); 167b39c5158Smillert ok($Collator->lt("lake","like")); 168898184e3Ssthen} 169b39c5158Smillert 17091f110e0Safresh1# 58 171b39c5158Smillert 172898184e3Ssthen{ 173b39c5158Smillert my $dropArticles = Unicode::Collate->new( 174b39c5158Smillert table => "keys.txt", 175b39c5158Smillert normalization => undef, 176b39c5158Smillert preprocess => sub { 177b39c5158Smillert my $string = shift; 178b39c5158Smillert $string =~ s/\b(?:an?|the)\s+//ig; 179b39c5158Smillert $string; 180b39c5158Smillert }, 181b39c5158Smillert ); 182b39c5158Smillert ok($dropArticles->eq("camel", "a camel")); 183b39c5158Smillert ok($dropArticles->eq("Perl", "The Perl")); 184b39c5158Smillert ok($dropArticles->lt("the pen", "a pencil")); 185b39c5158Smillert ok($Collator->lt("Perl", "The Perl")); 186b39c5158Smillert ok($Collator->gt("the pen", "a pencil")); 187898184e3Ssthen} 188b39c5158Smillert 18991f110e0Safresh1# 63 19091f110e0Safresh1 191898184e3Ssthen{ 192898184e3Ssthen my $undefName = Unicode::Collate->new( 193b39c5158Smillert table => "keys.txt", 194b39c5158Smillert normalization => undef, 195b39c5158Smillert undefName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/, 196b39c5158Smillert ); 197b39c5158Smillert # HIRAGANA and KATAKANA are made undefined via undefName. 198b39c5158Smillert # So they are after CJK Unified Ideographs. 199b39c5158Smillert 200898184e3Ssthen ok($undefName->lt("\x{4E00}", $hiragana)); 201898184e3Ssthen ok($undefName->lt("\x{4E03}", $katakana)); 202b39c5158Smillert ok($Collator ->gt("\x{4E00}", $hiragana)); 203b39c5158Smillert ok($Collator ->gt("\x{4E03}", $katakana)); 204898184e3Ssthen} 205b39c5158Smillert 20691f110e0Safresh1# 67 20791f110e0Safresh1 208898184e3Ssthen{ 209b39c5158Smillert my $O_str = Unicode::Collate->new( 210b39c5158Smillert table => "keys.txt", 211b39c5158Smillert normalization => undef, 212b39c5158Smillert entry => <<'ENTRIES', 213b39c5158Smillert0008 ; [*0008.0000.0000.0000] # BACKSPACE (need to be non-ignorable) 214b39c5158Smillert004F 0337 ; [.0B53.0020.0008.004F] # capital O WITH SHORT SOLIDUS OVERLAY 215b39c5158Smillert006F 0008 002F ; [.0B53.0020.0002.006F] # LATIN SMALL LETTER O WITH STROKE 216b39c5158Smillert004F 0008 002F ; [.0B53.0020.0008.004F] # LATIN CAPITAL LETTER O WITH STROKE 217b39c5158Smillert006F 0337 ; [.0B53.0020.0002.004F] # small O WITH SHORT SOLIDUS OVERLAY 218b39c5158Smillert200B ; [.2000.0000.0000.0000] # ZERO WIDTH SPACE (may be non-sense but ...) 219b39c5158Smillert#00F8 ; [.0B53.0020.0002.00F8] # LATIN SMALL LETTER O WITH STROKE 220b39c5158Smillert#00D8 ; [.0B53.0020.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE 221b39c5158SmillertENTRIES 222b39c5158Smillert ); 223b39c5158Smillert 224b39c5158Smillert my $o_BS_slash = _pack_U(0x006F, 0x0008, 0x002F); 225b39c5158Smillert my $O_BS_slash = _pack_U(0x004F, 0x0008, 0x002F); 226b39c5158Smillert my $o_sol = _pack_U(0x006F, 0x0337); 227b39c5158Smillert my $O_sol = _pack_U(0x004F, 0x0337); 228b39c5158Smillert my $o_stroke = _pack_U(0x00F8); 229b39c5158Smillert my $O_stroke = _pack_U(0x00D8); 230b39c5158Smillert 231b39c5158Smillert ok($O_str->eq($o_stroke, $o_BS_slash)); 232b39c5158Smillert ok($O_str->eq($O_stroke, $O_BS_slash)); 233b39c5158Smillert 234b39c5158Smillert ok($O_str->eq($o_stroke, $o_sol)); 235b39c5158Smillert ok($O_str->eq($O_stroke, $O_sol)); 236b39c5158Smillert 237b39c5158Smillert ok($Collator->eq("\x{200B}", "\0")); 238b39c5158Smillert ok($O_str ->gt("\x{200B}", "\0")); 239b39c5158Smillert ok($O_str ->gt("\x{200B}", "A")); 240898184e3Ssthen} 241b39c5158Smillert 24291f110e0Safresh1# 74 243b39c5158Smillert 244b39c5158Smillertmy %origVer = $Collator->change(UCA_Version => 8); 245b39c5158Smillert 246b39c5158Smillert$Collator->change(level => 3); 247b39c5158Smillert 248b39c5158Smillertok($Collator->gt("!\x{300}", "")); 249b39c5158Smillertok($Collator->gt("!\x{300}", "!")); 250b39c5158Smillertok($Collator->eq("!\x{300}", "\x{300}")); 251b39c5158Smillert 252b39c5158Smillert$Collator->change(level => 2); 253b39c5158Smillert 254b39c5158Smillertok($Collator->eq("!\x{300}", "\x{300}")); 255b39c5158Smillert 256b39c5158Smillert$Collator->change(level => 4); 257b39c5158Smillert 258b39c5158Smillertok($Collator->gt("!\x{300}", "!")); 259b39c5158Smillertok($Collator->lt("!\x{300}", "\x{300}")); 260b39c5158Smillert 261b39c5158Smillert$Collator->change(%origVer, level => 3); 262b39c5158Smillert 263b39c5158Smillertok($Collator->eq("!\x{300}", "")); 264b39c5158Smillertok($Collator->eq("!\x{300}", "!")); 265b39c5158Smillertok($Collator->lt("!\x{300}", "\x{300}")); 266b39c5158Smillert 267b39c5158Smillert$Collator->change(level => 4); 268b39c5158Smillert 269b39c5158Smillertok($Collator->gt("!\x{300}", "")); 270b39c5158Smillertok($Collator->eq("!\x{300}", "!")); 271b39c5158Smillert 27291f110e0Safresh1# 85 273b39c5158Smillert 274b39c5158Smillert$_ = 'Foo'; 275b39c5158Smillert 276b39c5158Smillertmy $c = Unicode::Collate->new( 277b39c5158Smillert table => 'keys.txt', 278b39c5158Smillert normalization => undef, 279b39c5158Smillert upper_before_lower => 1, 280b39c5158Smillert); 281b39c5158Smillert 282b39c5158Smillertok($_, 'Foo'); # fixed at v. 0.52; no longer clobber $_ 283b39c5158Smillert 284b39c5158Smillertmy($temp, @temp); # Not the result but the side effect matters. 285b39c5158Smillert 286b39c5158Smillert$_ = 'Foo'; 287b39c5158Smillert$temp = $c->getSortKey("abc"); 288b39c5158Smillertok($_, 'Foo'); 289b39c5158Smillert 290b39c5158Smillert$_ = 'Foo'; 291b39c5158Smillert$temp = $c->viewSortKey("abc"); 292b39c5158Smillertok($_, 'Foo'); 293b39c5158Smillert 294b39c5158Smillert$_ = 'Foo'; 295b39c5158Smillert@temp = $c->sort("abc", "xyz", "def"); 296b39c5158Smillertok($_, 'Foo'); 297b39c5158Smillert 298b39c5158Smillert$_ = 'Foo'; 299b39c5158Smillert@temp = $c->index("perl5", "RL"); 300b39c5158Smillertok($_, 'Foo'); 301b39c5158Smillert 302b39c5158Smillert$_ = 'Foo'; 303b39c5158Smillert@temp = $c->index("perl5", "LR"); 304b39c5158Smillertok($_, 'Foo'); 305b39c5158Smillert 30691f110e0Safresh1# 91 30791f110e0Safresh1 30891f110e0Safresh1{ 30991f110e0Safresh1 my $caseless = Unicode::Collate->new( 31091f110e0Safresh1 table => "keys.txt", 31191f110e0Safresh1 normalization => undef, 31291f110e0Safresh1 preprocess => sub { uc shift }, 31391f110e0Safresh1 ); 31491f110e0Safresh1 ok( $Collator->gt("ABC","abc") ); 31591f110e0Safresh1 ok( $caseless->eq("ABC","abc") ); 31691f110e0Safresh1} 31791f110e0Safresh1 31891f110e0Safresh1# 93 31991f110e0Safresh1 32091f110e0Safresh1{ 32191f110e0Safresh1 eval { require Unicode::Normalize; }; 32291f110e0Safresh1 if ($@) { 32391f110e0Safresh1 eval { my $n1 = Unicode::Collate->new(table => "keys.txt"); }; 32491f110e0Safresh1 ok($@ =~ /Unicode::Normalize is required/); 32591f110e0Safresh1 32691f110e0Safresh1 eval { my $n2 = Unicode::Collate->new 32791f110e0Safresh1 (table => "keys.txt", normalization => undef); }; 32891f110e0Safresh1 ok(!$@); 32991f110e0Safresh1 33091f110e0Safresh1 eval { my $n3 = Unicode::Collate->new 33191f110e0Safresh1 (table => "keys.txt", normalization => 'prenormalized'); }; 33291f110e0Safresh1 ok($@ =~ /Unicode::Normalize is required/); 33391f110e0Safresh1 } else { 33491f110e0Safresh1 ok(1) for 1..3; 33591f110e0Safresh1 } 33691f110e0Safresh1} 33791f110e0Safresh1 33891f110e0Safresh1# 96 339b39c5158Smillert 340