1 2BEGIN { 3 if ($ENV{PERL_CORE}) { 4 chdir('t') if -d 't'; 5 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 6 } 7} 8 9use strict; 10use warnings; 11BEGIN { $| = 1; print "1..91\n"; } 12my $count = 0; 13sub ok ($;$) { 14 my $p = my $r = shift; 15 if (@_) { 16 my $x = shift; 17 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 18 } 19 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 20} 21 22use Unicode::Collate; 23 24ok(1); 25 26sub _pack_U { Unicode::Collate::pack_U(@_) } 27sub _unpack_U { Unicode::Collate::unpack_U(@_) } 28 29######################### 30 31our $IsEBCDIC = ord("A") != 0x41; 32 33my $Collator = Unicode::Collate->new( 34 table => 'keys.txt', 35 normalization => undef, 36); 37 38##### 1 39 40my %old_level = $Collator->change(level => 2); 41 42my $str; 43 44my $orig = "This is a Perl book."; 45my $sub = "PERL"; 46my $rep = "camel"; 47my $ret = "This is a camel book."; 48 49$str = $orig; 50if (my($pos,$len) = $Collator->index($str, $sub)) { 51 substr($str, $pos, $len, $rep); 52} 53 54ok($str, $ret); 55 56$Collator->change(%old_level); 57 58$str = $orig; 59if (my($pos,$len) = $Collator->index($str, $sub)) { 60 substr($str, $pos, $len, $rep); 61} 62 63ok($str, $orig); 64 65##### 3 66 67my $match; 68 69$Collator->change(level => 1); 70 71$str = "Pe\x{300}rl"; 72$sub = "pe"; 73$ret = "Pe\x{300}"; 74$match = undef; 75if (my($pos, $len) = $Collator->index($str, $sub)) { 76 $match = substr($str, $pos, $len); 77} 78ok($match, $ret); 79 80$str = "P\x{300}e\x{300}\x{301}\x{303}rl"; 81$sub = "pE"; 82$ret = "P\x{300}e\x{300}\x{301}\x{303}"; 83$match = undef; 84if (my($pos, $len) = $Collator->index($str, $sub)) { 85 $match = substr($str, $pos, $len); 86} 87ok($match, $ret); 88 89$Collator->change(level => 2); 90 91$str = "Pe\x{300}rl"; 92$sub = "pe"; 93$ret = undef; 94$match = undef; 95if (my($pos, $len) = $Collator->index($str, $sub)) { 96 $match = substr($str, $pos, $len); 97} 98ok($match, $ret); 99 100$str = "P\x{300}e\x{300}\x{301}\x{303}rl"; 101$sub = "pE"; 102$ret = undef; 103$match = undef; 104if (my($pos, $len) = $Collator->index($str, $sub)) { 105 $match = substr($str, $pos, $len); 106} 107ok($match, $ret); 108 109$str = "Pe\x{300}rl"; 110$sub = "pe\x{300}"; 111$ret = "Pe\x{300}"; 112$match = undef; 113if (my($pos, $len) = $Collator->index($str, $sub)) { 114 $match = substr($str, $pos, $len); 115} 116ok($match, $ret); 117 118$str = "P\x{300}e\x{300}\x{301}\x{303}rl"; 119$sub = "p\x{300}E\x{300}\x{301}\x{303}"; 120$ret = "P\x{300}e\x{300}\x{301}\x{303}"; 121$match = undef; 122if (my($pos, $len) = $Collator->index($str, $sub)) { 123 $match = substr($str, $pos, $len); 124} 125ok($match, $ret); 126 127##### 9 128 129$Collator->change(level => 1); 130 131$str = $IsEBCDIC 132 ? "Ich mu\x{0059} studieren Perl." 133 : "Ich mu\x{00DF} studieren Perl."; 134$sub = $IsEBCDIC 135 ? "m\x{00DC}ss" 136 : "m\x{00FC}ss"; 137$ret = $IsEBCDIC 138 ? "mu\x{0059}" 139 : "mu\x{00DF}"; 140$match = undef; 141if (my($pos, $len) = $Collator->index($str, $sub)) { 142 $match = substr($str, $pos, $len); 143} 144ok($match, $ret); 145 146$Collator->change(%old_level); 147 148$match = undef; 149if (my($pos, $len) = $Collator->index($str, $sub)) { 150 $match = substr($str, $pos, $len); 151} 152ok($match, undef); 153 154$match = undef; 155if (my($pos,$len) = $Collator->index("", "")) { 156 $match = substr("", $pos, $len); 157} 158ok($match, ""); 159 160$match = undef; 161if (my($pos,$len) = $Collator->index("", "abc")) { 162 $match = substr("", $pos, $len); 163} 164ok($match, undef); 165 166##### 13 167 168$Collator->change(level => 1); 169 170$str = "\0\cA\0\cAe\0\x{300}\cA\x{301}\cB\x{302}\0 \0\cA"; 171$sub = "e"; 172$ret = "e\0\x{300}\cA\x{301}\cB\x{302}\0"; 173$match = undef; 174if (my($pos, $len) = $Collator->index($str, $sub)) { 175 $match = substr($str, $pos, $len); 176} 177ok($match, $ret); 178 179$Collator->change(level => 1); 180 181$str = "\0\cA\0\cAe\0\cA\x{300}\0\cAe"; 182$sub = "e"; 183$ret = "e\0\cA\x{300}\0\cA"; 184$match = undef; 185if (my($pos, $len) = $Collator->index($str, $sub)) { 186 $match = substr($str, $pos, $len); 187} 188ok($match, $ret); 189 190 191$Collator->change(%old_level); 192 193$str = "e\x{300}"; 194$sub = "e"; 195$ret = undef; 196$match = undef; 197if (my($pos, $len) = $Collator->index($str, $sub)) { 198 $match = substr($str, $pos, $len); 199} 200ok($match, $ret); 201 202##### 16 203 204$Collator->change(level => 1); 205 206$str = "The Perl is a language, and the perl is an interpreter."; 207$sub = "PERL"; 208 209$match = undef; 210if (my($pos, $len) = $Collator->index($str, $sub, -40)) { 211 $match = substr($str, $pos, $len); 212} 213ok($match, "Perl"); 214 215$match = undef; 216if (my($pos, $len) = $Collator->index($str, $sub, 4)) { 217 $match = substr($str, $pos, $len); 218} 219ok($match, "Perl"); 220 221$match = undef; 222if (my($pos, $len) = $Collator->index($str, $sub, 5)) { 223 $match = substr($str, $pos, $len); 224} 225ok($match, "perl"); 226 227$match = undef; 228if (my($pos, $len) = $Collator->index($str, $sub, 32)) { 229 $match = substr($str, $pos, $len); 230} 231ok($match, "perl"); 232 233$match = undef; 234if (my($pos, $len) = $Collator->index($str, $sub, 33)) { 235 $match = substr($str, $pos, $len); 236} 237ok($match, undef); 238 239$match = undef; 240if (my($pos, $len) = $Collator->index($str, $sub, 100)) { 241 $match = substr($str, $pos, $len); 242} 243ok($match, undef); 244 245$Collator->change(%old_level); 246 247##### 22 248 249my @ret; 250 251$Collator->change(level => 1); 252 253$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); 254ok($ret); 255ok($$ret eq "P\cBe\x{300}\cB"); 256 257@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); 258ok($ret[0], "P\cBe\x{300}\cB"); 259 260$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; 261$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss"; 262 263($ret) = $Collator->match($str, $sub); 264ok($ret, $str); 265 266$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; 267$sub = $IsEBCDIC ? "m\x{00DC}s" : "m\x{00FC}s"; 268 269($ret) = $Collator->match($str, $sub); 270ok($ret, undef); 271 272$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); 273ok($ret eq "P\cBe\x{300}\cB:pe:PE"); 274 275$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); 276ok($ret == 3); 277 278$str = "ABCDEF"; 279$sub = "cde"; 280$ret = $Collator->match($str, $sub); 281$str = "01234567"; 282ok($ret && $$ret, "CDE"); 283 284$str = "ABCDEF"; 285$sub = "cde"; 286($ret) = $Collator->match($str, $sub); 287$str = "01234567"; 288ok($ret, "CDE"); 289 290 291$Collator->change(level => 3); 292 293$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); 294ok($ret, undef); 295 296@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); 297ok(@ret == 0); 298 299$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe"); 300ok($ret eq ""); 301 302$ret = $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe"); 303ok($ret == 0); 304 305$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); 306ok($ret eq "pe"); 307 308$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); 309ok($ret == 1); 310 311$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; 312$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss"; 313 314($ret) = $Collator->match($str, $sub); 315ok($ret, undef); 316 317$Collator->change(%old_level); 318 319##### 38 320 321$Collator->change(level => 1); 322 323sub strreverse { scalar reverse shift } 324 325$str = "P\cBe\x{300}\cBrl and PERL."; 326$ret = $Collator->subst($str, "perl", 'Camel'); 327ok($ret, 1); 328ok($str, "Camel and PERL."); 329 330$str = "P\cBe\x{300}\cBrl and PERL."; 331$ret = $Collator->subst($str, "perl", \&strreverse); 332ok($ret, 1); 333ok($str, "lr\cB\x{300}e\cBP and PERL."); 334 335$str = "P\cBe\x{300}\cBrl and PERL."; 336$ret = $Collator->gsubst($str, "perl", 'Camel'); 337ok($ret, 2); 338ok($str, "Camel and Camel."); 339 340$str = "P\cBe\x{300}\cBrl and PERL."; 341$ret = $Collator->gsubst($str, "perl", \&strreverse); 342ok($ret, 2); 343ok($str, "lr\cB\x{300}e\cBP and LREP."); 344 345$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L..."; 346$Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); 347ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> " 348 . "<b>CAMEL</b> horse <b>cAm\0E\0L</b>..."); 349 350##### 47 351 352# http://www.xray.mpe.mpg.de/mailing-lists/perl-unicode/2010-09/msg00014.html 353# when the substring includes an ignorable element like a space... 354 355$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L..."; 356$Collator->gsubst($str, "camel horse", sub { "<b>$_[0]</b>" }); 357ok($str, "Camel donkey zebra came\x{301}l <b>CAMEL horse</b> cAm\0E\0L..."); 358 359$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 360$Collator->gsubst($str, "camel horse", sub { "=$_[0]=" }); 361ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 362 363$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 364$Collator->gsubst($str, "camel-horse", sub { "=$_[0]=" }); 365ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 366 367$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 368$Collator->gsubst($str, "camelhorse", sub { "=$_[0]=" }); 369ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 370 371$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 372$Collator->gsubst($str, " ca mel hor se ", sub { "=$_[0]=" }); 373ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 374 375$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 376$Collator->gsubst($str, "ca\x{300}melho\x{302}rse", sub { "=$_[0]=" }); 377ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 378 379##### 53 380 381$Collator->change(level => 3); 382 383$str = "P\cBe\x{300}\cBrl and PERL."; 384$ret = $Collator->subst($str, "perl", "Camel"); 385ok(! $ret); 386ok($str, "P\cBe\x{300}\cBrl and PERL."); 387 388$str = "P\cBe\x{300}\cBrl and PERL."; 389$ret = $Collator->subst($str, "perl", \&strreverse); 390ok(! $ret); 391ok($str, "P\cBe\x{300}\cBrl and PERL."); 392 393$str = "P\cBe\x{300}\cBrl and PERL."; 394$ret = $Collator->gsubst($str, "perl", "Camel"); 395ok($ret, 0); 396ok($str, "P\cBe\x{300}\cBrl and PERL."); 397 398$str = "P\cBe\x{300}\cBrl and PERL."; 399$ret = $Collator->gsubst($str, "perl", \&strreverse); 400ok($ret, 0); 401ok($str, "P\cBe\x{300}\cBrl and PERL."); 402 403$Collator->change(%old_level); 404 405##### 61 406 407$str = "Perl and Camel"; 408$ret = $Collator->gsubst($str, "\cA\cA\0", "AB"); 409ok($ret, 15); 410ok($str, "ABPABeABrABlAB ABaABnABdAB ABCABaABmABeABlAB"); 411 412$str = ''; 413$ret = $Collator->subst($str, "", "ABC"); 414ok($ret, 1); 415ok($str, "ABC"); 416 417$str = ''; 418$ret = $Collator->gsubst($str, "", "ABC"); 419ok($ret, 1); 420ok($str, "ABC"); 421 422$str = 'PPPPP'; 423$ret = $Collator->gsubst($str, 'PP', "ABC"); 424ok($ret, 2); 425ok($str, "ABCABCP"); 426 427##### 69 428 429# Shifted; ignorable after variable 430 431($ret) = $Collator->match("A?\x{300}!\x{301}\x{344}B\x{315}", "?!"); 432ok($ret, "?\x{300}!\x{301}\x{344}"); 433 434$Collator->change(alternate => 'Non-ignorable'); 435 436($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!"); 437ok($ret, undef); 438 439##### 71 440 441# Now preprocess is defined. 442 443$Collator->change(preprocess => sub {''}); 444 445eval { $Collator->index("", "") }; 446ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 447 448eval { $Collator->index("a", "a") }; 449ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 450 451eval { $Collator->match("", "") }; 452ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 453 454eval { $Collator->match("a", "a") }; 455ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 456 457$Collator->change(preprocess => sub { uc shift }); 458 459eval { $Collator->index("", "") }; 460ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 461 462eval { $Collator->index("a", "a") }; 463ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 464 465eval { $Collator->match("", "") }; 466ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 467 468eval { $Collator->match("a", "a") }; 469ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 470 471##### 79 472 473eval { require Unicode::Normalize }; 474my $has_norm = !$@; 475 476if ($has_norm) { 477 # Now preprocess and normalization are defined. 478 479 $Collator->change(normalization => 'NFD'); 480 481 eval { $Collator->index("", "") }; 482 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 483 484 eval { $Collator->index("a", "a") }; 485 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 486 487 eval { $Collator->match("", "") }; 488 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 489 490 eval { $Collator->match("a", "a") }; 491 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 492} else { 493 ok(1) for 1..4; 494} 495 496$Collator->change(preprocess => undef); 497 498if ($has_norm) { 499 # Now only normalization is defined. 500 501 eval { $Collator->index("", "") }; 502 ok($@ && $@ =~ /Don't use Normalization with index\(\)/); 503 504 eval { $Collator->index("a", "a") }; 505 ok($@ && $@ =~ /Don't use Normalization with index\(\)/); 506 507 eval { $Collator->match("", "") }; 508 ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/); 509 510 eval { $Collator->match("a", "a") }; 511 ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/); 512 513 $Collator->change(normalization => undef); 514} else { 515 ok(1) for 1..4; 516} 517 518##### 87 519 520# Now preprocess and normalization are undef. 521 522eval { $Collator->index("", "") }; 523ok(!$@); 524 525eval { $Collator->index("a", "a") }; 526ok(!$@); 527 528eval { $Collator->match("", "") }; 529ok(!$@); 530 531eval { $Collator->match("a", "a") }; 532ok(!$@); 533 534##### 91 535