1
2BEGIN {
3    if ($ENV{PERL_CORE}) {
4	chdir('t') if -d 't';
5	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
6    }
7}
8
9use strict;
10use warnings;
11BEGIN { $| = 1; print "1..91\n"; }
12my $count = 0;
13sub ok ($;$) {
14    my $p = my $r = shift;
15    if (@_) {
16	my $x = shift;
17	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
18    }
19    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
20}
21
22use Unicode::Collate;
23
24ok(1);
25
26sub _pack_U   { Unicode::Collate::pack_U(@_) }
27sub _unpack_U { Unicode::Collate::unpack_U(@_) }
28
29#########################
30
31our $IsEBCDIC = ord("A") != 0x41;
32
33my $Collator = Unicode::Collate->new(
34  table => 'keys.txt',
35  normalization => undef,
36);
37
38##### 1
39
40my %old_level = $Collator->change(level => 2);
41
42my $str;
43
44my $orig = "This is a Perl book.";
45my $sub = "PERL";
46my $rep = "camel";
47my $ret = "This is a camel book.";
48
49$str = $orig;
50if (my($pos,$len) = $Collator->index($str, $sub)) {
51  substr($str, $pos, $len, $rep);
52}
53
54ok($str, $ret);
55
56$Collator->change(%old_level);
57
58$str = $orig;
59if (my($pos,$len) = $Collator->index($str, $sub)) {
60  substr($str, $pos, $len, $rep);
61}
62
63ok($str, $orig);
64
65##### 3
66
67my $match;
68
69$Collator->change(level => 1);
70
71$str = "Pe\x{300}rl";
72$sub = "pe";
73$ret = "Pe\x{300}";
74$match = undef;
75if (my($pos, $len) = $Collator->index($str, $sub)) {
76    $match = substr($str, $pos, $len);
77}
78ok($match, $ret);
79
80$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
81$sub = "pE";
82$ret = "P\x{300}e\x{300}\x{301}\x{303}";
83$match = undef;
84if (my($pos, $len) = $Collator->index($str, $sub)) {
85    $match = substr($str, $pos, $len);
86}
87ok($match, $ret);
88
89$Collator->change(level => 2);
90
91$str = "Pe\x{300}rl";
92$sub = "pe";
93$ret = undef;
94$match = undef;
95if (my($pos, $len) = $Collator->index($str, $sub)) {
96    $match = substr($str, $pos, $len);
97}
98ok($match, $ret);
99
100$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
101$sub = "pE";
102$ret = undef;
103$match = undef;
104if (my($pos, $len) = $Collator->index($str, $sub)) {
105    $match = substr($str, $pos, $len);
106}
107ok($match, $ret);
108
109$str = "Pe\x{300}rl";
110$sub = "pe\x{300}";
111$ret = "Pe\x{300}";
112$match = undef;
113if (my($pos, $len) = $Collator->index($str, $sub)) {
114    $match = substr($str, $pos, $len);
115}
116ok($match, $ret);
117
118$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
119$sub = "p\x{300}E\x{300}\x{301}\x{303}";
120$ret = "P\x{300}e\x{300}\x{301}\x{303}";
121$match = undef;
122if (my($pos, $len) = $Collator->index($str, $sub)) {
123    $match = substr($str, $pos, $len);
124}
125ok($match, $ret);
126
127##### 9
128
129$Collator->change(level => 1);
130
131$str = $IsEBCDIC
132    ? "Ich mu\x{0059} studieren Perl."
133    : "Ich mu\x{00DF} studieren Perl.";
134$sub = $IsEBCDIC
135    ? "m\x{00DC}ss"
136    : "m\x{00FC}ss";
137$ret = $IsEBCDIC
138    ? "mu\x{0059}"
139    : "mu\x{00DF}";
140$match = undef;
141if (my($pos, $len) = $Collator->index($str, $sub)) {
142    $match = substr($str, $pos, $len);
143}
144ok($match, $ret);
145
146$Collator->change(%old_level);
147
148$match = undef;
149if (my($pos, $len) = $Collator->index($str, $sub)) {
150    $match = substr($str, $pos, $len);
151}
152ok($match, undef);
153
154$match = undef;
155if (my($pos,$len) = $Collator->index("", "")) {
156    $match = substr("", $pos, $len);
157}
158ok($match, "");
159
160$match = undef;
161if (my($pos,$len) = $Collator->index("", "abc")) {
162    $match = substr("", $pos, $len);
163}
164ok($match, undef);
165
166##### 13
167
168$Collator->change(level => 1);
169
170$str = "\0\cA\0\cAe\0\x{300}\cA\x{301}\cB\x{302}\0 \0\cA";
171$sub = "e";
172$ret = "e\0\x{300}\cA\x{301}\cB\x{302}\0";
173$match = undef;
174if (my($pos, $len) = $Collator->index($str, $sub)) {
175    $match = substr($str, $pos, $len);
176}
177ok($match, $ret);
178
179$Collator->change(level => 1);
180
181$str = "\0\cA\0\cAe\0\cA\x{300}\0\cAe";
182$sub = "e";
183$ret = "e\0\cA\x{300}\0\cA";
184$match = undef;
185if (my($pos, $len) = $Collator->index($str, $sub)) {
186    $match = substr($str, $pos, $len);
187}
188ok($match, $ret);
189
190
191$Collator->change(%old_level);
192
193$str = "e\x{300}";
194$sub = "e";
195$ret = undef;
196$match = undef;
197if (my($pos, $len) = $Collator->index($str, $sub)) {
198    $match = substr($str, $pos, $len);
199}
200ok($match, $ret);
201
202##### 16
203
204$Collator->change(level => 1);
205
206$str = "The Perl is a language, and the perl is an interpreter.";
207$sub = "PERL";
208
209$match = undef;
210if (my($pos, $len) = $Collator->index($str, $sub, -40)) {
211    $match = substr($str, $pos, $len);
212}
213ok($match, "Perl");
214
215$match = undef;
216if (my($pos, $len) = $Collator->index($str, $sub, 4)) {
217    $match = substr($str, $pos, $len);
218}
219ok($match, "Perl");
220
221$match = undef;
222if (my($pos, $len) = $Collator->index($str, $sub, 5)) {
223    $match = substr($str, $pos, $len);
224}
225ok($match, "perl");
226
227$match = undef;
228if (my($pos, $len) = $Collator->index($str, $sub, 32)) {
229    $match = substr($str, $pos, $len);
230}
231ok($match, "perl");
232
233$match = undef;
234if (my($pos, $len) = $Collator->index($str, $sub, 33)) {
235    $match = substr($str, $pos, $len);
236}
237ok($match, undef);
238
239$match = undef;
240if (my($pos, $len) = $Collator->index($str, $sub, 100)) {
241    $match = substr($str, $pos, $len);
242}
243ok($match, undef);
244
245$Collator->change(%old_level);
246
247##### 22
248
249my @ret;
250
251$Collator->change(level => 1);
252
253$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
254ok($ret);
255ok($$ret eq "P\cBe\x{300}\cB");
256
257@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
258ok($ret[0], "P\cBe\x{300}\cB");
259
260$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
261$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";
262
263($ret) = $Collator->match($str, $sub);
264ok($ret, $str);
265
266$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
267$sub = $IsEBCDIC ? "m\x{00DC}s" : "m\x{00FC}s";
268
269($ret) = $Collator->match($str, $sub);
270ok($ret, undef);
271
272$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
273ok($ret eq "P\cBe\x{300}\cB:pe:PE");
274
275$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
276ok($ret == 3);
277
278$str = "ABCDEF";
279$sub = "cde";
280$ret = $Collator->match($str, $sub);
281$str = "01234567";
282ok($ret && $$ret, "CDE");
283
284$str = "ABCDEF";
285$sub = "cde";
286($ret) = $Collator->match($str, $sub);
287$str = "01234567";
288ok($ret, "CDE");
289
290
291$Collator->change(level => 3);
292
293$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
294ok($ret, undef);
295
296@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
297ok(@ret == 0);
298
299$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
300ok($ret eq "");
301
302$ret = $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
303ok($ret == 0);
304
305$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
306ok($ret eq "pe");
307
308$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
309ok($ret == 1);
310
311$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
312$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";
313
314($ret) = $Collator->match($str, $sub);
315ok($ret, undef);
316
317$Collator->change(%old_level);
318
319##### 38
320
321$Collator->change(level => 1);
322
323sub strreverse { scalar reverse shift }
324
325$str = "P\cBe\x{300}\cBrl and PERL.";
326$ret = $Collator->subst($str, "perl", 'Camel');
327ok($ret, 1);
328ok($str, "Camel and PERL.");
329
330$str = "P\cBe\x{300}\cBrl and PERL.";
331$ret = $Collator->subst($str, "perl", \&strreverse);
332ok($ret, 1);
333ok($str, "lr\cB\x{300}e\cBP and PERL.");
334
335$str = "P\cBe\x{300}\cBrl and PERL.";
336$ret = $Collator->gsubst($str, "perl", 'Camel');
337ok($ret, 2);
338ok($str, "Camel and Camel.");
339
340$str = "P\cBe\x{300}\cBrl and PERL.";
341$ret = $Collator->gsubst($str, "perl", \&strreverse);
342ok($ret, 2);
343ok($str, "lr\cB\x{300}e\cBP and LREP.");
344
345$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
346$Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
347ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> "
348	. "<b>CAMEL</b> horse <b>cAm\0E\0L</b>...");
349
350##### 47
351
352# http://www.xray.mpe.mpg.de/mailing-lists/perl-unicode/2010-09/msg00014.html
353# when the substring includes an ignorable element like a space...
354
355$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
356$Collator->gsubst($str, "camel horse", sub { "<b>$_[0]</b>" });
357ok($str, "Camel donkey zebra came\x{301}l <b>CAMEL horse</b> cAm\0E\0L...");
358
359$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
360$Collator->gsubst($str, "camel horse", sub { "=$_[0]=" });
361ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
362
363$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
364$Collator->gsubst($str, "camel-horse", sub { "=$_[0]=" });
365ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
366
367$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
368$Collator->gsubst($str, "camelhorse", sub { "=$_[0]=" });
369ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
370
371$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
372$Collator->gsubst($str, "  ca  mel  hor  se  ", sub { "=$_[0]=" });
373ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
374
375$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
376$Collator->gsubst($str, "ca\x{300}melho\x{302}rse", sub { "=$_[0]=" });
377ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
378
379##### 53
380
381$Collator->change(level => 3);
382
383$str = "P\cBe\x{300}\cBrl and PERL.";
384$ret = $Collator->subst($str, "perl", "Camel");
385ok(! $ret);
386ok($str, "P\cBe\x{300}\cBrl and PERL.");
387
388$str = "P\cBe\x{300}\cBrl and PERL.";
389$ret = $Collator->subst($str, "perl", \&strreverse);
390ok(! $ret);
391ok($str, "P\cBe\x{300}\cBrl and PERL.");
392
393$str = "P\cBe\x{300}\cBrl and PERL.";
394$ret = $Collator->gsubst($str, "perl", "Camel");
395ok($ret, 0);
396ok($str, "P\cBe\x{300}\cBrl and PERL.");
397
398$str = "P\cBe\x{300}\cBrl and PERL.";
399$ret = $Collator->gsubst($str, "perl", \&strreverse);
400ok($ret, 0);
401ok($str, "P\cBe\x{300}\cBrl and PERL.");
402
403$Collator->change(%old_level);
404
405##### 61
406
407$str = "Perl and Camel";
408$ret = $Collator->gsubst($str, "\cA\cA\0", "AB");
409ok($ret, 15);
410ok($str, "ABPABeABrABlAB ABaABnABdAB ABCABaABmABeABlAB");
411
412$str = '';
413$ret = $Collator->subst($str, "", "ABC");
414ok($ret, 1);
415ok($str, "ABC");
416
417$str = '';
418$ret = $Collator->gsubst($str, "", "ABC");
419ok($ret, 1);
420ok($str, "ABC");
421
422$str = 'PPPPP';
423$ret = $Collator->gsubst($str, 'PP', "ABC");
424ok($ret, 2);
425ok($str, "ABCABCP");
426
427##### 69
428
429# Shifted; ignorable after variable
430
431($ret) = $Collator->match("A?\x{300}!\x{301}\x{344}B\x{315}", "?!");
432ok($ret, "?\x{300}!\x{301}\x{344}");
433
434$Collator->change(alternate => 'Non-ignorable');
435
436($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!");
437ok($ret, undef);
438
439##### 71
440
441# Now preprocess is defined.
442
443$Collator->change(preprocess => sub {''});
444
445eval { $Collator->index("", "") };
446ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
447
448eval { $Collator->index("a", "a") };
449ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
450
451eval { $Collator->match("", "") };
452ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
453
454eval { $Collator->match("a", "a") };
455ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
456
457$Collator->change(preprocess => sub { uc shift });
458
459eval { $Collator->index("", "") };
460ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
461
462eval { $Collator->index("a", "a") };
463ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
464
465eval { $Collator->match("", "") };
466ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
467
468eval { $Collator->match("a", "a") };
469ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
470
471##### 79
472
473eval { require Unicode::Normalize };
474my $has_norm = !$@;
475
476if ($has_norm) {
477    # Now preprocess and normalization are defined.
478
479    $Collator->change(normalization => 'NFD');
480
481    eval { $Collator->index("", "") };
482    ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
483
484    eval { $Collator->index("a", "a") };
485    ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
486
487    eval { $Collator->match("", "") };
488    ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
489
490    eval { $Collator->match("a", "a") };
491    ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
492} else {
493    ok(1) for 1..4;
494}
495
496$Collator->change(preprocess => undef);
497
498if ($has_norm) {
499    # Now only normalization is defined.
500
501    eval { $Collator->index("", "") };
502    ok($@ && $@ =~ /Don't use Normalization with index\(\)/);
503
504    eval { $Collator->index("a", "a") };
505    ok($@ && $@ =~ /Don't use Normalization with index\(\)/);
506
507    eval { $Collator->match("", "") };
508    ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/);
509
510    eval { $Collator->match("a", "a") };
511    ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/);
512
513    $Collator->change(normalization => undef);
514} else {
515    ok(1) for 1..4;
516}
517
518##### 87
519
520# Now preprocess and normalization are undef.
521
522eval { $Collator->index("", "") };
523ok(!$@);
524
525eval { $Collator->index("a", "a") };
526ok(!$@);
527
528eval { $Collator->match("", "") };
529ok(!$@);
530
531eval { $Collator->match("a", "a") };
532ok(!$@);
533
534##### 91
535