1
# When PERL_CORE is set in the environment, step into t/ (if that directory
# exists) and replace @INC with the single in-tree library path.
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        # MacOS gets the '::lib' spelling; every other platform '../lib'.
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
8
9use strict;
10use warnings;
# Emit the TAP plan at compile time, with STDOUT unbuffered.
BEGIN {
    $| = 1;
    print "1..17\n";
}

# Number of results printed so far; ok() increments it before each line.
my $count = 0;

# ok(GOT)           - passes when GOT is true.
# ok(GOT, EXPECTED) - passes when both are undef, or when both are defined
#                     and string-equal (eq); fails if only one is undef.
# Prints "ok N" or "not ok N" to the currently selected handle.
# The ($;$) prototype gives each argument scalar context at the call site.
sub ok ($;$) {
    my $got  = shift;
    my $pass = $got;
    if (@_) {
        my $want = shift;
        if (!defined $want) {
            $pass = !defined $got;
        }
        elsif (!defined $got) {
            $pass = 0;
        }
        else {
            $pass = $got eq $want;
        }
    }
    my $verdict = $pass ? "ok" : "not ok";
    print $verdict, ' ', ++$count, "\n";
}
21
22use Unicode::Collate;
23
# Test 1: reaching this statement means the Unicode::Collate import above
# compiled and loaded.
ok(1);

# Local shorthands for the module's pack_U / unpack_U helpers; all arguments
# are forwarded unchanged and the result is returned as-is.
sub _pack_U {
    return Unicode::Collate::pack_U(@_);
}

sub _unpack_U {
    return Unicode::Collate::unpack_U(@_);
}
28
29#########################
30
# Rewrite callback applied to a line of the collation table.
# For every element that opens with "[.0000." the next two four-character
# fields are overwritten with 0000, so the element begins
# "[.0000.0000.0000"; whatever follows (a further ".xxxx" field or the
# closing "]") is kept. Elements with any other first field are untouched.
# Returns the (possibly modified) line.
my $code = sub {
    my ($entry) = @_;
    $entry =~ s/\[\.0000\..{4}\..{4}([.\]])/[.0000.0000.0000$1/g;
    return $entry;
};
36
37#####
38
# Collator over keys.txt with normalization off and every table line passed
# through $code.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    rewrite       => $code,
);

# Tests 2-4: combining accents do not break equality, while lowercase still
# sorts before uppercase.
ok($Collator->eq("camel", "came\x{300}l"));
ok($Collator->eq("camel", "ca\x{300}me\x{301}l"));
ok($Collator->lt("camel", "Camel"));

# Test 5: gsubst wraps the accented and NUL-interleaved spellings of "camel"
# in '=' signs and leaves the differently-cased ones alone.
{
    my $text     = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
    my $expected = "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.";
    my $wrap     = sub { "=$_[0]=" };
    $Collator->gsubst($text, "camel", $wrap);
    ok($text, $expected);
}
51
52# 5
53
# Same checks as for the keys.txt collator, but with no table argument, so
# the module's default table is rewritten (presumably DUCET, going by the
# variable name).
my $rewriteDUCET = Unicode::Collate->new(
    normalization => undef,
    rewrite       => $code,
);

# Tests 6-8: accents are ignored for equality; case still orders.
ok($rewriteDUCET->eq("camel", "came\x{300}l"));
ok($rewriteDUCET->eq("camel", "ca\x{300}me\x{301}l"));
ok($rewriteDUCET->lt("camel", "Camel"));

# Test 9: gsubst marks only the spellings that collate equal to "camel".
{
    my $text     = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
    my $expected = "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.";
    my $wrap     = sub { "=$_[0]=" };
    $rewriteDUCET->gsubst($text, "camel", $wrap);
    ok($text, $expected);
}
66
67# 9
68
# Level-1 collator over keys.txt whose rewrite callback blanks out every
# table line that mentions HIRAGANA and passes all other lines through.
my $undef_hira = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    level         => 1,
    rewrite       => sub {
        my ($entry) = @_;
        return $entry =~ /HIRAGANA/ ? '' : $entry;
    },
);

# Fixtures for the ordering checks.
my $hiragana = "\x{3042}\x{3044}";    # HIRAGANA LETTER A, HIRAGANA LETTER I
my $katakana = "\x{30A2}\x{30A4}";    # KATAKANA LETTER A, KATAKANA LETTER I
my $cjkkanji = "\x{4E00}";            # a CJK Unified Ideograph
83
# The rewrite drops every HIRAGANA entry, leaving those characters undefined
# in the table, so they sort after CJK Unified Ideographs.
86
# Tests 10-14: with the hiragana entries removed, each left-hand string must
# collate strictly before its right-hand partner.
for my $pair (
    [ "abc",     "perl"    ],
    [ "",        "ABC"     ],
    [ $katakana, $hiragana ],
    [ $katakana, $cjkkanji ],
    [ $cjkkanji, $hiragana ],
) {
    my ($lhs, $rhs) = @$pair;
    ok($undef_hira->lt($lhs, $rhs));
}

# Tests 15-17: the keys.txt collator, which still has its hiragana entries,
# is switched to level 1. There katakana and hiragana compare equal and the
# ideograph sorts after both.
$Collator->change(level => 1);
ok($Collator->eq($katakana, $hiragana));
ok($Collator->lt($katakana, $cjkkanji));
ok($Collator->gt($cjkkanji, $hiragana));
97
98# 17
99