1
# Compile-time pre-flight: bail out early if the 'U' pack template is not
# usable, and adjust the working directory / @INC when PERL_CORE is set.
BEGIN {
    # pack 'U' must turn code point 0x41 into the character 'A';
    # otherwise print a "1..0" plan line and exit successfully.
    if (pack('U', 0x41) ne 'A') {
        print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
        exit 0;
    }

    # unpack 'U' must give the code point back; same early exit if not.
    if (unpack('U', 'A') != 0x41) {
        print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
        exit 0;
    }

    # With PERL_CORE set: move into t/ when it exists and replace @INC
    # with the single relative lib directory (MacOS uses ':' paths).
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }

        if ($^O eq 'MacOS') {
            @INC = ('::lib');
        }
        else {
            @INC = ('../lib');
        }
    }
}
16
17use strict;
18use warnings;
# Turn on autoflush and print the plan at compile time, before any result.
BEGIN {
    $| = 1;
    print "1..17\n";
}

# Number of results printed so far; ok() bumps it on every call.
my $count = 0;

# ok(GOT)           - passes when GOT is true.
# ok(GOT, EXPECTED) - passes when both are undef, or when both are defined
#                     and string-equal (eq); fails if only one is undef.
# Prints "ok N" or "not ok N", N being the running result number.
sub ok ($;$) {
    my ($got, @rest) = @_;
    my $passed = $got;

    if (@rest) {
        my $expected = $rest[0];
        if (!defined $expected) {
            $passed = !defined $got;
        }
        elsif (!defined $got) {
            $passed = 0;
        }
        else {
            $passed = $got eq $expected;
        }
    }

    my $verdict = $passed ? "ok" : "not ok";
    $count += 1;
    print $verdict, ' ', $count, "\n";
}
29
30use Unicode::Collate;
31
32ok(1);
33
34#########################
35
# Hook handed to Unicode::Collate->new as the `rewrite` option.
# It receives one line of text and returns it with every bracketed element
# that starts with ".0000" having its next two four-character fields
# replaced by "0000" as well; all other text is passed through unchanged.
my $code = sub {
    my ($entry) = @_;
    $entry =~ s/\[\.0000\..{4}\..{4}([.\]])/[.0000.0000.0000$1/g;
    return $entry;
};
41
42#####
43
# Collator over keys.txt with normalization switched off and the $code
# hook installed as the table rewriter.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    rewrite       => $code,
);

# Spellings carrying combining accents must compare equal to plain "camel".
for my $accented ("came\x{300}l", "ca\x{300}me\x{301}l") {
    ok($Collator->eq("camel", $accented));
}

# Case is still significant: "camel" sorts before "Camel".
ok($Collator->lt("camel", "Camel"));

# gsubst edits its first argument in place. Only the lowercase matches
# (including the ones containing U+0301 or NUL characters) get wrapped;
# "Camel" and "CAMEL" are left untouched.
{
    my $text = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
    my $wrap = sub { "=$_[0]=" };
    $Collator->gsubst($text, "camel", $wrap);
    ok($text, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.");
}
56
57# 5
58
# Same checks as for the keys.txt collator, but with no `table` option, so
# the module's default table is used (DUCET, going by the variable name).
my $rewriteDUCET = Unicode::Collate->new(
    normalization => undef,
    rewrite       => $code,
);

# Spellings carrying combining accents must compare equal to plain "camel".
for my $accented ("came\x{300}l", "ca\x{300}me\x{301}l") {
    ok($rewriteDUCET->eq("camel", $accented));
}

# Case is still significant: "camel" sorts before "Camel".
ok($rewriteDUCET->lt("camel", "Camel"));

# In-place substitution wraps only the lowercase matches, including the
# ones containing U+0301 or NUL characters.
{
    my $text = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
    my $wrap = sub { "=$_[0]=" };
    $rewriteDUCET->gsubst($text, "camel", $wrap);
    ok($text, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.");
}
71
72# 9
73
# Level-1 collator over keys.txt whose rewrite hook replaces every line
# containing "HIRAGANA" with an empty string and passes all other lines
# through unchanged.
my $undef_hira = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    level         => 1,
    rewrite       => sub {
        my ($entry) = @_;
        return $entry unless $entry =~ /HIRAGANA/;
        return '';
    },
);

# Sample strings: two hiragana, the two corresponding katakana, one kanji.
my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
my $cjkkanji = "\x{4E00}";

# Because the rewrite hook removes the HIRAGANA lines, hiragana are
# undefined in this collator, so they are expected to sort after
# CJK Unified Ideographs. Each pair below is (lesser, greater).
my @ascending_pairs = (
    [ "abc",     "perl"    ],
    [ "",        "ABC"     ],
    [ $katakana, $hiragana ],
    [ $katakana, $cjkkanji ],
    [ $cjkkanji, $hiragana ],
);
for my $pair (@ascending_pairs) {
    my ($lesser, $greater) = @$pair;
    ok($undef_hira->lt($lesser, $greater));
}

# Contrast with the unfiltered keys.txt collator once it is switched to
# level 1: the kana strings compare equal and both sort before the kanji.
$Collator->change(level => 1);
ok($Collator->eq($katakana, $hiragana));
ok($Collator->lt($katakana, $cjkkanji));
ok($Collator->gt($cjkkanji, $hiragana));
102
103# 17
104