1
2BEGIN {
3    unless (5.008 <= $]) {
4	print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n";
5	print $@;
6	exit;
7    }
8    if ($ENV{PERL_CORE}) {
9	chdir('t') if -d 't';
10	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11    }
12}
13
14use strict;
15use warnings;
16BEGIN { $| = 1; print "1..90\n"; }
17my $count = 0;
18sub ok ($;$) {
19    my $p = my $r = shift;
20    if (@_) {
21	my $x = shift;
22	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
23    }
24    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
25}
26
27use Unicode::Collate;
28
29ok(1);
30
31sub _pack_U   { Unicode::Collate::pack_U(@_) }
32sub _unpack_U { Unicode::Collate::unpack_U(@_) }
33
34#########################
35
36no warnings 'utf8';
37
38# Unicode 6.0 Sorting
39#
40# Special Database Values. The data files for CLDR provide
41# special weights for two noncharacters:
42#
43# 1. A special noncharacter <HIGH> (U+FFFF) for specification of a range
44#    in a database, allowing "Sch" <= X <= "Sch<HIGH>" to pick all strings
45#    starting with "sch" plus those that sort equivalently.
46# 2. A special noncharacter <LOW> (U+FFFE) for merged database fields,
47#    allowing "Disi\x{301}lva<LOW>John" to sort next to "Disilva<LOW>John".
48
49my $entry = <<'ENTRIES';
50FFFE  ; [.0001.0020.0005.FFFE] # <noncharacter-FFFE>
51FFFF  ; [.FFFE.0020.0005.FFFF] # <noncharacter-FFFF>
52ENTRIES
53
54my @disilva = ("di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva");
55my @dsf = map "$_\x{FFFE}Fred", @disilva;
56my @dsj = map "$_\x{FFFE}John", @disilva;
57my @dsJ = map        "$_ John", @disilva;
58
59for my $norm (undef, 'NFD') {
60    if (defined $norm) {
61	eval { require Unicode::Normalize };
62	if ($@) {
63	    ok(1) for 1..34; # silent skip
64	    next;
65	}
66    }
67
68    my $coll = Unicode::Collate->new(
69	table => 'keys.txt',
70	level => 1,
71	normalization => $norm,
72	UCA_Version => 22,
73	entry => $entry,
74    );
75
76    # 1..4
77    ok($coll->lt("\x{FFFD}",   "\x{FFFF}"));
78    ok($coll->lt("\x{1FFFD}",  "\x{1FFFF}"));
79    ok($coll->lt("\x{2FFFD}",  "\x{2FFFF}"));
80    ok($coll->lt("\x{10FFFD}", "\x{10FFFF}"));
81
82    # 5..14
83    ok($coll->lt("perl\x{FFFD}",   "perl\x{FFFF}"));
84    ok($coll->lt("perl\x{1FFFD}",  "perl\x{FFFF}"));
85    ok($coll->lt("perl\x{1FFFE}",  "perl\x{FFFF}"));
86    ok($coll->lt("perl\x{1FFFF}",  "perl\x{FFFF}"));
87    ok($coll->lt("perl\x{2FFFD}",  "perl\x{FFFF}"));
88    ok($coll->lt("perl\x{2FFFE}",  "perl\x{FFFF}"));
89    ok($coll->lt("perl\x{2FFFF}",  "perl\x{FFFF}"));
90    ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
91    ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
92    ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
93
94    # 15..16
95    ok($coll->gt("perl\x{FFFF}AB", "perl\x{FFFF}"));
96    ok($coll->lt("perl\x{FFFF}\x{10FFFF}", "perl\x{FFFF}\x{FFFF}"));
97
98    $coll->change(level => 4);
99
100    # 17..25
101    for my $i (0 .. $#disilva - 1) {
102	ok($coll->lt($dsf[$i], $dsf[$i+1]));
103	ok($coll->lt($dsj[$i], $dsj[$i+1]));
104	ok($coll->lt($dsJ[$i], $dsJ[$i+1]));
105    }
106
107    # 26
108    ok($coll->lt($dsf[-1], $dsj[0]));
109
110    $coll->change(level => 1);
111
112    # 27..34
113    for my $i (0 .. $#disilva) {
114	ok($coll->lt($dsf[$i], $dsJ[$i]));
115	ok($coll->lt($dsj[$i], $dsJ[$i]));
116    }
117}
118
119# 69
120
121{
122    my $coll = Unicode::Collate->new(
123	table => 'keys.txt',
124	normalization => undef,
125	highestFFFF => 1,
126	minimalFFFE => 1,
127    );
128
129    $coll->change(level => 1);
130    ok($coll->lt("perl\x{FFFD}",   "perl\x{FFFF}"));
131    ok($coll->lt("perl\x{1FFFD}",  "perl\x{FFFF}"));
132    ok($coll->lt("perl\x{1FFFE}",  "perl\x{FFFF}"));
133    ok($coll->lt("perl\x{1FFFF}",  "perl\x{FFFF}"));
134    ok($coll->lt("perl\x{2FFFD}",  "perl\x{FFFF}"));
135    ok($coll->lt("perl\x{2FFFE}",  "perl\x{FFFF}"));
136    ok($coll->lt("perl\x{2FFFF}",  "perl\x{FFFF}"));
137    ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
138    ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
139    ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
140
141# 79
142
143    $coll->change(level => 3);
144    my @list = (
145	"ab\x{FFFE}a",
146	"Ab\x{FFFE}a",
147	"ab\x{FFFE}c",
148	"Ab\x{FFFE}c",
149	"ab\x{FFFE}xyz",
150	"abc\x{FFFE}def",
151	"abc\x{FFFE}xYz",
152	"aBc\x{FFFE}xyz",
153	"abcX\x{FFFE}def",
154	"abcx\x{FFFE}xyz",
155	"b\x{FFFE}aaa",
156	"bbb\x{FFFE}a",
157    );
158    my $p = shift @list;
159    for my $c (@list) {
160	ok($coll->lt($p, $c));
161	$p = $c;
162    }
163}
164
165# 90
166