1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17 18BEGIN { 19 use Unicode::Collate; 20 21# unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) { 22# print "1..0 # skipped: XSUB, or Perl 5.8.0 or later". 23# " needed for this test\n"; 24 unless (5.008 <= $]) { 25 print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n"; 26 print $@; 27 exit; 28 } 29} 30 31use strict; 32use warnings; 33BEGIN { $| = 1; print "1..156\n"; } # 81 + 5 x @Versions 34my $count = 0; 35sub ok ($;$) { 36 my $p = my $r = shift; 37 if (@_) { 38 my $x = shift; 39 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 40 } 41 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 42} 43 44ok(1); 45 46######################### 47 48no warnings 'utf8'; 49 50# NULL is tailorable but illegal code points are not. 51# illegal code points should be always ingored 52# (cf. UCA, 7.1.1 Illegal code points). 53 54my $entry = <<'ENTRIES'; 550000 ; [.0020.0000.0000.0000] # [0000] NULL 560001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING 57FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid) 58FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid) 59D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid) 60DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid) 61FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid) 62FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid) 630002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT 6410FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid) 65110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid) 660041 ; [.1000.0020.0008.0041] # latin A 670041 0000 ; [.1100.0020.0008.0041] # latin A + NULL 680041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid) 69ENTRIES 70 71################## 72 73my $illeg = Unicode::Collate->new( 74 entry => $entry, 75 level => 1, 76 table => undef, 77 normalization => undef, 78 UCA_Version => 20, 79); 80 81# 2..12 82ok($illeg->lt("", "\x00")); 83ok($illeg->lt("", "\x01")); 84ok($illeg->eq("", "\x{FFFE}")); 85ok($illeg->eq("", "\x{FFFF}")); 86ok($illeg->eq("", "\x{D800}")); 87ok($illeg->eq("", "\x{DFFF}")); 88ok($illeg->eq("", "\x{FDD0}")); 89ok($illeg->eq("", "\x{FDEF}")); 90ok($illeg->lt("", "\x02")); 91ok($illeg->eq("", "\x{10FFFF}")); 92ok($illeg->eq("", "\x{110000}")); 93 94# 13..22 95ok($illeg->lt("\x00", "\x01")); 96ok($illeg->lt("\x01", "\x02")); 97ok($illeg->ne("\0", "\x{D800}")); 98ok($illeg->ne("\0", "\x{DFFF}")); 99ok($illeg->ne("\0", "\x{FDD0}")); 100ok($illeg->ne("\0", "\x{FDEF}")); 101ok($illeg->ne("\0", "\x{FFFE}")); 102ok($illeg->ne("\0", "\x{FFFF}")); 103ok($illeg->ne("\0", "\x{10FFFF}")); 104ok($illeg->ne("\0", "\x{110000}")); 105 106# 23..26 107ok($illeg->eq("A", "A\x{FFFF}")); 108ok($illeg->gt("A\0", "A\x{FFFF}")); 109ok($illeg->lt("A", "A\0")); 110ok($illeg->lt("AA", "A\0")); 111 112################## 113 114my $nonch = Unicode::Collate->new( 115 entry => $entry, 116 level => 1, 117 table => undef, 118 normalization => undef, 119 UCA_Version => 22, 120); 121 122# 27..37 123ok($nonch->lt("", "\x00")); 124ok($nonch->lt("", "\x01")); 125ok($nonch->lt("", "\x{FFFE}")); 126ok($nonch->lt("", "\x{FFFF}")); 127ok($nonch->lt("", "\x{D800}")); 128ok($nonch->lt("", "\x{DFFF}")); 129ok($nonch->lt("", "\x{FDD0}")); 130ok($nonch->lt("", "\x{FDEF}")); 131ok($nonch->lt("", "\x02")); 132ok($nonch->lt("", "\x{10FFFF}")); 133ok($nonch->lt("", "\x{110000}")); 134 135# 38..47 136ok($nonch->lt("\x00", "\x01")); 137ok($nonch->lt("\x01", "\x{FFFE}")); 138ok($nonch->lt("\x{FFFE}", "\x{FFFF}")); 139ok($nonch->lt("\x{FFFF}", "\x{D800}")); 140ok($nonch->lt("\x{D800}", "\x{DFFF}")); 141ok($nonch->lt("\x{DFFF}", "\x{FDD0}")); 142ok($nonch->lt("\x{FDD0}", "\x{FDEF}")); 143ok($nonch->lt("\x{FDEF}", "\x02")); 144ok($nonch->lt("\x02", "\x{10FFFF}")); 145ok($nonch->lt("\x{10FFFF}", "\x{110000}")); 146 147# 48..51 148ok($nonch->lt("A", "A\x{FFFF}")); 149ok($nonch->lt("A\0", "A\x{FFFF}")); 150ok($nonch->lt("A", "A\0")); 151ok($nonch->lt("AA", "A\0")); 152 153################## 154 155my $Collator = Unicode::Collate->new( 156 table => 'keys.txt', 157 level => 1, 158 normalization => undef, 159 UCA_Version => 8, 160); 161 162my @ret = ( 163 "Pe\x{300}\x{301}", 164 "Pe\x{300}\0\0\x{301}", 165 "Pe\x{DA00}\x{301}\x{DFFF}", 166 "Pe\x{FFFF}\x{301}", 167 "Pe\x{110000}\x{301}", 168 "Pe\x{300}\x{d801}\x{301}", 169 "Pe\x{300}\x{ffff}\x{301}", 170 "Pe\x{300}\x{110000}\x{301}", 171 "Pe\x{D9ab}\x{DFFF}", 172 "Pe\x{FFFF}", 173 "Pe\x{110000}", 174 "Pe\x{300}\x{D800}\x{DFFF}", 175 "Pe\x{300}\x{FFFF}", 176 "Pe\x{300}\x{110000}", 177); 178 179# 52..65 180for my $ret (@ret) { 181 my $str = $ret."rl"; 182 my($match) = $Collator->match($str, "pe"); 183 ok($match eq $ret); 184} 185 186################## 187 188my $out = Unicode::Collate->new( 189 level => 1, 190 table => undef, 191 normalization => undef, 192 overrideOut => sub { 0xFFFD }, 193); 194 195my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36); 196 197for my $v (@Versions) { 198 $out->change(UCA_Version => $v); 199 ok($out->cmp('', "\x{10FFFF}") == ($v >= 22 ? -1 : 0)); 200 ok($out->cmp('', "\x{110000}") == ($v >= 22 ? -1 : 0)); 201 ok($out->cmp('ABC', "\x{110000}") == ($v >= 22 ? -1 : 1)); 202 ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1)); 203 ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ? 0 : 0)); 204} 205 206# x+66..x+77 207ok($out->lt('ABC', "\x{123456}")); 208ok($out->lt("\x{FFFD}", "\x{123456}")); 209 210$out->change(overrideOut => sub {()}); 211 212ok($out->eq('', "\x{123456}")); 213ok($out->gt('ABC', "\x{123456}")); 214ok($out->gt("\x{FFFD}", "\x{123456}")); 215 216$out->change(overrideOut => undef); 217ok($out->lt('', "\x{123456}")); 218ok($out->eq("\x{FFFD}", "\x{123456}")); 219 220$out->change(overrideOut => sub { 0xFFFD }); 221 222ok($out->lt('', "\x{123456}")); 223ok($out->lt('ABC', "\x{123456}")); 224ok($out->lt("\x{FFFD}", "\x{123456}")); 225 226$out->change(overrideOut => 0); 227ok($out->lt('', "\x{123456}")); 228ok($out->eq("\x{FFFD}", "\x{123456}")); 229 230$out->change(overrideOut => sub { undef }); 231ok($out->lt('', "\x{123456}")); 232ok($out->eq("\x{FFFD}", "\x{123456}")); 233ok($out->eq("\x{FFFD}", "\x{21FFFFF}")); 234ok($out->eq("\x{FFFD}", "\x{2200000}")); 235 236