1 2BEGIN { 3 unless (5.008 <= $]) { 4 print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n"; 5 print $@; 6 exit; 7 } 8 if ($ENV{PERL_CORE}) { 9 chdir('t') if -d 't'; 10 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 11 } 12} 13 14use strict; 15use warnings; 16BEGIN { $| = 1; print "1..176\n"; } # 81 + 5 x @Versions 17my $count = 0; 18sub ok ($;$) { 19 my $p = my $r = shift; 20 if (@_) { 21 my $x = shift; 22 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 23 } 24 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 25} 26 27use Unicode::Collate; 28 29ok(1); 30 31sub _pack_U { Unicode::Collate::pack_U(@_) } 32sub _unpack_U { Unicode::Collate::unpack_U(@_) } 33 34######################### 35 36no warnings 'utf8'; 37 38# NULL is tailorable but illegal code points are not. 39# illegal code points should be always ingored 40# (cf. UCA, 7.1.1 Illegal code points). 41 42my $entry = <<'ENTRIES'; 430000 ; [.0020.0000.0000.0000] # [0000] NULL 440001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING 45FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid) 46FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid) 47D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid) 48DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid) 49FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid) 50FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid) 510002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT 5210FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid) 53110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid) 540041 ; [.1000.0020.0008.0041] # latin A 550041 0000 ; [.1100.0020.0008.0041] # latin A + NULL 560041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid) 57ENTRIES 58 59################## 60 61my $illeg = Unicode::Collate->new( 62 entry => $entry, 63 level => 1, 64 table => undef, 65 normalization => undef, 66 UCA_Version => 20, 67); 68 69# 2..12 70ok($illeg->lt("", "\x00")); 71ok($illeg->lt("", "\x01")); 72ok($illeg->eq("", "\x{FFFE}")); 73ok($illeg->eq("", "\x{FFFF}")); 74ok($illeg->eq("", "\x{D800}")); 75ok($illeg->eq("", "\x{DFFF}")); 76ok($illeg->eq("", "\x{FDD0}")); 77ok($illeg->eq("", "\x{FDEF}")); 78ok($illeg->lt("", "\x02")); 79ok($illeg->eq("", "\x{10FFFF}")); 80ok($illeg->eq("", "\x{110000}")); 81 82# 13..22 83ok($illeg->lt("\x00", "\x01")); 84ok($illeg->lt("\x01", "\x02")); 85ok($illeg->ne("\0", "\x{D800}")); 86ok($illeg->ne("\0", "\x{DFFF}")); 87ok($illeg->ne("\0", "\x{FDD0}")); 88ok($illeg->ne("\0", "\x{FDEF}")); 89ok($illeg->ne("\0", "\x{FFFE}")); 90ok($illeg->ne("\0", "\x{FFFF}")); 91ok($illeg->ne("\0", "\x{10FFFF}")); 92ok($illeg->ne("\0", "\x{110000}")); 93 94# 23..26 95ok($illeg->eq("A", "A\x{FFFF}")); 96ok($illeg->gt("A\0", "A\x{FFFF}")); 97ok($illeg->lt("A", "A\0")); 98ok($illeg->lt("AA", "A\0")); 99 100################## 101 102my $nonch = Unicode::Collate->new( 103 entry => $entry, 104 level => 1, 105 table => undef, 106 normalization => undef, 107 UCA_Version => 22, 108); 109 110# 27..37 111ok($nonch->lt("", "\x00")); 112ok($nonch->lt("", "\x01")); 113ok($nonch->lt("", "\x{FFFE}")); 114ok($nonch->lt("", "\x{FFFF}")); 115ok($nonch->lt("", "\x{D800}")); 116ok($nonch->lt("", "\x{DFFF}")); 117ok($nonch->lt("", "\x{FDD0}")); 118ok($nonch->lt("", "\x{FDEF}")); 119ok($nonch->lt("", "\x02")); 120ok($nonch->lt("", "\x{10FFFF}")); 121ok($nonch->lt("", "\x{110000}")); 122 123# 38..47 124ok($nonch->lt("\x00", "\x01")); 125ok($nonch->lt("\x01", "\x{FFFE}")); 126ok($nonch->lt("\x{FFFE}", "\x{FFFF}")); 127ok($nonch->lt("\x{FFFF}", "\x{D800}")); 128ok($nonch->lt("\x{D800}", "\x{DFFF}")); 129ok($nonch->lt("\x{DFFF}", "\x{FDD0}")); 130ok($nonch->lt("\x{FDD0}", "\x{FDEF}")); 131ok($nonch->lt("\x{FDEF}", "\x02")); 132ok($nonch->lt("\x02", "\x{10FFFF}")); 133ok($nonch->lt("\x{10FFFF}", "\x{110000}")); 134 135# 48..51 136ok($nonch->lt("A", "A\x{FFFF}")); 137ok($nonch->lt("A\0", "A\x{FFFF}")); 138ok($nonch->lt("A", "A\0")); 139ok($nonch->lt("AA", "A\0")); 140 141################## 142 143my $Collator = Unicode::Collate->new( 144 table => 'keys.txt', 145 level => 1, 146 normalization => undef, 147 UCA_Version => 8, 148); 149 150my @ret = ( 151 "Pe\x{300}\x{301}", 152 "Pe\x{300}\0\0\x{301}", 153 "Pe\x{DA00}\x{301}\x{DFFF}", 154 "Pe\x{FFFF}\x{301}", 155 "Pe\x{110000}\x{301}", 156 "Pe\x{300}\x{d801}\x{301}", 157 "Pe\x{300}\x{ffff}\x{301}", 158 "Pe\x{300}\x{110000}\x{301}", 159 "Pe\x{D9ab}\x{DFFF}", 160 "Pe\x{FFFF}", 161 "Pe\x{110000}", 162 "Pe\x{300}\x{D800}\x{DFFF}", 163 "Pe\x{300}\x{FFFF}", 164 "Pe\x{300}\x{110000}", 165); 166 167# 52..65 168for my $ret (@ret) { 169 my $str = $ret."rl"; 170 my($match) = $Collator->match($str, "pe"); 171 ok($match eq $ret); 172} 173 174################## 175 176my $out = Unicode::Collate->new( 177 level => 1, 178 table => undef, 179 normalization => undef, 180 overrideOut => sub { 0xFFFD }, 181); 182 183my @Versions = ( 8, 9, 11, 14, 16, 18, 20, 22, 24, 26, 184 28, 30, 32, 34, 36, 38, 40, 41, 43); 185 186for my $v (@Versions) { 187 $out->change(UCA_Version => $v); 188 ok($out->cmp('', "\x{10FFFF}") == ($v >= 22 ? -1 : 0)); 189 ok($out->cmp('', "\x{110000}") == ($v >= 22 ? -1 : 0)); 190 ok($out->cmp('ABC', "\x{110000}") == ($v >= 22 ? -1 : 1)); 191 ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1)); 192 ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ? 0 : 0)); 193} 194 195# x+66..x+77 196ok($out->lt('ABC', "\x{123456}")); 197ok($out->lt("\x{FFFD}", "\x{123456}")); 198 199$out->change(overrideOut => sub {()}); 200 201ok($out->eq('', "\x{123456}")); 202ok($out->gt('ABC', "\x{123456}")); 203ok($out->gt("\x{FFFD}", "\x{123456}")); 204 205$out->change(overrideOut => undef); 206ok($out->lt('', "\x{123456}")); 207ok($out->eq("\x{FFFD}", "\x{123456}")); 208 209$out->change(overrideOut => sub { 0xFFFD }); 210 211ok($out->lt('', "\x{123456}")); 212ok($out->lt('ABC', "\x{123456}")); 213ok($out->lt("\x{FFFD}", "\x{123456}")); 214 215$out->change(overrideOut => 0); 216ok($out->lt('', "\x{123456}")); 217ok($out->eq("\x{FFFD}", "\x{123456}")); 218 219$out->change(overrideOut => sub { undef }); 220ok($out->lt('', "\x{123456}")); 221ok($out->eq("\x{FFFD}", "\x{123456}")); 222ok($out->eq("\x{FFFD}", "\x{21FFFFF}")); 223ok($out->eq("\x{FFFD}", "\x{2200000}")); 224 225