1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17 18BEGIN { 19 use Unicode::Collate; 20 21# unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) { 22# print "1..0 # skipped: XSUB, or Perl 5.8.0 or later". 23# " needed for this test\n"; 24 unless (5.008 <= $]) { 25 print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n"; 26 print $@; 27 exit; 28 } 29} 30 31use strict; 32use warnings; 33BEGIN { $| = 1; print "1..90\n"; } 34my $count = 0; 35sub ok ($;$) { 36 my $p = my $r = shift; 37 if (@_) { 38 my $x = shift; 39 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 40 } 41 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 42} 43 44ok(1); 45 46######################### 47 48no warnings 'utf8'; 49 50# Unicode 6.0 Sorting 51# 52# Special Database Values. The data files for CLDR provide 53# special weights for two noncharacters: 54# 55# 1. A special noncharacter <HIGH> (U+FFFF) for specification of a range 56# in a database, allowing "Sch" <= X <= "Sch<HIGH>" to pick all strings 57# starting with "sch" plus those that sort equivalently. 58# 2. A special noncharacter <LOW> (U+FFFE) for merged database fields, 59# allowing "Disi\x{301}lva<LOW>John" to sort next to "Disilva<LOW>John". 60 61my $entry = <<'ENTRIES'; 62FFFE ; [.0001.0020.0005.FFFE] # <noncharacter-FFFE> 63FFFF ; [.FFFE.0020.0005.FFFF] # <noncharacter-FFFF> 64ENTRIES 65 66my @disilva = ("di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva"); 67my @dsf = map "$_\x{FFFE}Fred", @disilva; 68my @dsj = map "$_\x{FFFE}John", @disilva; 69my @dsJ = map "$_ John", @disilva; 70 71for my $norm (undef, 'NFD') { 72 if (defined $norm) { 73 eval { require Unicode::Normalize }; 74 if ($@) { 75 ok(1) for 1..34; # silent skip 76 next; 77 } 78 } 79 80 my $coll = Unicode::Collate->new( 81 table => 'keys.txt', 82 level => 1, 83 normalization => $norm, 84 UCA_Version => 22, 85 entry => $entry, 86 ); 87 88 # 1..4 89 ok($coll->lt("\x{FFFD}", "\x{FFFF}")); 90 ok($coll->lt("\x{1FFFD}", "\x{1FFFF}")); 91 ok($coll->lt("\x{2FFFD}", "\x{2FFFF}")); 92 ok($coll->lt("\x{10FFFD}", "\x{10FFFF}")); 93 94 # 5..14 95 ok($coll->lt("perl\x{FFFD}", "perl\x{FFFF}")); 96 ok($coll->lt("perl\x{1FFFD}", "perl\x{FFFF}")); 97 ok($coll->lt("perl\x{1FFFE}", "perl\x{FFFF}")); 98 ok($coll->lt("perl\x{1FFFF}", "perl\x{FFFF}")); 99 ok($coll->lt("perl\x{2FFFD}", "perl\x{FFFF}")); 100 ok($coll->lt("perl\x{2FFFE}", "perl\x{FFFF}")); 101 ok($coll->lt("perl\x{2FFFF}", "perl\x{FFFF}")); 102 ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}")); 103 ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}")); 104 ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}")); 105 106 # 15..16 107 ok($coll->gt("perl\x{FFFF}AB", "perl\x{FFFF}")); 108 ok($coll->lt("perl\x{FFFF}\x{10FFFF}", "perl\x{FFFF}\x{FFFF}")); 109 110 $coll->change(level => 4); 111 112 # 17..25 113 for my $i (0 .. $#disilva - 1) { 114 ok($coll->lt($dsf[$i], $dsf[$i+1])); 115 ok($coll->lt($dsj[$i], $dsj[$i+1])); 116 ok($coll->lt($dsJ[$i], $dsJ[$i+1])); 117 } 118 119 # 26 120 ok($coll->lt($dsf[-1], $dsj[0])); 121 122 $coll->change(level => 1); 123 124 # 27..34 125 for my $i (0 .. $#disilva) { 126 ok($coll->lt($dsf[$i], $dsJ[$i])); 127 ok($coll->lt($dsj[$i], $dsJ[$i])); 128 } 129} 130 131# 69 132 133{ 134 my $coll = Unicode::Collate->new( 135 table => 'keys.txt', 136 normalization => undef, 137 highestFFFF => 1, 138 minimalFFFE => 1, 139 ); 140 141 $coll->change(level => 1); 142 ok($coll->lt("perl\x{FFFD}", "perl\x{FFFF}")); 143 ok($coll->lt("perl\x{1FFFD}", "perl\x{FFFF}")); 144 ok($coll->lt("perl\x{1FFFE}", "perl\x{FFFF}")); 145 ok($coll->lt("perl\x{1FFFF}", "perl\x{FFFF}")); 146 ok($coll->lt("perl\x{2FFFD}", "perl\x{FFFF}")); 147 ok($coll->lt("perl\x{2FFFE}", "perl\x{FFFF}")); 148 ok($coll->lt("perl\x{2FFFF}", "perl\x{FFFF}")); 149 ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}")); 150 ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}")); 151 ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}")); 152 153# 79 154 155 $coll->change(level => 3); 156 my @list = ( 157 "ab\x{FFFE}a", 158 "Ab\x{FFFE}a", 159 "ab\x{FFFE}c", 160 "Ab\x{FFFE}c", 161 "ab\x{FFFE}xyz", 162 "abc\x{FFFE}def", 163 "abc\x{FFFE}xYz", 164 "aBc\x{FFFE}xyz", 165 "abcX\x{FFFE}def", 166 "abcx\x{FFFE}xyz", 167 "b\x{FFFE}aaa", 168 "bbb\x{FFFE}a", 169 ); 170 my $p = shift @list; 171 for my $c (@list) { 172 ok($coll->lt($p, $c)); 173 $p = $c; 174 } 175} 176 177# 90 178