1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17use strict; 18use warnings; 19BEGIN { $| = 1; print "1..65\n"; } 20my $count = 0; 21sub ok ($;$) { 22 my $p = my $r = shift; 23 if (@_) { 24 my $x = shift; 25 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 26 } 27 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 28} 29 30use Unicode::Collate; 31 32ok(1); 33 34######################### 35 36##### 2..31 37 38{ 39 my $all_undef_8 = Unicode::Collate->new( 40 table => undef, 41 normalization => undef, 42 overrideCJK => undef, 43 overrideHangul => undef, 44 UCA_Version => 8, 45 ); 46 # All in the Unicode code point order. 47 # No hangul decomposition. 48 49 ok($all_undef_8->lt("\x{1100}", "\x{3402}")); 50 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 51 ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); 52 ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); 53 ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); 54 ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); 55 # U+ABFF: not assigned 56 57 # a hangul syllable is decomposed into jamo. 58 $all_undef_8->change(overrideHangul => 0); 59 ok($all_undef_8->lt("\x{1100}", "\x{3402}")); 60 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 61 ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); 62 ok($all_undef_8->gt("\x{4E00}", "\x{AC00}")); 63 ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); 64 ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); 65 66 # CJK defined < Jamo undefined 67 $all_undef_8->change(overrideCJK => 0); 68 ok($all_undef_8->gt("\x{1100}", "\x{3402}")); 69 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 70 ok($all_undef_8->gt("\x{4DFF}", "\x{4E00}")); 71 ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); 72 ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); 73 ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); 74 75 # CJK undefined > Jamo undefined 76 $all_undef_8->change(overrideCJK => undef); 77 ok($all_undef_8->lt("\x{1100}", "\x{3402}")); 78 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 79 ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); 80 ok($all_undef_8->gt("\x{4E00}", "\x{AC00}")); 81 ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); 82 ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); 83 84 $all_undef_8->change(overrideHangul => undef); 85 ok($all_undef_8->lt("\x{1100}", "\x{3402}")); 86 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 87 ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); 88 ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); 89 ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); 90 ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); 91} 92 93##### 32..38 94 95{ 96 my $all_undef_9 = Unicode::Collate->new( 97 table => undef, 98 normalization => undef, 99 overrideCJK => undef, 100 overrideHangul => undef, 101 UCA_Version => 9, 102 ); 103 # CJK Ideo. < CJK ext A/B < Others. 104 # No hangul decomposition. 105 106 ok($all_undef_9->lt("\x{4E00}", "\x{3402}")); 107 ok($all_undef_9->lt("\x{3402}", "\x{20000}")); 108 ok($all_undef_9->lt("\x{20000}", "\x{AC00}")); 109 ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}")); 110 ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); 111 # U+ABFF: not assigned 112 113 # a hangul syllable is decomposed into jamo. 114 $all_undef_9->change(overrideHangul => 0); 115 ok($all_undef_9->eq("\x{AC00}", "\x{1100}\x{1161}")); 116 ok($all_undef_9->lt("\x{AC00}", "\x{ABFF}")); 117} 118 119##### 39..46 120 121{ 122 my $ignoreHangul = Unicode::Collate->new( 123 table => undef, 124 normalization => undef, 125 overrideHangul => sub {()}, 126 entry => 'AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL', 127 ); 128 # All Hangul Syllables except U+AE00 are ignored. 129 130 ok($ignoreHangul->eq("\x{AC00}", "")); 131 ok($ignoreHangul->lt("\x{AC00}", "\0")); 132 ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); 133 ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored. 134 ok($ignoreHangul->eq("Pe\x{AC00}rl", "Perl")); 135 ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); 136 # 'r' is unassigned. 137 138 $ignoreHangul->change(overrideHangul => 0); 139 ok($ignoreHangul->eq("\x{AC00}", "\x{1100}\x{1161}")); 140 141 $ignoreHangul->change(overrideHangul => undef); 142 ok($ignoreHangul->gt("\x{AC00}", "\x{1100}\x{1161}")); 143} 144 145##### 47..51 146 147{ 148 my $undefHangul = Unicode::Collate->new( 149 table => undef, 150 normalization => undef, 151 overrideHangul => sub { 152 my $u = shift; 153 return $u == 0xAE00 ? 0x100 : undef; 154 } 155 ); 156 # All Hangul Syllables except U+AE00 are undefined. 157 158 ok($undefHangul->lt("\x{AE00}", "r")); 159 ok($undefHangul->gt("\x{AC00}", "r")); 160 ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}")); 161 ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. 162 ok($undefHangul->lt("\x{AC00}", "\x{B000}")); 163} 164 165##### 52..55 166 167{ 168 my $undefCJK = Unicode::Collate->new( 169 table => undef, 170 normalization => undef, 171 overrideCJK => sub { 172 my $u = shift; 173 return $u == 0x4E00 ? 0x100 : undef; 174 } 175 ); 176 # All CJK Ideographs except U+4E00 are undefined. 177 178 ok($undefCJK->lt("\x{4E00}", "r")); 179 ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned 180 ok($undefCJK->lt("Pe\x{4E00}rl", "Perl")); 181 ok($undefCJK->lt("\x{5000}", "\x{6000}")); 182} 183 184##### 56..60 185 186{ 187 my $cpHangul = Unicode::Collate->new( 188 table => undef, 189 normalization => undef, 190 overrideHangul => sub { shift } 191 ); 192 193 ok($cpHangul->lt("\x{AC00}", "\x{AC01}")); 194 ok($cpHangul->lt("\x{AC01}", "\x{D7A3}")); 195 ok($cpHangul->lt("\x{D7A3}", "r")); 196 ok($cpHangul->lt("r", "\x{D7A4}")); 197 ok($cpHangul->lt("\x{D7A3}", "\x{4E00}")); 198} 199 200##### 61..65 201 202{ 203 my $arrayHangul = Unicode::Collate->new( 204 table => undef, 205 normalization => undef, 206 overrideHangul => sub { 207 my $u = shift; 208 return [$u, 0x20, 0x2, $u]; 209 } 210 ); 211 212 ok($arrayHangul->lt("\x{AC00}", "\x{AC01}")); 213 ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}")); 214 ok($arrayHangul->lt("\x{D7A3}", "r")); 215 ok($arrayHangul->lt("r", "\x{D7A4}")); 216 ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}")); 217} 218 219