1 2BEGIN { 3 if ($ENV{PERL_CORE}) { 4 chdir('t') if -d 't'; 5 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 6 } 7} 8 9use strict; 10use warnings; 11BEGIN { $| = 1; print "1..65\n"; } 12my $count = 0; 13sub ok ($;$) { 14 my $p = my $r = shift; 15 if (@_) { 16 my $x = shift; 17 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 18 } 19 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 20} 21 22use Unicode::Collate; 23 24ok(1); 25 26sub _pack_U { Unicode::Collate::pack_U(@_) } 27sub _unpack_U { Unicode::Collate::unpack_U(@_) } 28 29######################### 30 31##### 2..31 32 33{ 34 my $all_undef_8 = Unicode::Collate->new( 35 table => undef, 36 normalization => undef, 37 overrideCJK => undef, 38 overrideHangul => undef, 39 UCA_Version => 8, 40 ); 41 # All in the Unicode code point order. 42 # No hangul decomposition. 43 44 ok($all_undef_8->lt("\x{1100}", "\x{3402}")); 45 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 46 ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); 47 ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); 48 ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); 49 ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); 50 # U+ABFF: not assigned 51 52 # a hangul syllable is decomposed into jamo. 53 $all_undef_8->change(overrideHangul => 0); 54 ok($all_undef_8->lt("\x{1100}", "\x{3402}")); 55 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 56 ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); 57 ok($all_undef_8->gt("\x{4E00}", "\x{AC00}")); 58 ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); 59 ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); 60 61 # CJK defined < Jamo undefined 62 $all_undef_8->change(overrideCJK => 0); 63 ok($all_undef_8->gt("\x{1100}", "\x{3402}")); 64 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 65 ok($all_undef_8->gt("\x{4DFF}", "\x{4E00}")); 66 ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); 67 ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); 68 ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); 69 70 # CJK undefined > Jamo undefined 71 $all_undef_8->change(overrideCJK => undef); 72 ok($all_undef_8->lt("\x{1100}", "\x{3402}")); 73 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 74 ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); 75 ok($all_undef_8->gt("\x{4E00}", "\x{AC00}")); 76 ok($all_undef_8->eq("\x{AC00}", "\x{1100}\x{1161}")); 77 ok($all_undef_8->lt("\x{AC00}", "\x{ABFF}")); 78 79 $all_undef_8->change(overrideHangul => undef); 80 ok($all_undef_8->lt("\x{1100}", "\x{3402}")); 81 ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); 82 ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); 83 ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); 84 ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); 85 ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); 86} 87 88##### 32..38 89 90{ 91 my $all_undef_9 = Unicode::Collate->new( 92 table => undef, 93 normalization => undef, 94 overrideCJK => undef, 95 overrideHangul => undef, 96 UCA_Version => 9, 97 ); 98 # CJK Ideo. < CJK ext A/B < Others. 99 # No hangul decomposition. 100 101 ok($all_undef_9->lt("\x{4E00}", "\x{3402}")); 102 ok($all_undef_9->lt("\x{3402}", "\x{20000}")); 103 ok($all_undef_9->lt("\x{20000}", "\x{AC00}")); 104 ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}")); 105 ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); 106 # U+ABFF: not assigned 107 108 # a hangul syllable is decomposed into jamo. 109 $all_undef_9->change(overrideHangul => 0); 110 ok($all_undef_9->eq("\x{AC00}", "\x{1100}\x{1161}")); 111 ok($all_undef_9->lt("\x{AC00}", "\x{ABFF}")); 112} 113 114##### 39..46 115 116{ 117 my $ignoreHangul = Unicode::Collate->new( 118 table => undef, 119 normalization => undef, 120 overrideHangul => sub {()}, 121 entry => 'AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL', 122 ); 123 # All Hangul Syllables except U+AE00 are ignored. 124 125 ok($ignoreHangul->eq("\x{AC00}", "")); 126 ok($ignoreHangul->lt("\x{AC00}", "\0")); 127 ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); 128 ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored. 129 ok($ignoreHangul->eq("Pe\x{AC00}rl", "Perl")); 130 ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); 131 # 'r' is unassigned. 132 133 $ignoreHangul->change(overrideHangul => 0); 134 ok($ignoreHangul->eq("\x{AC00}", "\x{1100}\x{1161}")); 135 136 $ignoreHangul->change(overrideHangul => undef); 137 ok($ignoreHangul->gt("\x{AC00}", "\x{1100}\x{1161}")); 138} 139 140##### 47..51 141 142{ 143 my $undefHangul = Unicode::Collate->new( 144 table => undef, 145 normalization => undef, 146 overrideHangul => sub { 147 my $u = shift; 148 return $u == 0xAE00 ? 0x100 : undef; 149 } 150 ); 151 # All Hangul Syllables except U+AE00 are undefined. 152 153 ok($undefHangul->lt("\x{AE00}", "r")); 154 ok($undefHangul->gt("\x{AC00}", "r")); 155 ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}")); 156 ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. 157 ok($undefHangul->lt("\x{AC00}", "\x{B000}")); 158} 159 160##### 52..55 161 162{ 163 my $undefCJK = Unicode::Collate->new( 164 table => undef, 165 normalization => undef, 166 overrideCJK => sub { 167 my $u = shift; 168 return $u == 0x4E00 ? 0x100 : undef; 169 } 170 ); 171 # All CJK Ideographs except U+4E00 are undefined. 172 173 ok($undefCJK->lt("\x{4E00}", "r")); 174 ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned 175 ok($undefCJK->lt("Pe\x{4E00}rl", "Perl")); 176 ok($undefCJK->lt("\x{5000}", "\x{6000}")); 177} 178 179##### 56..60 180 181{ 182 my $cpHangul = Unicode::Collate->new( 183 table => undef, 184 normalization => undef, 185 overrideHangul => sub { shift } 186 ); 187 188 ok($cpHangul->lt("\x{AC00}", "\x{AC01}")); 189 ok($cpHangul->lt("\x{AC01}", "\x{D7A3}")); 190 ok($cpHangul->lt("\x{D7A3}", "r")); 191 ok($cpHangul->lt("r", "\x{D7A4}")); 192 ok($cpHangul->lt("\x{D7A3}", "\x{4E00}")); 193} 194 195##### 61..65 196 197{ 198 my $arrayHangul = Unicode::Collate->new( 199 table => undef, 200 normalization => undef, 201 overrideHangul => sub { 202 my $u = shift; 203 return [$u, 0x20, 0x2, $u]; 204 } 205 ); 206 207 ok($arrayHangul->lt("\x{AC00}", "\x{AC01}")); 208 ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}")); 209 ok($arrayHangul->lt("\x{D7A3}", "r")); 210 ok($arrayHangul->lt("r", "\x{D7A4}")); 211 ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}")); 212} 213 214