1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17use strict; 18use warnings; 19BEGIN { $| = 1; print "1..32\n"; } 20my $count = 0; 21sub ok ($;$) { 22 my $p = my $r = shift; 23 if (@_) { 24 my $x = shift; 25 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 26 } 27 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 28} 29 30use Unicode::Collate; 31 32ok(1); 33 34######################### 35 36{ 37 # Table is undefined, then no entry is defined. 38 my $undef_table = Unicode::Collate->new( 39 table => undef, 40 normalization => undef, 41 level => 1, 42 ); 43 44 # in the Unicode code point order 45 ok($undef_table->lt('', 'A')); 46 ok($undef_table->lt('ABC', 'B')); 47 48 # Hangul should be decomposed (even w/o Unicode::Normalize). 49 ok($undef_table->lt("Perl", "\x{AC00}")); 50 ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}")); 51 ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}")); 52 ok($undef_table->lt("\x{AE00}", "\x{3042}")); 53 54 # U+AC00: Hangul GA 55 # U+AE00: Hangul GEUL 56 # U+3042: Hiragana A 57 58 # Weight for CJK Ideographs is defined, though. 59 ok($undef_table->lt("", "\x{4E00}")); 60 ok($undef_table->lt("\x{4E8C}","ABC")); 61 ok($undef_table->lt("\x{4E00}","\x{3042}")); 62 ok($undef_table->lt("\x{4E00}","\x{4E8C}")); 63 64# 11 65 66 # U+4E00: Ideograph "ONE" 67 # U+4E8C: Ideograph "TWO" 68 69 for my $v ('', 8, 9, 11, 14) { 70 $undef_table->change(UCA_Version => $v) if $v; 71 ok($undef_table->lt("\x{4E00}","\0")); 72 } 73} 74 75# 16 76 77{ 78 my $onlyABC = Unicode::Collate->new( 79 table => undef, 80 normalization => undef, 81 entry => << 'ENTRIES', 820061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A 830041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A 840062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B 850042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B 860063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C 870043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C 88ENTRIES 89 ); 90 ok( 91 join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ), 92 join(':', qw/ A aB Ab ABA BAC cAc cc / ), 93 ); 94} 95 96# 17 97 98{ 99 my $few_entries = Unicode::Collate->new( 100 entry => <<'ENTRIES', 1010050 ; [.0101.0020.0002.0050] # P 1020045 ; [.0102.0020.0002.0045] # E 1030052 ; [.0103.0020.0002.0052] # R 104004C ; [.0104.0020.0002.004C] # L 1051100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G 1061175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I 1075B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" 108ENTRIES 109 table => undef, 110 normalization => undef, 111 ); 112 # defined before undefined 113 my $sortABC = join '', 114 $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ "); 115 116 ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ"); 117 118 ok($few_entries->lt('E', 'D')); 119 ok($few_entries->lt("\x{5B57}", "\x{4E00}")); 120 ok($few_entries->lt("\x{AE30}", "\x{AC00}")); 121 122 # Hangul must be decomposed. 123 ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}")); 124} 125 126# 22 127 128{ 129 my $highestNUL = Unicode::Collate->new( 130 table => undef, 131 normalization => undef, 132 level => 1, 133 entry => '0000 ; [.FFFE.0020.0005.0000]', 134 ); 135 136 for my $v ('', 8, 9, 11, 14) { 137 $highestNUL->change(UCA_Version => $v) if $v; 138 ok($highestNUL->lt("abc\x{4E00}", "abc\0")); 139 ok($highestNUL->lt("abc\x{E0000}","abc\0")); 140 } 141} 142 143# 32 144