1 2BEGIN { 3 if ($ENV{PERL_CORE}) { 4 chdir('t') if -d 't'; 5 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 6 } 7} 8 9use strict; 10use warnings; 11BEGIN { $| = 1; print "1..32\n"; } 12my $count = 0; 13sub ok ($;$) { 14 my $p = my $r = shift; 15 if (@_) { 16 my $x = shift; 17 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 18 } 19 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 20} 21 22use Unicode::Collate; 23 24ok(1); 25 26sub _pack_U { Unicode::Collate::pack_U(@_) } 27sub _unpack_U { Unicode::Collate::unpack_U(@_) } 28 29######################### 30 31{ 32 # Table is undefined, then no entry is defined. 33 my $undef_table = Unicode::Collate->new( 34 table => undef, 35 normalization => undef, 36 level => 1, 37 ); 38 39 # in the Unicode code point order 40 ok($undef_table->lt('', 'A')); 41 ok($undef_table->lt('ABC', 'B')); 42 43 # Hangul should be decomposed (even w/o Unicode::Normalize). 44 ok($undef_table->lt("Perl", "\x{AC00}")); 45 ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}")); 46 ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}")); 47 ok($undef_table->lt("\x{AE00}", "\x{3042}")); 48 49 # U+AC00: Hangul GA 50 # U+AE00: Hangul GEUL 51 # U+3042: Hiragana A 52 53 # Weight for CJK Ideographs is defined, though. 54 ok($undef_table->lt("", "\x{4E00}")); 55 ok($undef_table->lt("\x{4E8C}","ABC")); 56 ok($undef_table->lt("\x{4E00}","\x{3042}")); 57 ok($undef_table->lt("\x{4E00}","\x{4E8C}")); 58 59# 11 60 61 # U+4E00: Ideograph "ONE" 62 # U+4E8C: Ideograph "TWO" 63 64 for my $v ('', 8, 9, 11, 14) { 65 $undef_table->change(UCA_Version => $v) if $v; 66 ok($undef_table->lt("\x{4E00}","\0")); 67 } 68} 69 70# 16 71 72{ 73 my $onlyABC = Unicode::Collate->new( 74 table => undef, 75 normalization => undef, 76 entry => << 'ENTRIES', 770061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A 780041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A 790062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B 800042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B 810063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C 820043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C 83ENTRIES 84 ); 85 ok( 86 join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ), 87 join(':', qw/ A aB Ab ABA BAC cAc cc / ), 88 ); 89} 90 91# 17 92 93{ 94 my $few_entries = Unicode::Collate->new( 95 entry => <<'ENTRIES', 960050 ; [.0101.0020.0002.0050] # P 970045 ; [.0102.0020.0002.0045] # E 980052 ; [.0103.0020.0002.0052] # R 99004C ; [.0104.0020.0002.004C] # L 1001100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G 1011175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I 1025B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" 103ENTRIES 104 table => undef, 105 normalization => undef, 106 ); 107 # defined before undefined 108 my $sortABC = join '', 109 $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ "); 110 111 ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ"); 112 113 ok($few_entries->lt('E', 'D')); 114 ok($few_entries->lt("\x{5B57}", "\x{4E00}")); 115 ok($few_entries->lt("\x{AE30}", "\x{AC00}")); 116 117 # Hangul must be decomposed. 118 ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}")); 119} 120 121# 22 122 123{ 124 my $highestNUL = Unicode::Collate->new( 125 table => undef, 126 normalization => undef, 127 level => 1, 128 entry => '0000 ; [.FFFE.0020.0005.0000]', 129 ); 130 131 for my $v ('', 8, 9, 11, 14) { 132 $highestNUL->change(UCA_Version => $v) if $v; 133 ok($highestNUL->lt("abc\x{4E00}", "abc\0")); 134 ok($highestNUL->lt("abc\x{E0000}","abc\0")); 135 } 136} 137 138# 32 139