1 2BEGIN { 3 if ($ENV{PERL_CORE}) { 4 chdir('t') if -d 't'; 5 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 6 } 7} 8 9use strict; 10use warnings; 11BEGIN { $| = 1; print "1..41\n"; } 12my $count = 0; 13sub ok ($;$) { 14 my $p = my $r = shift; 15 if (@_) { 16 my $x = shift; 17 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 18 } 19 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 20} 21 22use Unicode::Collate; 23 24ok(1); 25 26sub _pack_U { Unicode::Collate::pack_U(@_) } 27sub _unpack_U { Unicode::Collate::unpack_U(@_) } 28 29######################### 30 31my $trad = Unicode::Collate->new( 32 table => 'keys.txt', 33 normalization => undef, 34 ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/, 35 level => 3, 36 entry => << 'ENTRIES', 37 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish 38 0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish 39 0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish 40ENTRIES 41); 42# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C 43# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D 44 45##### 2..3 46 47ok( 48 join(':', $trad->sort( qw/ acha aca ada acia acka / ) ), 49 join(':', qw/ aca acia acka acha ada / ), 50); 51 52ok( 53 join(':', $trad->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ), 54 join(':', qw/ ACA ACIA ACKA ACHA ADA / ), 55); 56 57##### 4..7 58 59ok($trad->gt("ocho", "oc\cAho")); # UCA v14 60ok($trad->gt("ocho", "oc\0\cA\0\cBho")); # UCA v14 61ok($trad->eq("-", "")); 62ok($trad->gt("ocho", "oc-ho")); 63 64##### 8..11 65 66$trad->change(UCA_Version => 9); 67 68ok($trad->eq("ocho", "oc\cAho")); # UCA v9 69ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9 70ok($trad->eq("-", "")); 71ok($trad->gt("ocho", "oc-ho")); 72 73##### 12..15 74 75$trad->change(UCA_Version => 8); 76 77ok($trad->gt("ocho", "oc\cAho")); 78ok($trad->gt("ocho", "oc\0\cA\0\cBho")); 79ok($trad->eq("-", "")); 80ok($trad->gt("ocho", "oc-ho")); 81 82 83##### 16..19 84 85$trad->change(UCA_Version => 9); 86 87my $hiragana = "\x{3042}\x{3044}"; 88my $katakana = "\x{30A2}\x{30A4}"; 89 90# HIRAGANA and KATAKANA are ignorable via ignoreName 91ok($trad->eq($hiragana, "")); 92ok($trad->eq("", $katakana)); 93ok($trad->eq($hiragana, $katakana)); 94ok($trad->eq($katakana, $hiragana)); 95 96 97##### 20..31 98 99# According to Conformance Test (UCA_Version == 9 or 11), 100# a L3-ignorable is treated as a completely ignorable. 101 102my $L3ignorable = Unicode::Collate->new( 103 alternate => 'Non-ignorable', 104 level => 3, 105 table => undef, 106 normalization => undef, 107 UCA_Version => 9, 108 entry => <<'ENTRIES', 1090000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) 1100001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429) 1110591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA 1121D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM 1130021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK 11409BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA 11509C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E 11609CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O 11709C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O 1181D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE 1191D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK 1201D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA 1211D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK 122ENTRIES 123); 124 125ok($L3ignorable->lt("\cA", "!")); 126ok($L3ignorable->lt("\x{591}", "!")); 127ok($L3ignorable->eq("\cA", "\x{591}")); 128ok($L3ignorable->eq("\x{9C7}\x{9BE}A", "\x{9C7}\cA\x{9BE}A")); 129ok($L3ignorable->eq("\x{9C7}\x{9BE}A", "\x{9C7}\x{591}\x{9BE}A")); 130ok($L3ignorable->eq("\x{9C7}\x{9BE}A", "\x{9C7}\x{1D165}\x{9BE}A")); 131ok($L3ignorable->eq("\x{9C7}\x{9BE}A", "\x{9CB}A")); 132ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}")); 133ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}")); 134ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}")); 135ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}")); 136ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}")); 137 138##### 32..41 139 140my $c = Unicode::Collate->new( 141 table => 'keys.txt', 142 normalization => undef, 143 level => 1, 144 UCA_Version => 14, 145 entry => << 'ENTRIES', 146034F ; [.0000.0000.0000.034F] # COMBINING GRAPHEME JOINER 1470063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish 1480043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish 1490043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish 150ENTRIES 151); 152# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C 153# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D 154 155ok($c->gt("ocho", "oc\x00\x00ho")); 156ok($c->gt("ocho", "oc\cAho")); 157ok($c->gt("ocho", "oc\x{34F}ho")); 158ok($c->gt("ocio", "oc\x{34F}ho")); 159ok($c->lt("ocgo", "oc\x{34F}ho")); 160ok($c->lt("oceo", "oc\x{34F}ho")); 161 162ok($c->viewSortKey("ocho"), "[0B4B 0A3F 0B4B | | |]"); 163ok($c->viewSortKey("oc\x00\x00ho"), "[0B4B 0A3D 0AB9 0B4B | | |]"); 164ok($c->viewSortKey("oc\cAho"), "[0B4B 0A3D 0AB9 0B4B | | |]"); 165ok($c->viewSortKey("oc\x{34F}ho"), "[0B4B 0A3D 0AB9 0B4B | | |]"); 166 167 168