# Contraction tests for Unicode::Collate.
#
# The traditional Spanish digraph "ch" is tailored as a single collation
# element; the checks below compare "ocho" against variants that have other
# characters inserted between the "c" and the "h".

BEGIN {
    # Emit a skip plan and stop when pack('U') cannot produce the character.
    if ("A" ne pack('U', 0x41)) {
        print "1..0 # Unicode::Collate cannot stringify a Unicode code point\n";
        exit 0;
    }

    # When PERL_CORE is set, run from t/ (if it exists) and load modules
    # from ../lib (::lib on MacOS).
    if ($ENV{PERL_CORE}) {
        chdir 't' if -d 't';
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}

use Test;
BEGIN { plan tests => 41 };

use strict;
use warnings;
use Unicode::Collate;

##### 1

ok(1);

# Collator with "ch", "Ch" and "CH" added as contractions that share the
# primary weight 0A3F.
my $trad = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    ignoreName    => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
    level         => 3,
    entry         => <<'ENTRIES',
 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
 0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
 0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
ENTRIES
);
# For reference, the neighbouring single letters are listed as:
# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D

##### 2..3

# Words containing "ch" sort after the other "c" words and before "d",
# in lower case and in upper case.
for my $case (
    [ [ qw/ acha aca ada acia acka / ], join(':', qw/ aca acia acka acha ada /) ],
    [ [ qw/ ACHA ACA ADA ACIA ACKA / ], join(':', qw/ ACA ACIA ACKA ACHA ADA /) ],
) {
    my ($words, $expected) = @$case;
    ok(join(':', $trad->sort(@$words)), $expected);
}

##### 4..15

{
    # The same four comparisons are run three times: with the collator as
    # constructed, then after switching UCA_Version to 9, then to 8.
    my @pairs = (
        [ "ocho", "oc\cAho" ],
        [ "ocho", "oc\0\cA\0\cBho" ],
        [ "-",    "" ],
        [ "ocho", "oc-ho" ],
    );

    # Each row: UCA_Version to switch to (undef = leave unchanged),
    # followed by the comparison method expected to hold for each pair.
    my @rounds = (
        [ undef, qw(gt gt eq gt) ],    #  4..7   (first two: UCA v14)
        [ 9,     qw(eq eq eq gt) ],    #  8..11  (first two: UCA v9)
        [ 8,     qw(gt gt eq gt) ],    # 12..15
    );

    for my $round (@rounds) {
        my ($version, @methods) = @$round;
        $trad->change(UCA_Version => $version) if defined $version;

        for my $i (0 .. $#pairs) {
            my $method = $methods[$i];
            my ($left, $right) = @{ $pairs[$i] };
            ok($trad->$method($left, $right));
        }
    }
}


##### 16..19

$trad->change(UCA_Version => 9);

my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";

# HIRAGANA and KATAKANA are ignorable via ignoreName
ok($trad->eq($hiragana, ""));
ok($trad->eq("", $katakana));
# 18..19: the two kana strings also compare equal to each other,
# whichever one is passed first.
for my $pair ([ $hiragana, $katakana ], [ $katakana, $hiragana ]) {
    ok($trad->eq(@$pair));
}


##### 20..31

# According to Conformance Test (UCA_Version == 9 or 11),
# a L3-ignorable is treated as a completely ignorable.

my $L3ignorable = Unicode::Collate->new(
    alternate     => 'Non-ignorable',
    level         => 3,
    table         => undef,
    normalization => undef,
    UCA_Version   => 9,
    entry         => <<'ENTRIES',
0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
0591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK
09BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
09C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
09CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
ENTRIES
);

{
    # Each row: comparison method expected to hold, left string, right string.
    my @checks = (
        [ 'lt', "\cA",               "!" ],
        [ 'lt', "\x{591}",           "!" ],
        [ 'eq', "\cA",               "\x{591}" ],
        # The 09C7 + 09BE contraction with another character in between.
        [ 'eq', "\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A" ],
        [ 'eq', "\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A" ],
        [ 'eq', "\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A" ],
        [ 'eq', "\x{09C7}\x{09BE}A", "\x{09CB}A" ],
        # Musical symbols whose entries end in the 1D165 element.
        [ 'lt', "\x{1D1BB}",         "\x{1D1BC}" ],
        [ 'eq', "\x{1D1BB}",         "\x{1D1B9}" ],
        [ 'eq', "\x{1D1BC}",         "\x{1D1BA}" ],
        [ 'eq', "\x{1D1BB}",         "\x{1D1B9}\x{1D165}" ],
        [ 'eq', "\x{1D1BC}",         "\x{1D1BA}\x{1D165}" ],
    );

    for my $check (@checks) {
        my ($method, $left, $right) = @$check;
        ok($L3ignorable->$method($left, $right));
    }
}

##### 32..41

# Primary-level collator (UCA_Version 14) with the Spanish "ch" contractions
# plus an explicit entry for U+034F.
my $c = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    level         => 1,
    UCA_Version   => 14,
    entry         => <<'ENTRIES',
034F ; [.0000.0000.0000.034F] # COMBINING GRAPHEME JOINER
0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
ENTRIES
);
# For reference, the neighbouring single letters are listed as:
# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D

{
    # 32..37: ordering of words against strings that have characters
    # inserted between "c" and "h".
    my @checks = (
        [ 'gt', "ocho", "oc\x00\x00ho" ],
        [ 'gt', "ocho", "oc\cAho" ],
        [ 'gt', "ocho", "oc\x{034F}ho" ],
        [ 'gt', "ocio", "oc\x{034F}ho" ],
        [ 'lt', "ocgo", "oc\x{034F}ho" ],
        [ 'lt', "oceo", "oc\x{034F}ho" ],
    );

    for my $check (@checks) {
        my ($method, $left, $right) = @$check;
        ok($c->$method($left, $right));
    }

    # 38..41: expected sort keys. "ocho" has the single 0A3F element for
    # "ch"; each of the other three strings has 0A3D 0AB9 in its place.
    my @sort_keys = (
        [ "ocho",           "[0B4B 0A3F 0B4B | | |]" ],
        [ "oc\x00\x00ho",   "[0B4B 0A3D 0AB9 0B4B | | |]" ],
        [ "oc\cAho",        "[0B4B 0A3D 0AB9 0B4B | | |]" ],
        [ "oc\x{034F}ho",   "[0B4B 0A3D 0AB9 0B4B | | |]" ],
    );

    for my $case (@sort_keys) {
        my ($string, $expected) = @$case;
        ok($c->viewSortKey($string), $expected);
    }
}