1#!perl -w 2BEGIN { 3 if (ord("A") != 65) { 4 print "1..0 # Skip: EBCDIC\n"; 5 exit 0; 6 } 7 chdir 't' if -d 't'; 8 @INC = '../lib'; 9 @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself 10 require Config; import Config; 11 if ($Config{'extensions'} !~ /\bStorable\b/) { 12 print "1..0 # Skip: Storable was not built; Unicode::UCD uses Storable\n"; 13 exit 0; 14 } 15} 16 17use strict; 18use Unicode::UCD; 19use Test::More; 20 21BEGIN { plan tests => 239 }; 22 23use Unicode::UCD 'charinfo'; 24 25my $charinfo; 26 27$charinfo = charinfo(0x41); 28 29is($charinfo->{code}, '0041', 'LATIN CAPITAL LETTER A'); 30is($charinfo->{name}, 'LATIN CAPITAL LETTER A'); 31is($charinfo->{category}, 'Lu'); 32is($charinfo->{combining}, '0'); 33is($charinfo->{bidi}, 'L'); 34is($charinfo->{decomposition}, ''); 35is($charinfo->{decimal}, ''); 36is($charinfo->{digit}, ''); 37is($charinfo->{numeric}, ''); 38is($charinfo->{mirrored}, 'N'); 39is($charinfo->{unicode10}, ''); 40is($charinfo->{comment}, ''); 41is($charinfo->{upper}, ''); 42is($charinfo->{lower}, '0061'); 43is($charinfo->{title}, ''); 44is($charinfo->{block}, 'Basic Latin'); 45is($charinfo->{script}, 'Latin'); 46 47$charinfo = charinfo(0x100); 48 49is($charinfo->{code}, '0100', 'LATIN CAPITAL LETTER A WITH MACRON'); 50is($charinfo->{name}, 'LATIN CAPITAL LETTER A WITH MACRON'); 51is($charinfo->{category}, 'Lu'); 52is($charinfo->{combining}, '0'); 53is($charinfo->{bidi}, 'L'); 54is($charinfo->{decomposition}, '0041 0304'); 55is($charinfo->{decimal}, ''); 56is($charinfo->{digit}, ''); 57is($charinfo->{numeric}, ''); 58is($charinfo->{mirrored}, 'N'); 59is($charinfo->{unicode10}, 'LATIN CAPITAL LETTER A MACRON'); 60is($charinfo->{comment}, ''); 61is($charinfo->{upper}, ''); 62is($charinfo->{lower}, '0101'); 63is($charinfo->{title}, ''); 64is($charinfo->{block}, 'Latin Extended-A'); 65is($charinfo->{script}, 'Latin'); 66 67# 0x0590 is in the Hebrew block but unused. 68 69$charinfo = charinfo(0x590); 70 71is($charinfo->{code}, undef, '0x0590 - unused Hebrew'); 72is($charinfo->{name}, undef); 73is($charinfo->{category}, undef); 74is($charinfo->{combining}, undef); 75is($charinfo->{bidi}, undef); 76is($charinfo->{decomposition}, undef); 77is($charinfo->{decimal}, undef); 78is($charinfo->{digit}, undef); 79is($charinfo->{numeric}, undef); 80is($charinfo->{mirrored}, undef); 81is($charinfo->{unicode10}, undef); 82is($charinfo->{comment}, undef); 83is($charinfo->{upper}, undef); 84is($charinfo->{lower}, undef); 85is($charinfo->{title}, undef); 86is($charinfo->{block}, undef); 87is($charinfo->{script}, undef); 88 89# 0x05d0 is in the Hebrew block and used. 90 91$charinfo = charinfo(0x5d0); 92 93is($charinfo->{code}, '05D0', '05D0 - used Hebrew'); 94is($charinfo->{name}, 'HEBREW LETTER ALEF'); 95is($charinfo->{category}, 'Lo'); 96is($charinfo->{combining}, '0'); 97is($charinfo->{bidi}, 'R'); 98is($charinfo->{decomposition}, ''); 99is($charinfo->{decimal}, ''); 100is($charinfo->{digit}, ''); 101is($charinfo->{numeric}, ''); 102is($charinfo->{mirrored}, 'N'); 103is($charinfo->{unicode10}, ''); 104is($charinfo->{comment}, ''); 105is($charinfo->{upper}, ''); 106is($charinfo->{lower}, ''); 107is($charinfo->{title}, ''); 108is($charinfo->{block}, 'Hebrew'); 109is($charinfo->{script}, 'Hebrew'); 110 111# An open syllable in Hangul. 112 113$charinfo = charinfo(0xAC00); 114 115is($charinfo->{code}, 'AC00', 'HANGUL SYLLABLE-AC00'); 116is($charinfo->{name}, 'HANGUL SYLLABLE-AC00'); 117is($charinfo->{category}, 'Lo'); 118is($charinfo->{combining}, '0'); 119is($charinfo->{bidi}, 'L'); 120is($charinfo->{decomposition}, undef); 121is($charinfo->{decimal}, ''); 122is($charinfo->{digit}, ''); 123is($charinfo->{numeric}, ''); 124is($charinfo->{mirrored}, 'N'); 125is($charinfo->{unicode10}, ''); 126is($charinfo->{comment}, ''); 127is($charinfo->{upper}, ''); 128is($charinfo->{lower}, ''); 129is($charinfo->{title}, ''); 130is($charinfo->{block}, 'Hangul Syllables'); 131is($charinfo->{script}, 'Hangul'); 132 133# A closed syllable in Hangul. 134 135$charinfo = charinfo(0xAE00); 136 137is($charinfo->{code}, 'AE00', 'HANGUL SYLLABLE-AE00'); 138is($charinfo->{name}, 'HANGUL SYLLABLE-AE00'); 139is($charinfo->{category}, 'Lo'); 140is($charinfo->{combining}, '0'); 141is($charinfo->{bidi}, 'L'); 142is($charinfo->{decomposition}, undef); 143is($charinfo->{decimal}, ''); 144is($charinfo->{digit}, ''); 145is($charinfo->{numeric}, ''); 146is($charinfo->{mirrored}, 'N'); 147is($charinfo->{unicode10}, ''); 148is($charinfo->{comment}, ''); 149is($charinfo->{upper}, ''); 150is($charinfo->{lower}, ''); 151is($charinfo->{title}, ''); 152is($charinfo->{block}, 'Hangul Syllables'); 153is($charinfo->{script}, 'Hangul'); 154 155$charinfo = charinfo(0x1D400); 156 157is($charinfo->{code}, '1D400', 'MATHEMATICAL BOLD CAPITAL A'); 158is($charinfo->{name}, 'MATHEMATICAL BOLD CAPITAL A'); 159is($charinfo->{category}, 'Lu'); 160is($charinfo->{combining}, '0'); 161is($charinfo->{bidi}, 'L'); 162is($charinfo->{decomposition}, '<font> 0041'); 163is($charinfo->{decimal}, ''); 164is($charinfo->{digit}, ''); 165is($charinfo->{numeric}, ''); 166is($charinfo->{mirrored}, 'N'); 167is($charinfo->{unicode10}, ''); 168is($charinfo->{comment}, ''); 169is($charinfo->{upper}, ''); 170is($charinfo->{lower}, ''); 171is($charinfo->{title}, ''); 172is($charinfo->{block}, 'Mathematical Alphanumeric Symbols'); 173is($charinfo->{script}, 'Common'); 174 175$charinfo = charinfo(0x9FBA); #Bug 58428 176 177is($charinfo->{code}, '9FBA', 'U+9FBA'); 178is($charinfo->{name}, 'CJK UNIFIED IDEOGRAPH-9FBA'); 179is($charinfo->{category}, 'Lo'); 180is($charinfo->{combining}, '0'); 181is($charinfo->{bidi}, 'L'); 182is($charinfo->{decomposition}, ''); 183is($charinfo->{decimal}, ''); 184is($charinfo->{digit}, ''); 185is($charinfo->{numeric}, ''); 186is($charinfo->{mirrored}, 'N'); 187is($charinfo->{unicode10}, ''); 188is($charinfo->{comment}, ''); 189is($charinfo->{upper}, ''); 190is($charinfo->{lower}, ''); 191is($charinfo->{title}, ''); 192is($charinfo->{block}, 'CJK Unified Ideographs'); 193is($charinfo->{script}, 'Han'); 194 195use Unicode::UCD qw(charblock charscript); 196 197# 0x0590 is in the Hebrew block but unused. 198 199is(charblock(0x590), 'Hebrew', '0x0590 - Hebrew unused charblock'); 200is(charscript(0x590), undef, '0x0590 - Hebrew unused charscript'); 201 202$charinfo = charinfo(0xbe); 203 204is($charinfo->{code}, '00BE', 'VULGAR FRACTION THREE QUARTERS'); 205is($charinfo->{name}, 'VULGAR FRACTION THREE QUARTERS'); 206is($charinfo->{category}, 'No'); 207is($charinfo->{combining}, '0'); 208is($charinfo->{bidi}, 'ON'); 209is($charinfo->{decomposition}, '<fraction> 0033 2044 0034'); 210is($charinfo->{decimal}, ''); 211is($charinfo->{digit}, ''); 212is($charinfo->{numeric}, '3/4'); 213is($charinfo->{mirrored}, 'N'); 214is($charinfo->{unicode10}, 'FRACTION THREE QUARTERS'); 215is($charinfo->{comment}, ''); 216is($charinfo->{upper}, ''); 217is($charinfo->{lower}, ''); 218is($charinfo->{title}, ''); 219is($charinfo->{block}, 'Latin-1 Supplement'); 220is($charinfo->{script}, 'Common'); 221 222use Unicode::UCD qw(charblocks charscripts); 223 224my $charblocks = charblocks(); 225 226ok(exists $charblocks->{Thai}, 'Thai charblock exists'); 227is($charblocks->{Thai}->[0]->[0], hex('0e00')); 228ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist'); 229 230my $charscripts = charscripts(); 231 232ok(exists $charscripts->{Armenian}, 'Armenian charscript exists'); 233is($charscripts->{Armenian}->[0]->[0], hex('0531')); 234ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist'); 235 236my $charscript; 237 238$charscript = charscript("12ab"); 239is($charscript, 'Ethiopic', 'Ethiopic charscript'); 240 241$charscript = charscript("0x12ab"); 242is($charscript, 'Ethiopic'); 243 244$charscript = charscript("U+12ab"); 245is($charscript, 'Ethiopic'); 246 247my $ranges; 248 249$ranges = charscript('Ogham'); 250is($ranges->[1]->[0], hex('1681'), 'Ogham charscript'); 251is($ranges->[1]->[1], hex('169a')); 252 253use Unicode::UCD qw(charinrange); 254 255$ranges = charscript('Cherokee'); 256ok(!charinrange($ranges, "139f"), 'Cherokee charscript'); 257ok( charinrange($ranges, "13a0")); 258ok( charinrange($ranges, "13f4")); 259ok(!charinrange($ranges, "13f5")); 260 261use Unicode::UCD qw(general_categories); 262 263my $gc = general_categories(); 264 265ok(exists $gc->{L}, 'has L'); 266is($gc->{L}, 'Letter', 'L is Letter'); 267is($gc->{Lu}, 'UppercaseLetter', 'Lu is UppercaseLetter'); 268 269use Unicode::UCD qw(bidi_types); 270 271my $bt = bidi_types(); 272 273ok(exists $bt->{L}, 'has L'); 274is($bt->{L}, 'Left-to-Right', 'L is Left-to-Right'); 275is($bt->{AL}, 'Right-to-Left Arabic', 'AL is Right-to-Left Arabic'); 276 277# If this fails, then maybe one should look at the Unicode changes to see 278# what else might need to be updated. 279is(Unicode::UCD::UnicodeVersion, '5.1.0', 'UnicodeVersion'); 280 281use Unicode::UCD qw(compexcl); 282 283ok(!compexcl(0x0100), 'compexcl'); 284ok( compexcl(0x0958)); 285 286use Unicode::UCD qw(casefold); 287 288my $casefold; 289 290$casefold = casefold(0x41); 291 292is($casefold->{code}, '0041', 'casefold 0x41 code'); 293is($casefold->{status}, 'C', 'casefold 0x41 status'); 294is($casefold->{mapping}, '0061', 'casefold 0x41 mapping'); 295is($casefold->{full}, '0061', 'casefold 0x41 full'); 296is($casefold->{simple}, '0061', 'casefold 0x41 simple'); 297is($casefold->{turkic}, "", 'casefold 0x41 turkic'); 298 299$casefold = casefold(0xdf); 300 301is($casefold->{code}, '00DF', 'casefold 0xDF code'); 302is($casefold->{status}, 'F', 'casefold 0xDF status'); 303is($casefold->{mapping}, '0073 0073', 'casefold 0xDF mapping'); 304is($casefold->{full}, '0073 0073', 'casefold 0xDF full'); 305is($casefold->{simple}, "", 'casefold 0xDF simple'); 306is($casefold->{turkic}, "", 'casefold 0xDF turkic'); 307 308# Do different tests depending on if version <= 3.1, or not. 309(my $version = Unicode::UCD::UnicodeVersion) =~ /^(\d+)\.(\d+)/; 310if (defined $1 && ($1 <= 2 || $1 == 3 && defined $2 && $2 <= 1)) { 311 $casefold = casefold(0x130); 312 313 is($casefold->{code}, '0130', 'casefold 0x130 code'); 314 is($casefold->{status}, 'I' , 'casefold 0x130 status'); 315 is($casefold->{mapping}, '0069', 'casefold 0x130 mapping'); 316 is($casefold->{full}, '0069', 'casefold 0x130 full'); 317 is($casefold->{simple}, "0069", 'casefold 0x130 simple'); 318 is($casefold->{turkic}, "0069", 'casefold 0x130 turkic'); 319 320 $casefold = casefold(0x131); 321 322 is($casefold->{code}, '0131', 'casefold 0x131 code'); 323 is($casefold->{status}, 'I' , 'casefold 0x131 status'); 324 is($casefold->{mapping}, '0069', 'casefold 0x131 mapping'); 325 is($casefold->{full}, '0069', 'casefold 0x131 full'); 326 is($casefold->{simple}, "0069", 'casefold 0x131 simple'); 327 is($casefold->{turkic}, "0069", 'casefold 0x131 turkic'); 328} else { 329 $casefold = casefold(0x49); 330 331 is($casefold->{code}, '0049', 'casefold 0x49 code'); 332 is($casefold->{status}, 'C' , 'casefold 0x49 status'); 333 is($casefold->{mapping}, '0069', 'casefold 0x49 mapping'); 334 is($casefold->{full}, '0069', 'casefold 0x49 full'); 335 is($casefold->{simple}, "0069", 'casefold 0x49 simple'); 336 is($casefold->{turkic}, "0131", 'casefold 0x49 turkic'); 337 338 $casefold = casefold(0x130); 339 340 is($casefold->{code}, '0130', 'casefold 0x130 code'); 341 is($casefold->{status}, 'F' , 'casefold 0x130 status'); 342 is($casefold->{mapping}, '0069 0307', 'casefold 0x130 mapping'); 343 is($casefold->{full}, '0069 0307', 'casefold 0x130 full'); 344 is($casefold->{simple}, "", 'casefold 0x130 simple'); 345 is($casefold->{turkic}, "0069", 'casefold 0x130 turkic'); 346} 347 348$casefold = casefold(0x1F88); 349 350is($casefold->{code}, '1F88', 'casefold 0x1F88 code'); 351is($casefold->{status}, 'S' , 'casefold 0x1F88 status'); 352is($casefold->{mapping}, '1F80', 'casefold 0x1F88 mapping'); 353is($casefold->{full}, '1F00 03B9', 'casefold 0x1F88 full'); 354is($casefold->{simple}, '1F80', 'casefold 0x1F88 simple'); 355is($casefold->{turkic}, "", 'casefold 0x1F88 turkic'); 356 357ok(!casefold(0x20)); 358 359use Unicode::UCD qw(casespec); 360 361my $casespec; 362 363ok(!casespec(0x41)); 364 365$casespec = casespec(0xdf); 366 367ok($casespec->{code} eq '00DF' && 368 $casespec->{lower} eq '00DF' && 369 $casespec->{title} eq '0053 0073' && 370 $casespec->{upper} eq '0053 0053' && 371 !defined $casespec->{condition}, 'casespec 0xDF'); 372 373$casespec = casespec(0x307); 374 375ok($casespec->{az}->{code} eq '0307' && 376 !defined $casespec->{az}->{lower} && 377 $casespec->{az}->{title} eq '0307' && 378 $casespec->{az}->{upper} eq '0307' && 379 $casespec->{az}->{condition} eq 'az After_I', 380 'casespec 0x307'); 381 382# perl #7305 UnicodeCD::compexcl is weird 383 384for (1) {my $a=compexcl $_} 385ok(1, 'compexcl read-only $_: perl #7305'); 386map {compexcl $_} %{{1=>2}}; 387ok(1, 'compexcl read-only hash: perl #7305'); 388 389is(Unicode::UCD::_getcode('123'), 123, "_getcode(123)"); 390is(Unicode::UCD::_getcode('0123'), 0x123, "_getcode(0123)"); 391is(Unicode::UCD::_getcode('0x123'), 0x123, "_getcode(0x123)"); 392is(Unicode::UCD::_getcode('0X123'), 0x123, "_getcode(0X123)"); 393is(Unicode::UCD::_getcode('U+123'), 0x123, "_getcode(U+123)"); 394is(Unicode::UCD::_getcode('u+123'), 0x123, "_getcode(u+123)"); 395is(Unicode::UCD::_getcode('U+1234'), 0x1234, "_getcode(U+1234)"); 396is(Unicode::UCD::_getcode('U+12345'), 0x12345, "_getcode(U+12345)"); 397is(Unicode::UCD::_getcode('123x'), undef, "_getcode(123x)"); 398is(Unicode::UCD::_getcode('x123'), undef, "_getcode(x123)"); 399is(Unicode::UCD::_getcode('0x123x'), undef, "_getcode(x123)"); 400is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)"); 401 402{ 403 my $r1 = charscript('Latin'); 404 my $n1 = @$r1; 405 is($n1, 42, "number of ranges in Latin script (Unicode 5.1.0)"); 406 shift @$r1 while @$r1; 407 my $r2 = charscript('Latin'); 408 is(@$r2, $n1, "modifying results should not mess up internal caches"); 409} 410 411{ 412 is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD"); 413} 414 415use Unicode::UCD qw(namedseq); 416 417is(namedseq("KATAKANA LETTER AINU P"), "\x{31F7}\x{309A}", "namedseq"); 418is(namedseq("KATAKANA LETTER AINU Q"), undef); 419is(namedseq(), undef); 420is(namedseq(qw(foo bar)), undef); 421my @ns = namedseq("KATAKANA LETTER AINU P"); 422is(scalar @ns, 2); 423is($ns[0], 0x31F7); 424is($ns[1], 0x309A); 425my %ns = namedseq(); 426is($ns{"KATAKANA LETTER AINU P"}, "\x{31F7}\x{309A}"); 427@ns = namedseq(42); 428is(@ns, 0); 429 430