# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
# This file is machine-generated by lib/unicore/mktables from the Unicode
# database, Version 15.0.0.  Any changes made here will be lost!


# !!!!!!!   INTERNAL PERL USE ONLY   !!!!!!!
# This file is for internal use by core Perl only.  The format and even the
# name or existence of this file are subject to change without notice.  Don't
# use it directly.  Use Unicode::UCD to access the Unicode character data
# base.



package charnames;

# This module contains machine-generated tables and code for the
# algorithmically-determinable Unicode character names.  The following
# routines can be used to translate between name and code point and vice versa

{ # Closure

    # Matches legal code point.  4-6 hex digits.  If there are 6, the first
    # two must be 10; if there are 5, the first must not be a 0.  Written this
    # way to decrease backtracking.  The first regex allows the code point to
    # be at the end of a word, but to work properly, the word shouldn't end
    # with a valid hex character.  The second one won't match a code point at
    # the end of a word, and doesn't have the run-on issue
    my $run_on_code_point_re = qr/(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b)/;
    my $code_point_re = qr/(?^aa:\b(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b))/;

    # In the following hash, the keys are the bases of names which include
    # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01.  The value
    # of each key is another hash which is used to get the low and high ends
    # for each range of code points that apply to the name.  The 'low' and
    # 'high' arrays are parallel (same index == same range) and are in
    # ascending order; the lookup code below relies on that ordering.
    my %names_ending_in_code_point = (
'CJK COMPATIBILITY IDEOGRAPH' =>
{
'high' =>
[
64109,
64217,
195101,
],
'low' =>
[
63744,
64112,
194560,
],
},
'CJK UNIFIED IDEOGRAPH' =>
{
'high' =>
[
19903,
40959,
173791,
177977,
178205,
183969,
191456,
201546,
205743,
],
'low' =>
[
13312,
19968,
131072,
173824,
177984,
178208,
183984,
196608,
201552,
],
},
'KHITAN SMALL SCRIPT CHARACTER' =>
{
'high' =>
[
101589,
],
'low' =>
[
101120,
],
},
'NUSHU CHARACTER' =>
{
'high' =>
[
111355,
],
'low' =>
[
110960,
],
},
'TANGUT IDEOGRAPH' =>
{
'high' =>
[
100343,
],
'low' =>
[
94208,
],
},
'TANGUT IDEOGRAPH SUPPLEMENT' =>
{
'high' =>
[
101640,
],
'low' =>
[
101632,
],
},

    );

    # The following hash is a copy of the previous one, except is for loose
    # matching, so each name has blanks and dashes squeezed out
    my %loose_names_ending_in_code_point = (
'CJKCOMPATIBILITYIDEOGRAPH' =>
{
'high' =>
[
64109,
64217,
195101,
],
'low' =>
[
63744,
64112,
194560,
],
},
'CJKUNIFIEDIDEOGRAPH' =>
{
'high' =>
[
19903,
40959,
173791,
177977,
178205,
183969,
191456,
201546,
205743,
],
'low' =>
[
13312,
19968,
131072,
173824,
177984,
178208,
183984,
196608,
201552,
],
},
'KHITANSMALLSCRIPTCHARACTER' =>
{
'high' =>
[
101589,
],
'low' =>
[
101120,
],
},
'NUSHUCHARACTER' =>
{
'high' =>
[
111355,
],
'low' =>
[
110960,
],
},
'TANGUTIDEOGRAPH' =>
{
'high' =>
[
100343,
],
'low' =>
[
94208,
],
},
'TANGUTIDEOGRAPHSUPPLEMENT' =>
{
'high' =>
[
101640,
],
'low' =>
[
101632,
],
},

    );

    # And the following array gives the inverse mapping from code points to
    # names.  Lowest code points are first.  This is deliberately a package
    # variable (not a lexical), so that it is visible outside this file.
    # Each 'legal' string begins with a newline character followed by a
    # space; both are part of the stored value.
    @code_points_ending_in_code_point = (

{
'high' => 19903,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 13312,
'name' => 'CJK UNIFIED IDEOGRAPH',
},
{
'high' => 40959,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 19968,
'name' => 'CJK UNIFIED IDEOGRAPH',
},
{
'high' => 64109,
'legal' => "\n -0123456789ABCDEFGHIJKLMOPRTY",
'low' => 63744,
'name' => 'CJK COMPATIBILITY IDEOGRAPH',
},
{
'high' => 64217,
'legal' => "\n -0123456789ABCDEFGHIJKLMOPRTY",
'low' => 64112,
'name' => 'CJK COMPATIBILITY IDEOGRAPH',
},
{
'high' => 100343,
'legal' => "\n -0123456789ABCDEFGHINOPRTU",
'low' => 94208,
'name' => 'TANGUT IDEOGRAPH',
},
{
'high' => 101589,
'legal' => "\n -0123456789ABCDEFHIKLMNPRST",
'low' => 101120,
'name' => 'KHITAN SMALL SCRIPT CHARACTER',
},
{
'high' => 101640,
'legal' => "\n -0123456789ABCDEFGHILMNOPRSTU",
'low' => 101632,
'name' => 'TANGUT IDEOGRAPH SUPPLEMENT',
},
{
'high' => 111355,
'legal' => "\n -0123456789ABCDEFHNRSTU",
'low' => 110960,
'name' => 'NUSHU CHARACTER',
},
{
'high' => 173791,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 131072,
'name' => 'CJK UNIFIED IDEOGRAPH',
},
{
'high' => 177977,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 173824,
'name' => 'CJK UNIFIED IDEOGRAPH',
},
{
'high' => 178205,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 177984,
'name' => 'CJK UNIFIED IDEOGRAPH',
},
{
'high' => 183969,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 178208,
'name' => 'CJK UNIFIED IDEOGRAPH',
},
{
'high' => 191456,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 183984,
'name' => 'CJK UNIFIED IDEOGRAPH',
},
{
'high' => 195101,
'legal' => "\n -0123456789ABCDEFGHIJKLMOPRTY",
'low' => 194560,
'name' => 'CJK COMPATIBILITY IDEOGRAPH',
},
{
'high' => 201546,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 196608,
'name' => 'CJK UNIFIED IDEOGRAPH',
},
{
'high' => 205743,
'legal' => "\n -0123456789ABCDEFGHIJKNOPRU",
'low' => 201552,
'name' => 'CJK UNIFIED IDEOGRAPH',
},

    );

    # Is exportable, make read-only
    Internals::SvREADONLY(@code_points_ending_in_code_point, 1);

    # Convert from code point to Jamo short name for use in composing Hangul
    # syllable names
    my %Jamo = (
4352 => 'G',
4353 => 'GG',
4354 => 'N',
4355 => 'D',
4356 => 'DD',
4357 => 'R',
4358 => 'M',
4359 => 'B',
4360 => 'BB',
4361 => 'S',
4362 => 'SS',
4363 => '',
4364 => 'J',
4365 => 'JJ',
4366 => 'C',
4367 => 'K',
4368 => 'T',
4369 => 'P',
4370 => 'H',
4449 => 'A',
4450 => 'AE',
4451 => 'YA',
4452 => 'YAE',
4453 => 'EO',
4454 => 'E',
4455 => 'YEO',
4456 => 'YE',
4457 => 'O',
4458 => 'WA',
4459 => 'WAE',
4460 => 'OE',
4461 => 'YO',
4462 => 'U',
4463 => 'WEO',
4464 => 'WE',
4465 => 'WI',
4466 => 'YU',
4467 => 'EU',
4468 => 'YI',
4469 => 'I',
4520 => 'G',
4521 => 'GG',
4522 => 'GS',
4523 => 'N',
4524 => 'NJ',
4525 => 'NH',
4526 => 'D',
4527 => 'L',
4528 => 'LG',
4529 => 'LM',
4530 => 'LB',
4531 => 'LS',
4532 => 'LT',
4533 => 'LP',
4534 => 'LH',
4535 => 'M',
4536 => 'B',
4537 => 'BS',
4538 => 'S',
4539 => 'SS',
4540 => 'NG',
4541 => 'J',
4542 => 'C',
4543 => 'K',
4544 => 'T',
4545 => 'P',
4546 => 'H',

    );

    # Leading consonant (can be null)
    my %Jamo_L = (
'' => 11,
'B' => 7,
'BB' => 8,
'C' => 14,
'D' => 3,
'DD' => 4,
'G' => 0,
'GG' => 1,
'H' => 18,
'J' => 12,
'JJ' => 13,
'K' => 15,
'M' => 6,
'N' => 2,
'P' => 17,
'R' => 5,
'S' => 9,
'SS' => 10,
'T' => 16,

    );

    # Vowel
    my %Jamo_V = (
'A' => 0,
'AE' => 1,
'E' => 5,
'EO' => 4,
'EU' => 18,
'I' => 20,
'O' => 8,
'OE' => 11,
'U' => 13,
'WA' => 9,
'WAE' => 10,
'WE' => 15,
'WEO' => 14,
'WI' => 16,
'YA' => 2,
'YAE' => 3,
'YE' => 7,
'YEO' => 6,
'YI' => 19,
'YO' => 12,
'YU' => 17,

    );

    # Optional trailing consonant
    my %Jamo_T = (
'B' => 17,
'BS' => 18,
'C' => 23,
'D' => 7,
'G' => 1,
'GG' => 2,
'GS' => 3,
'H' => 27,
'J' => 22,
'K' => 24,
'L' => 8,
'LB' => 11,
'LG' => 9,
'LH' => 15,
'LM' => 10,
'LP' => 14,
'LS' => 12,
'LT' => 13,
'M' => 16,
'N' => 4,
'NG' => 21,
'NH' => 6,
'NJ' => 5,
'P' => 26,
'S' => 19,
'SS' => 20,
'T' => 25,

    );

    # Computed re that splits up a Hangul name into LVT or LV syllables
    my $syllable_re = qr/(|B|BB|C|D|DD|G|GG|H|J|JJ|K|M|N|P|R|S|SS|T)(A|AE|E|EO|EU|I|O|OE|U|WA|WAE|WE|WEO|WI|YA|YAE|YE|YEO|YI|YO|YU)(B|BS|C|D|G|GG|GS|H|J|K|L|LB|LG|LH|LM|LP|LS|LT|M|N|NG|NH|NJ|P|S|SS|T)?/;

    my $HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
    my $loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";

    # These constant names and values were taken from the Unicode standard,
    # version 5.1, section 3.12.  They are used in conjunction with Hangul
    # syllables
    my $SBase = 0xAC00;
    my $LBase = 0x1100;
    my $VBase = 0x1161;
    my $TBase = 0x11A7;
    my $SCount = 11172;
    my $LCount = 19;
    my $VCount = 21;
    my $TCount = 28;
    my $NCount = $VCount * $TCount;

    # name_to_code_point_special($name, $loose)
    #
    # Translates an algorithmically-determinable character name into its
    # code point.
    #
    #   $name   the character name.  For strict matching it is the full
    #           name, e.g. "HANGUL SYLLABLE GA" or
    #           "CJK UNIFIED IDEOGRAPH-4E01".
    #   $loose  true to use loose matching; 'name' in that case must be
    #           input as upper case with all blanks and dashes squeezed out.
    #
    # Returns the code point (a number), or nothing (undef in scalar
    # context) if $name is not one of the specially handled names or the
    # embedded code point is outside every range valid for that name.
    sub name_to_code_point_special {
        my ($name, $loose) = @_;

        # Hangul syllable names are composed from Jamo short names.  The
        # prefix is only recognized at the very start of the name; an
        # unanchored removal would let junk on either side of the prefix
        # be spliced together into a valid-looking syllable (e.g.
        # "GHANGUL SYLLABLE A" would otherwise be taken for "GA").
        if ((! $loose && $name =~ s/\A\Q$HANGUL_SYLLABLE\E//)
            || ($loose && $name =~ s/\A\Q$loose_HANGUL_SYLLABLE\E//))
        {
            return if $name !~ qr/^$syllable_re$/;
            my $L = $Jamo_L{$1};
            my $V = $Jamo_V{$2};
            my $T = (defined $3) ? $Jamo_T{$3} : 0;    # No trailing consonant
            return ($L * $VCount + $V) * $TCount + $T + $SBase;
        }

        # Name must end in 'code_point' for this to handle.
        return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x)
                   || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x));

        my $base = $1;
        my $code_point = CORE::hex $2;
        my $names_ref;

        if ($loose) {
            $names_ref = \%loose_names_ending_in_code_point;
        }
        else {
            # Strict names separate the base from the code point by a dash
            return if $base !~ s/-$//;
            $names_ref = \%names_ending_in_code_point;
        }

        # Name must be one of the ones which has the code point in it.
        return if ! $names_ref->{$base};

        # Look through the list of ranges that apply to this name to see if
        # the code point is in one of them.  The ranges are in ascending
        # order, so we can stop as soon as one starts above the code point.
        my $lows  = $names_ref->{$base}{'low'};
        my $highs = $names_ref->{$base}{'high'};
        for my $i (0 .. $#{$lows}) {
            return if $lows->[$i] > $code_point;
            next if $highs->[$i] < $code_point;

            # Here, the code point is in the range.
            return $code_point;
        }

        # Here, looked like the name had a code point number in it, but
        # did not match one of the valid ones.
        return;
    }

    # code_point_to_name_special($code_point)
    #
    # Returns the name of a code point if algorithmically determinable;
    # nothing (undef in scalar context) if not.
    sub code_point_to_name_special {
        my $code_point = shift;

        # If in the Hangul range, calculate the name based on Unicode's
        # algorithm
        if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) {
            use integer;
            my $SIndex = $code_point - $SBase;
            my $L = $LBase + $SIndex / $NCount;
            my $V = $VBase + ($SIndex % $NCount) / $TCount;
            my $T = $TBase + $SIndex % $TCount;

            # Lexical, so that the package variable $charnames::name is not
            # clobbered as a side effect of the lookup.
            my $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";

            # $T == $TBase means there is no trailing consonant
            $name .= $Jamo{$T} if $T != $TBase;
            return $name;
        }

        # Look through list of these code points for one in range.  The
        # list is sorted by code point, so stop at the first range that
        # begins above the one we are looking for.
        foreach my $hash (@code_points_ending_in_code_point) {
            return if $code_point < $hash->{'low'};
            if ($code_point <= $hash->{'high'}) {
                return sprintf("%s-%04X", $hash->{'name'}, $code_point);
            }
        }
        return;            # None found
    }
} # End closure

1;