# xref: /openbsd/gnu/usr.bin/perl/lib/unicore/Name.pm (revision fd823929)
# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
# This file is machine-generated by lib/unicore/mktables from the Unicode
# database, Version 15.0.0.  Any changes made here will be lost!


# !!!!!!!   INTERNAL PERL USE ONLY   !!!!!!!
# This file is for internal use by core Perl only.  The format and even the
# name or existence of this file are subject to change without notice.  Don't
# use it directly.  Use Unicode::UCD to access the Unicode character data
# base.



package charnames;

# This module contains machine-generated tables and code for the
# algorithmically-determinable Unicode character names.  The following
# routines can be used to translate between name and code point and vice versa:
#
#   name_to_code_point_special($name, $loose)
#   code_point_to_name_special($code_point)

{ # Closure

    # Everything in this closure is either a lexical or explicitly declared
    # with 'our', so an accidental undeclared variable is a compile error
    # rather than a silently created package global.
    use strict;

    # Matches legal code point.  4-6 hex numbers, If there are 6, the first
    # two must be 10; if there are 5, the first must not be a 0.  Written this
    # way to decrease backtracking.  The first regex allows the code point to
    # be at the end of a word, but to work properly, the word shouldn't end
    # with a valid hex character.  The second one won't match a code point at
    # the end of a word, and doesn't have the run-on issue
    my $run_on_code_point_re =
        qr/ (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b /xaa;
    my $code_point_re = qr/\b$run_on_code_point_re/aa;

    # In the following hash, the keys are the bases of names which include
    # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01.  The value
    # of each key is another hash which is used to get the low and high ends
    # for each range of code points that apply to the name.  The 'low' and
    # 'high' arrays are parallel and sorted in ascending order.
    my %names_ending_in_code_point = (
        'CJK COMPATIBILITY IDEOGRAPH' => {
            'low'  => [ 63744, 64112, 194560 ],
            'high' => [ 64109, 64217, 195101 ],
        },
        'CJK UNIFIED IDEOGRAPH' => {
            'low'  => [  13312,  19968, 131072, 173824, 177984,
                        178208, 183984, 196608, 201552 ],
            'high' => [  19903,  40959, 173791, 177977, 178205,
                        183969, 191456, 201546, 205743 ],
        },
        'KHITAN SMALL SCRIPT CHARACTER' => {
            'low'  => [ 101120 ],
            'high' => [ 101589 ],
        },
        'NUSHU CHARACTER' => {
            'low'  => [ 110960 ],
            'high' => [ 111355 ],
        },
        'TANGUT IDEOGRAPH' => {
            'low'  => [ 94208 ],
            'high' => [ 100343 ],
        },
        'TANGUT IDEOGRAPH SUPPLEMENT' => {
            'low'  => [ 101632 ],
            'high' => [ 101640 ],
        },
    );

    # The following hash has the same contents as the previous one, except it
    # is for loose matching, so each name has blanks and dashes squeezed out.
    # It is derived from the strict table so the two can never disagree; the
    # range data is only ever read, so the inner hashes are shared.
    my %loose_names_ending_in_code_point;
    for my $strict_name (keys %names_ending_in_code_point) {
        (my $squeezed = $strict_name) =~ tr/ -//d;
        $loose_names_ending_in_code_point{$squeezed}
            = $names_ending_in_code_point{$strict_name};
    }

    # Value of the 'legal' entry for each name base in the array below.
    # NOTE(review): presumably the set of characters that may legally occur
    # in such a name -- confirm against the consumer of 'legal'.
    my $legal_cjk_unified = "\n -0123456789ABCDEFGHIJKNOPRU";
    my $legal_cjk_compat  = "\n -0123456789ABCDEFGHIJKLMOPRTY";
    my $legal_tangut      = "\n -0123456789ABCDEFGHINOPRTU";
    my $legal_khitan      = "\n -0123456789ABCDEFHIKLMNPRST";
    my $legal_tangut_sup  = "\n -0123456789ABCDEFGHILMNOPRSTU";
    my $legal_nushu       = "\n -0123456789ABCDEFHNRSTU";

    # And the following array gives the inverse mapping from code points to
    # names.  Lowest code points are first.  This is deliberately a package
    # variable (@charnames::code_points_ending_in_code_point), not a lexical.
    our @code_points_ending_in_code_point = (
        { 'low' =>  13312, 'high' =>  19903,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
        { 'low' =>  19968, 'high' =>  40959,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
        { 'low' =>  63744, 'high' =>  64109,
          'name' => 'CJK COMPATIBILITY IDEOGRAPH',
          'legal' => $legal_cjk_compat },
        { 'low' =>  64112, 'high' =>  64217,
          'name' => 'CJK COMPATIBILITY IDEOGRAPH',
          'legal' => $legal_cjk_compat },
        { 'low' =>  94208, 'high' => 100343,
          'name' => 'TANGUT IDEOGRAPH',
          'legal' => $legal_tangut },
        { 'low' => 101120, 'high' => 101589,
          'name' => 'KHITAN SMALL SCRIPT CHARACTER',
          'legal' => $legal_khitan },
        { 'low' => 101632, 'high' => 101640,
          'name' => 'TANGUT IDEOGRAPH SUPPLEMENT',
          'legal' => $legal_tangut_sup },
        { 'low' => 110960, 'high' => 111355,
          'name' => 'NUSHU CHARACTER',
          'legal' => $legal_nushu },
        { 'low' => 131072, 'high' => 173791,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
        { 'low' => 173824, 'high' => 177977,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
        { 'low' => 177984, 'high' => 178205,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
        { 'low' => 178208, 'high' => 183969,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
        { 'low' => 183984, 'high' => 191456,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
        { 'low' => 194560, 'high' => 195101,
          'name' => 'CJK COMPATIBILITY IDEOGRAPH',
          'legal' => $legal_cjk_compat },
        { 'low' => 196608, 'high' => 201546,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
        { 'low' => 201552, 'high' => 205743,
          'name' => 'CJK UNIFIED IDEOGRAPH',
          'legal' => $legal_cjk_unified },
    );

    # Is exportable, make read-only
    Internals::SvREADONLY(@code_points_ending_in_code_point, 1);

    my $HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
    my $loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";

    # These constants names and values were taken from the Unicode standard,
    # version 5.1, section 3.12.  They are used in conjunction with Hangul
    # syllables
    my $SBase = 0xAC00;
    my $LBase = 0x1100;
    my $VBase = 0x1161;
    my $TBase = 0x11A7;
    my $SCount = 11172;
    my $LCount = 19;
    my $VCount = 21;
    my $TCount = 28;
    my $NCount = $VCount * $TCount;

    # Jamo short names, each list in code point order.  The position of a
    # name in its list is its L or V index; for trailing consonants the index
    # is the position plus one, because T index 0 means "no trailing
    # consonant".
    my @jamo_leading  = (qw(G GG N D DD R M B BB S SS), '',
                         qw(J JJ C K T P H));
    my @jamo_vowel    = qw(A AE YA YAE EO E YEO YE O WA WAE
                           OE YO U WEO WE WI YU EU YI I);
    my @jamo_trailing = qw(G GG GS N NJ NH D L LG LM LB LS LT LP LH
                           M B BS S SS NG J C K T P H);

    # %Jamo converts from code point to Jamo short name for use in composing
    # Hangul syllable names.  %Jamo_L (leading consonant, can be null),
    # %Jamo_V (vowel) and %Jamo_T (optional trailing consonant) convert a
    # short name back to its index.
    my (%Jamo, %Jamo_L, %Jamo_V, %Jamo_T);
    for my $i (0 .. $#jamo_leading) {
        $Jamo{$LBase + $i}          = $jamo_leading[$i];
        $Jamo_L{$jamo_leading[$i]}  = $i;
    }
    for my $i (0 .. $#jamo_vowel) {
        $Jamo{$VBase + $i}          = $jamo_vowel[$i];
        $Jamo_V{$jamo_vowel[$i]}    = $i;
    }
    for my $i (0 .. $#jamo_trailing) {
        $Jamo{$TBase + 1 + $i}      = $jamo_trailing[$i];
        $Jamo_T{$jamo_trailing[$i]} = $i + 1;
    }

    # Computed re that splits up a Hangul name into LVT or LV syllables
    my $syllable_re = qr/(|B|BB|C|D|DD|G|GG|H|J|JJ|K|M|N|P|R|S|SS|T)(A|AE|E|EO|EU|I|O|OE|U|WA|WAE|WE|WEO|WI|YA|YAE|YE|YEO|YI|YO|YU)(B|BS|C|D|G|GG|GS|H|J|K|L|LB|LG|LH|LM|LP|LS|LT|M|N|NG|NH|NJ|P|S|SS|T)?/;

    sub name_to_code_point_special {
        my ($name, $loose) = @_;

        # Returns undef if not one of the specially handled names; otherwise
        # returns the code point equivalent to the input name
        # $loose is non-zero if to use loose matching, 'name' in that case
        # must be input as upper case with all blanks and dashes squeezed out.

        # Hangul syllables.  The prefix must be at the very start of the
        # name; if it were removed from the middle, a bogus name such as
        # "GHANGUL SYLLABLE A" would collapse to "GA" and be accepted.
        my $hangul_prefix = $loose ? $loose_HANGUL_SYLLABLE
                                   : $HANGUL_SYLLABLE;
        if ($name =~ s/^\Q$hangul_prefix\E//) {
            return if $name !~ /^$syllable_re$/;
            my ($lead, $vowel, $trail) = ($1, $2, $3);

            my $L = $Jamo_L{$lead};
            my $V = $Jamo_V{$vowel};
            my $T = (defined $trail) ? $Jamo_T{$trail} : 0;
            return ($L * $VCount + $V) * $TCount + $T + $SBase;
        }

        # Name must end in 'code_point' for this to handle.
        my $matched = $loose
                    ? $name =~ /^ (.*?) ($run_on_code_point_re) $/x
                    : $name =~ /^ (.*)  ($code_point_re) $/x;
        return if ! $matched;

        my ($base, $hex) = ($1, $2);
        my $code_point = CORE::hex $hex;
        my $names_ref;

        if ($loose) {
            $names_ref = \%loose_names_ending_in_code_point;
        }
        else {
            # In strict matching a dash must separate the base from the
            # code point.
            return if $base !~ s/-$//;
            $names_ref = \%names_ending_in_code_point;
        }

        # Name must be one of the ones which has the code point in it.
        my $ranges = $names_ref->{$base};
        return if ! $ranges;

        # Look through the list of ranges that apply to this name to see if
        # the code point is in one of them.  The ranges are in ascending
        # order, so once one starts above the code point, none can match.
        my ($lows, $highs) = @{$ranges}{qw(low high)};
        for my $i (0 .. $#{$lows}) {
            return if $lows->[$i] > $code_point;
            return $code_point if $highs->[$i] >= $code_point;
        }

        # Here, looked like the name had a code point number in it, but
        # did not match one of the valid ones.
        return;
    }

    sub code_point_to_name_special {
        my $code_point = shift;

        # Returns the name of a code point if algorithmically determinable;
        # undef if not

        # If in the Hangul range, calculate the name based on Unicode's
        # algorithm
        if ($code_point >= $SBase && $code_point <= $SBase + $SCount - 1) {
            use integer;
            my $SIndex = $code_point - $SBase;
            my $L = $LBase + $SIndex / $NCount;
            my $V = $VBase + ($SIndex % $NCount) / $TCount;
            my $T = $TBase + $SIndex % $TCount;

            # Lexical, so the result does not leak into $charnames::name.
            my $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";

            # $T == $TBase means the syllable has no trailing consonant.
            $name .= $Jamo{$T} if $T != $TBase;
            return $name;
        }

        # Look through list of these code points for one in range.  The list
        # is sorted by 'low', so stop at the first range that starts above
        # the code point.
        foreach my $range (@code_points_ending_in_code_point) {
            return if $code_point < $range->{'low'};
            if ($code_point <= $range->{'high'}) {
                return sprintf("%s-%04X", $range->{'name'}, $code_point);
            }
        }
        return;            # None found
    }
} # End closure

1;
613