/* libhangul
 * Copyright (C) 2004 - 2009 Choe Hwanjin
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdlib.h>

#include "hangul.h"

/**
 * @defgroup hangulctype 한글 글자 조작
 *
 * @section hangulctype 한글 글자 조작
 * libhangul은 한글 각 글자를 구분하고 조작하는데 사용할 수 있는 몇가지 함수를
 * 제공한다.  libhangul의 글자 구분 함수의 인터페이스에서 글자의 기본 단위는
 * UCS4 코드값이다.
 */

/**
 * @file hangulctype.c
 */

/**
 * @ingroup hangulctype
 * @typedef ucschar
 * @brief UCS4 코드 단위의 글자 코드 값
 *
 * UCS4 코드 값을 저장한다. libhangul에서 사용하는 문자열의 기본단위이다.
 * preedit 문자열과 commit 문자열 모두 ucschar 포인터 형으로 전달된다.
 * 이 스트링은 C 스트링과 유사하게 0으로 끝난다.
 * 유니코드 값이 한글의 어떤 범주에 속하는지 확인하는 함수도 모두 ucschar 형을
 * 사용한다.
 */

52 static const ucschar syllable_base  = 0xac00;
53 static const ucschar choseong_base  = 0x1100;
54 static const ucschar jungseong_base = 0x1161;
55 static const ucschar jongseong_base = 0x11a7;
56 static const int njungseong = 21;
57 static const int njongseong = 28;
58 
59 /**
60  * @ingroup hangulctype
61  * @brief 초성인지 확인하는 함수
62  * @param c UCS4 코드 값
63  * @return @a c 가 초성에 해당하면 true를 리턴함, 아니면 false
64  *
65  * @a c 로 주어진 UCS4 코드가 초성인지 확인한다.
66  * Unicode 5.2 지원
67  */
68 bool
hangul_is_choseong(ucschar c)69 hangul_is_choseong(ucschar c)
70 {
71     return (c >= 0x1100 && c <= 0x115f) ||
72 	   (c >= 0xa960 && c <= 0xa97c);
73 ;
74 }
75 
76 /**
77  * @ingroup hangulctype
78  * @brief 중성인지 확인하는 함수
79  * @param c UCS4 코드 값
80  * @return @a c 가 중성에 해당하면 true를 리턴함, 아니면 false
81  *
82  * @a c 로 주어진 UCS4 코드가 중성인지 확인한다.
83  * Unicode 5.2 지원
84  */
85 bool
hangul_is_jungseong(ucschar c)86 hangul_is_jungseong(ucschar c)
87 {
88     return (c >= 0x1160 && c <= 0x11a7) ||
89 	   (c >= 0xd7b0 && c <= 0xd7c6);
90 }
91 
92 /**
93  * @ingroup hangulctype
94  * @brief 종성인지 확인하는 함수
95  * @param c UCS4 코드 값
96  * @return @a c 가 종성에 해당하면 true를 리턴함, 아니면 false
97  *
98  * @a c 로 주어진 UCS4 코드가 종성인지 확인한다.
99  * Unicode 5.2 지원
100  */
101 bool
hangul_is_jongseong(ucschar c)102 hangul_is_jongseong(ucschar c)
103 {
104     return (c >= 0x11a8 && c <= 0x11ff) ||
105 	   (c >= 0xd7cb && c <= 0xd7fb);
106 }
107 
108 bool
hangul_is_combining_mark(ucschar c)109 hangul_is_combining_mark(ucschar c)
110 {
111     return  c == 0x302e || c == 0x302f  ||
112 	   (c >= 0x0300 && c <= 0x036F) ||
113 	   (c >= 0x1dc0 && c <= 0x1dff) ||
114 	   (c >= 0xfe20 && c <= 0xfe2f);
115 }
116 
117 /**
118  * @ingroup hangulctype
119  * @brief 초성이고 조합 가능한지 확인
120  */
121 bool
hangul_is_choseong_conjoinable(ucschar c)122 hangul_is_choseong_conjoinable(ucschar c)
123 {
124     return c >= 0x1100 && c <= 0x1112;
125 }
126 
127 /**
128  * @ingroup hangulctype
129  * @brief 중성이고 조합 가능한지 확인
130  */
131 bool
hangul_is_jungseong_conjoinable(ucschar c)132 hangul_is_jungseong_conjoinable(ucschar c)
133 {
134     return c >= 0x1161 && c <= 0x1175;
135 }
136 
137 /**
138  * @ingroup hangulctype
139  * @brief 종성이고 조합 가능한지 확인
140  */
141 bool
hangul_is_jongseong_conjoinable(ucschar c)142 hangul_is_jongseong_conjoinable(ucschar c)
143 {
144     return c >= 0x11a7 && c <= 0x11c2;
145 }
146 
147 /**
148  * @ingroup hangulctype
149  * @brief 한글 음절 인지 확
150  * @param c UCS4 코드 값
151  * @return @a c가 한글 음절 코드이면 true, 그 외에는 false
152  *
153  * 이 함수는 @a c로 주어진 UCS4 코드가 현대 한글 음절에 해당하는지
154  * 확인한다.
155  */
156 bool
hangul_is_syllable(ucschar c)157 hangul_is_syllable(ucschar c)
158 {
159     return c >= 0xac00 && c <= 0xd7a3;
160 }
161 
162 /**
163  * @ingroup hangulctype
164  * @brief 자모 인지 확인
165  * @param c UCS4 코드 값
166  * @return @a c 가 자모 코드이면 true를 리턴, 그외에는 false
167  *
168  * @a c 로 주어진 UCS4 코드가 자모 코드인지 확인한다.
169  * Unicode 5.2 지원
170  */
171 bool
hangul_is_jamo(ucschar c)172 hangul_is_jamo(ucschar c)
173 {
174     return hangul_is_choseong(c) ||
175 	   hangul_is_jungseong(c) ||
176 	   hangul_is_jongseong(c);
177 }
178 
179 /**
180  * @ingroup hangulctype
181  * @brief 호환 자모인지 확인
182  * @param c UCS4 코드 값
183  * @return @a c가 호환자모이면 true, 그 외에는 false
184  *
185  * 이 함수는 @a c로 주어진 UCS4 코드가 호환 자모인지 확인한다.
186  */
187 bool
hangul_is_cjamo(ucschar c)188 hangul_is_cjamo(ucschar c)
189 {
190     return c >= 0x3131 && c <= 0x318e;
191 }
192 
193 /**
194  * @ingroup hangulctype
195  * @brief 자모 코드를 대응하는 호환 자모로 변환
196  * @param c 변환할 UCS4 코드 값
197  * @return @a c 에 대응되는 호환 자모 값, or c
198  *
199  * 이 함수는 @a c 로 주어진 자모 코드와 같은 형태를 가진 호환 자모 값을
200  * 리턴한다.  자모와 같은 형태를 가진 호환 자모가 없는 경우에는 @a c 의
201  * 값을 그대로 리턴한다.
202  */
203 ucschar
hangul_jamo_to_cjamo(ucschar c)204 hangul_jamo_to_cjamo(ucschar c)
205 {
206     static unsigned short jamo_table[] = {
207 	0x3131,     /* 0x1100 */
208 	0x3132,     /* 0x1101 */
209 	0x3134,     /* 0x1102 */
210 	0x3137,     /* 0x1103 */
211 	0x3138,     /* 0x1104 */
212 	0x3139,     /* 0x1105 */
213 	0x3141,     /* 0x1106 */
214 	0x3142,     /* 0x1107 */
215 	0x3143,     /* 0x1108 */
216 	0x3145,     /* 0x1109 */
217 	0x3146,     /* 0x110a */
218 	0x3147,     /* 0x110b */
219 	0x3148,     /* 0x110c */
220 	0x3149,     /* 0x110d */
221 	0x314a,     /* 0x110e */
222 	0x314b,     /* 0x110f */
223 	0x314c,     /* 0x1110 */
224 	0x314d,     /* 0x1111 */
225 	0x314e,     /* 0x1112 */
226 	0x0000,     /* 0x1113 */
227 	0x3165,     /* 0x1114 */
228 	0x3166,     /* 0x1115 */
229 	0x0000,     /* 0x1116 */
230 	0x0000,     /* 0x1117 */
231 	0x0000,     /* 0x1118 */
232 	0x0000,     /* 0x1119 */
233 	0x3140,     /* 0x111a */
234 	0x0000,     /* 0x111b */
235 	0x316e,     /* 0x111c */
236 	0x3171,     /* 0x111d */
237 	0x3172,     /* 0x111e */
238 	0x0000,     /* 0x111f */
239 	0x3173,     /* 0x1120 */
240 	0x3144,     /* 0x1121 */
241 	0x3174,     /* 0x1122 */
242 	0x3175,     /* 0x1123 */
243 	0x0000,     /* 0x1124 */
244 	0x0000,     /* 0x1125 */
245 	0x0000,     /* 0x1126 */
246 	0x3176,     /* 0x1127 */
247 	0x0000,     /* 0x1128 */
248 	0x3177,     /* 0x1129 */
249 	0x0000,     /* 0x112a */
250 	0x3178,     /* 0x112b */
251 	0x3179,     /* 0x112c */
252 	0x317a,     /* 0x112d */
253 	0x317b,     /* 0x112e */
254 	0x317c,     /* 0x112f */
255 	0x0000,     /* 0x1130 */
256 	0x0000,     /* 0x1131 */
257 	0x317d,     /* 0x1132 */
258 	0x0000,     /* 0x1133 */
259 	0x0000,     /* 0x1134 */
260 	0x0000,     /* 0x1135 */
261 	0x317e,     /* 0x1136 */
262 	0x0000,     /* 0x1137 */
263 	0x0000,     /* 0x1138 */
264 	0x0000,     /* 0x1139 */
265 	0x0000,     /* 0x113a */
266 	0x0000,     /* 0x113b */
267 	0x0000,     /* 0x113c */
268 	0x0000,     /* 0x113d */
269 	0x0000,     /* 0x113e */
270 	0x0000,     /* 0x113f */
271 	0x317f,     /* 0x1140 */
272 	0x0000,     /* 0x1141 */
273 	0x0000,     /* 0x1142 */
274 	0x0000,     /* 0x1143 */
275 	0x0000,     /* 0x1144 */
276 	0x0000,     /* 0x1145 */
277 	0x0000,     /* 0x1146 */
278 	0x3180,     /* 0x1147 */
279 	0x0000,     /* 0x1148 */
280 	0x0000,     /* 0x1149 */
281 	0x0000,     /* 0x114a */
282 	0x0000,     /* 0x114b */
283 	0x3181,     /* 0x114c */
284 	0x0000,     /* 0x114d */
285 	0x0000,     /* 0x114e */
286 	0x0000,     /* 0x114f */
287 	0x0000,     /* 0x1150 */
288 	0x0000,     /* 0x1151 */
289 	0x0000,     /* 0x1152 */
290 	0x0000,     /* 0x1153 */
291 	0x0000,     /* 0x1154 */
292 	0x0000,     /* 0x1155 */
293 	0x0000,     /* 0x1156 */
294 	0x3184,     /* 0x1157 */
295 	0x3185,     /* 0x1158 */
296 	0x3186,     /* 0x1159 */
297 	0x0000,     /* 0x115a */
298 	0x0000,     /* 0x115b */
299 	0x0000,     /* 0x115c */
300 	0x0000,     /* 0x115d */
301 	0x0000,     /* 0x115e */
302 	0x0000,     /* 0x115f */
303 	0x3164,     /* 0x1160 */
304 	0x314f,     /* 0x1161 */
305 	0x3150,     /* 0x1162 */
306 	0x3151,     /* 0x1163 */
307 	0x3152,     /* 0x1164 */
308 	0x3153,     /* 0x1165 */
309 	0x3154,     /* 0x1166 */
310 	0x3155,     /* 0x1167 */
311 	0x3156,     /* 0x1168 */
312 	0x3157,     /* 0x1169 */
313 	0x3158,     /* 0x116a */
314 	0x3159,     /* 0x116b */
315 	0x315a,     /* 0x116c */
316 	0x315b,     /* 0x116d */
317 	0x315c,     /* 0x116e */
318 	0x315d,     /* 0x116f */
319 	0x315e,     /* 0x1170 */
320 	0x315f,     /* 0x1171 */
321 	0x3160,     /* 0x1172 */
322 	0x3161,     /* 0x1173 */
323 	0x3162,     /* 0x1174 */
324 	0x3163,     /* 0x1175 */
325 	0x0000,     /* 0x1176 */
326 	0x0000,     /* 0x1177 */
327 	0x0000,     /* 0x1178 */
328 	0x0000,     /* 0x1179 */
329 	0x0000,     /* 0x117a */
330 	0x0000,     /* 0x117b */
331 	0x0000,     /* 0x117c */
332 	0x0000,     /* 0x117d */
333 	0x0000,     /* 0x117e */
334 	0x0000,     /* 0x117f */
335 	0x0000,     /* 0x1180 */
336 	0x0000,     /* 0x1181 */
337 	0x0000,     /* 0x1182 */
338 	0x0000,     /* 0x1183 */
339 	0x3187,     /* 0x1184 */
340 	0x3188,     /* 0x1185 */
341 	0x0000,     /* 0x1186 */
342 	0x0000,     /* 0x1187 */
343 	0x3189,     /* 0x1188 */
344 	0x0000,     /* 0x1189 */
345 	0x0000,     /* 0x118a */
346 	0x0000,     /* 0x118b */
347 	0x0000,     /* 0x118c */
348 	0x0000,     /* 0x118d */
349 	0x0000,     /* 0x118e */
350 	0x0000,     /* 0x118f */
351 	0x0000,     /* 0x1190 */
352 	0x318a,     /* 0x1191 */
353 	0x318b,     /* 0x1192 */
354 	0x0000,     /* 0x1193 */
355 	0x318c,     /* 0x1194 */
356 	0x0000,     /* 0x1195 */
357 	0x0000,     /* 0x1196 */
358 	0x0000,     /* 0x1197 */
359 	0x0000,     /* 0x1198 */
360 	0x0000,     /* 0x1199 */
361 	0x0000,     /* 0x119a */
362 	0x0000,     /* 0x119b */
363 	0x0000,     /* 0x119c */
364 	0x0000,     /* 0x119d */
365 	0x318d,     /* 0x119e */
366 	0x0000,     /* 0x119f */
367 	0x0000,     /* 0x11a0 */
368 	0x318e,     /* 0x11a1 */
369 	0x0000,     /* 0x11a2 */
370 	0x0000,     /* 0x11a3 */
371 	0x0000,     /* 0x11a4 */
372 	0x0000,     /* 0x11a5 */
373 	0x0000,     /* 0x11a6 */
374 	0x0000,     /* 0x11a7 */
375 	0x3131,	    /* 0x11a8 */
376 	0x3132,	    /* 0x11a9 */
377 	0x3133,	    /* 0x11aa */
378 	0x3134,	    /* 0x11ab */
379 	0x3135,	    /* 0x11ac */
380 	0x3136,	    /* 0x11ad */
381 	0x3137,	    /* 0x11ae */
382 	0x3139,	    /* 0x11af */
383 	0x313a,	    /* 0x11b0 */
384 	0x313b,	    /* 0x11b1 */
385 	0x313c,	    /* 0x11b2 */
386 	0x313d,	    /* 0x11b3 */
387 	0x313e,	    /* 0x11b4 */
388 	0x313f,	    /* 0x11b5 */
389 	0x3140,	    /* 0x11b6 */
390 	0x3141,	    /* 0x11b7 */
391 	0x3142,	    /* 0x11b8 */
392 	0x3144,	    /* 0x11b9 */
393 	0x3145,	    /* 0x11ba */
394 	0x3146,	    /* 0x11bb */
395 	0x3147,	    /* 0x11bc */
396 	0x3148,	    /* 0x11bd */
397 	0x314a,	    /* 0x11be */
398 	0x314b,	    /* 0x11bf */
399 	0x314c,	    /* 0x11c0 */
400 	0x314d,	    /* 0x11c1 */
401 	0x314e,	    /* 0x11c2 */
402 	0x0000,     /* 0x11c3 */
403 	0x0000,     /* 0x11c4 */
404 	0x0000,     /* 0x11c5 */
405 	0x0000,     /* 0x11c6 */
406 	0x3167,     /* 0x11c7 */
407 	0x3168,     /* 0x11c8 */
408 	0x0000,     /* 0x11c9 */
409 	0x0000,     /* 0x11ca */
410 	0x0000,     /* 0x11cb */
411 	0x3169,     /* 0x11cc */
412 	0x0000,     /* 0x11cd */
413 	0x316a,     /* 0x11ce */
414 	0x0000,     /* 0x11cf */
415 	0x0000,     /* 0x11d0 */
416 	0x0000,     /* 0x11d1 */
417 	0x0000,     /* 0x11d2 */
418 	0x316b,     /* 0x11d3 */
419 	0x0000,     /* 0x11d4 */
420 	0x0000,     /* 0x11d5 */
421 	0x0000,     /* 0x11d6 */
422 	0x316c,     /* 0x11d7 */
423 	0x0000,     /* 0x11d8 */
424 	0x316d,     /* 0x11d9 */
425 	0x0000,     /* 0x11da */
426 	0x0000,     /* 0x11db */
427 	0x0000,     /* 0x11dc */
428 	0x316f,     /* 0x11dd */
429 	0x0000,     /* 0x11de */
430 	0x3170,     /* 0x11df */
431 	0x0000,     /* 0x11e0 */
432 	0x0000,     /* 0x11e1 */
433 	0x0000,     /* 0x11e2 */
434 	0x0000,     /* 0x11e3 */
435 	0x0000,     /* 0x11e4 */
436 	0x0000,     /* 0x11e5 */
437 	0x0000,     /* 0x11e6 */
438 	0x0000,     /* 0x11e7 */
439 	0x0000,     /* 0x11e8 */
440 	0x0000,     /* 0x11e9 */
441 	0x0000,     /* 0x11ea */
442 	0x0000,     /* 0x11eb */
443 	0x0000,     /* 0x11ec */
444 	0x0000,     /* 0x11ed */
445 	0x0000,     /* 0x11ee */
446 	0x0000,     /* 0x11ef */
447 	0x0000,     /* 0x11f0 */
448 	0x3182,     /* 0x11f1 */
449 	0x3183,     /* 0x11f2 */
450 	0x0000,     /* 0x11f3 */
451 	0x0000,     /* 0x11f4 */
452 	0x0000,     /* 0x11f5 */
453 	0x0000,     /* 0x11f6 */
454 	0x0000,     /* 0x11f7 */
455 	0x0000,     /* 0x11f8 */
456 	0x0000,     /* 0x11f9 */
457 	0x0000,     /* 0x11fa */
458 	0x0000,     /* 0x11fb */
459 	0x0000,     /* 0x11fc */
460 	0x0000,     /* 0x11fd */
461 	0x0000,     /* 0x11fe */
462 	0x0000,     /* 0x11ff */
463     };
464 
465     static unsigned short jamo_ext_A_table[] = {
466 	0x0000,     /* 0xa960 */
467 	0x0000,     /* 0xa961 */
468 	0x0000,     /* 0xa962 */
469 	0x0000,     /* 0xa963 */
470 	0x313a,     /* 0xa964 */
471 	0x0000,     /* 0xa965 */
472 	0x316a,     /* 0xa966 */
473 	0x0000,     /* 0xa967 */
474 	0x313b,     /* 0xa968 */
475 	0x313c,     /* 0xa969 */
476 	0x0000,     /* 0xa96a */
477 	0x0000,     /* 0xa96b */
478 	0x313d,     /* 0xa96c */
479 	0x0000,     /* 0xa96d */
480 	0x0000,     /* 0xa96e */
481 	0x0000,     /* 0xa96f */
482 	0x0000,     /* 0xa970 */
483 	0x316f,     /* 0xa971 */
484 	0x0000,     /* 0xa972 */
485 	0x0000,     /* 0xa973 */
486 	0x0000,     /* 0xa974 */
487 	0x0000,     /* 0xa975 */
488 	0x0000,     /* 0xa976 */
489 	0x0000,     /* 0xa977 */
490 	0x0000,     /* 0xa978 */
491 	0x0000,     /* 0xa979 */
492 	0x0000,     /* 0xa97a */
493 	0x0000,     /* 0xa97b */
494 	0x0000,     /* 0xa97c */
495     };
496 
497     static unsigned short jamo_ext_B_table[] = {
498 	0x0000,     /* 0xd7b0 */
499 	0x0000,     /* 0xd7b1 */
500 	0x0000,     /* 0xd7b2 */
501 	0x0000,     /* 0xd7b3 */
502 	0x0000,     /* 0xd7b4 */
503 	0x0000,     /* 0xd7b5 */
504 	0x0000,     /* 0xd7b6 */
505 	0x0000,     /* 0xd7b7 */
506 	0x0000,     /* 0xd7b8 */
507 	0x0000,     /* 0xd7b9 */
508 	0x0000,     /* 0xd7ba */
509 	0x0000,     /* 0xd7bb */
510 	0x0000,     /* 0xd7bc */
511 	0x0000,     /* 0xd7bd */
512 	0x0000,     /* 0xd7be */
513 	0x0000,     /* 0xd7bf */
514 	0x0000,     /* 0xd7c0 */
515 	0x0000,     /* 0xd7c1 */
516 	0x0000,     /* 0xd7c2 */
517 	0x0000,     /* 0xd7c3 */
518 	0x0000,     /* 0xd7c4 */
519 	0x0000,     /* 0xd7c5 */
520 	0x0000,     /* 0xd7c6 */
521 	0x0000,     /* 0xd7c7 */
522 	0x0000,     /* 0xd7c8 */
523 	0x0000,     /* 0xd7c9 */
524 	0x0000,     /* 0xd7ca */
525 	0x0000,     /* 0xd7cb */
526 	0x0000,     /* 0xd7cc */
527 	0x3138,     /* 0xd7cd */
528 	0x0000,     /* 0xd7ce */
529 	0x0000,     /* 0xd7cf */
530 	0x0000,     /* 0xd7d0 */
531 	0x0000,     /* 0xd7d1 */
532 	0x0000,     /* 0xd7d2 */
533 	0x0000,     /* 0xd7d3 */
534 	0x0000,     /* 0xd7d4 */
535 	0x0000,     /* 0xd7d5 */
536 	0x0000,     /* 0xd7d6 */
537 	0x0000,     /* 0xd7d7 */
538 	0x0000,     /* 0xd7d8 */
539 	0x0000,     /* 0xd7d9 */
540 	0x0000,     /* 0xd7da */
541 	0x0000,     /* 0xd7db */
542 	0x0000,     /* 0xd7dc */
543 	0x0000,     /* 0xd7dd */
544 	0x0000,     /* 0xd7de */
545 	0x0000,     /* 0xd7df */
546 	0x0000,     /* 0xd7e0 */
547 	0x0000,     /* 0xd7e1 */
548 	0x0000,     /* 0xd7e2 */
549 	0x3173,     /* 0xd7e3 */
550 	0x0000,     /* 0xd7e4 */
551 	0x0000,     /* 0xd7e5 */
552 	0x3143,     /* 0xd7e6 */
553 	0x3175,     /* 0xd7e7 */
554 	0x3176,     /* 0xd7e8 */
555 	0x0000,     /* 0xd7e9 */
556 	0x0000,     /* 0xd7ea */
557 	0x0000,     /* 0xd7eb */
558 	0x0000,     /* 0xd7ec */
559 	0x0000,     /* 0xd7ed */
560 	0x0000,     /* 0xd7ee */
561 	0x317e,     /* 0xd7ef */
562 	0x0000,     /* 0xd7f0 */
563 	0x0000,     /* 0xd7f1 */
564 	0x0000,     /* 0xd7f2 */
565 	0x0000,     /* 0xd7f3 */
566 	0x0000,     /* 0xd7f4 */
567 	0x0000,     /* 0xd7f5 */
568 	0x0000,     /* 0xd7f6 */
569 	0x0000,     /* 0xd7f7 */
570 	0x0000,     /* 0xd7f8 */
571 	0x3149,     /* 0xd7f9 */
572 	0x0000,     /* 0xd7fa */
573 	0x0000,     /* 0xd7fb */
574     };
575 
576     ucschar ret = 0;
577 
578     if (c >= 0x1100 && c <= 0x11ff) {
579 	ret = jamo_table[c - 0x1100];
580     } else if (c >= 0xa960 && c <= 0xa97c) {
581 	ret = jamo_ext_A_table[c - 0xa960];
582     } else if (c >= 0xd7b0 && c <= 0xd7fb) {
583 	ret = jamo_ext_B_table[c - 0xd7b0];
584     }
585 
586     if (ret == 0)
587 	ret = c;
588 
589     return ret;
590 }
591 
592 ucschar
hangul_choseong_to_jongseong(ucschar c)593 hangul_choseong_to_jongseong(ucschar c)
594 {
595     static const ucschar table[] = {
596 	0x11a8,  /* cho kiyeok               -> jong kiyeok               */
597 	0x11a9,  /* cho ssangkiyeok          -> jong ssangkiyeok          */
598 	0x11ab,  /* cho nieun                -> jong nieun                */
599 	0x11ae,  /* cho tikeut               -> jong tikeut               */
600 	0xd7cd,  /* cho ssangtikeut          -> jong ssangtikeut          */
601 	0x11af,  /* cho rieul                -> jong rieul                */
602 	0x11b7,  /* cho mieum                -> jong mieum                */
603 	0x11b8,  /* cho pieup                -> jong pieup                */
604 	0xd7e6,  /* cho ssangpieup           -> jong ssangpieup           */
605 	0x11ba,  /* cho sios                 -> jong sios                 */
606 	0x11bb,  /* cho ssangsios            -> jong ssangsios            */
607 	0x11bc,  /* cho ieung                -> jong ieung                */
608 	0x11bd,  /* cho cieuc                -> jong cieuc                */
609 	0xd7f9,  /* cho ssangcieuc           -> jong ssangcieuc           */
610 	0x11be,  /* cho chieuch              -> jong chieuch              */
611 	0x11bf,  /* cho khieukh              -> jong khieukh              */
612 	0x11c0,  /* cho thieuth              -> jong thieuth              */
613 	0x11c1,  /* cho phieuph              -> jong phieuph              */
614 	0x11c2,  /* cho hieuh                -> jong hieuh                */
615 	0x11c5,  /* cho nieun-kiyeok         -> jong nieun-kiyeok         */
616 	0x11ff,  /* cho ssangnieun           -> jong ssangnieun           */
617 	0x11c6,  /* cho nieun-tikeut         -> jong nieun-tikeut         */
618 	0,       /* cho nieun-pieup                                      */
619 	0x11ca,  /* cho tikeut-kiyeok        -> jong tikeut-kiyeok        */
620 	0x11cd,  /* cho rieul-nieun          -> jong rieul-nieun          */
621 	0x11d0,  /* cho ssangrieul           -> jong ssangrieul           */
622 	0x11b6,  /* cho rieul-hieuh          -> jong rieul-hieuh          */
623 	0xd7dd,  /* cho kapyeounrieul        -> jong kapyeounrieul        */
624 	0x11dc,  /* cho mieum-pieup          -> jong mieum-pieup          */
625 	0x11e2,  /* cho kapyeounmieum        -> jong kapyeounmieum        */
626 	0,       /* cho pieup-kiyeok                                     */
627 	0,       /* cho pieup-nieun                                      */
628 	0xd7e3,  /* cho pieup-tikeut         -> jong pieup-tikeut         */
629 	0x11b9,  /* cho pieup-sios           -> jong pieup-sios           */
630 	0,       /* cho pieup-sios-kiyeok                                */
631 	0xd7e7,  /* cho pieup-sios-tikeut    -> jong pieup-sios-tikeut    */
632 	0,       /* cho pieup-sios-pieup                                 */
633 	0,       /* cho pieup-ssangsios                                  */
634 	0,       /* cho pieup-sios-cieuc                                 */
635 	0xd7e8,  /* cho pieup-cieuc          -> jong pieup-cieuc          */
636 	0xd7e9,  /* cho pieup-chieuch        -> jong pieup-chieuch        */
637 	0,       /* cho pieup-thieuth                                    */
638 	0x11e4,  /* cho pieup-phieuph        -> jong pieup-phieuph        */
639 	0x11e6,  /* cho kapyeounpieup        -> jong kapyeounpieup        */
640 	0,       /* cho kapyeounssangpieup                               */
641 	0x11e7,  /* cho sios-kiyeok          -> jong sios-kiyeok          */
642 	0,       /* cho sios-nieun                                       */
643 	0x11e8,  /* cho sios-tikeut          -> jong sios-tikeut          */
644 	0x11e9,  /* cho sios-rieul           -> jong sios-rieul           */
645 	0xd7ea,  /* cho sios-mieum           -> jong sios-mieum           */
646 	0x11ea,  /* cho sios-pieup           -> jong sios-pieup           */
647 	0,       /* cho sios-pieup-kiyeok                                */
648 	0,       /* cho sios-ssangsios                                   */
649 	0,       /* cho sios-ieung                                       */
650 	0xd7ef,  /* cho sios-cieuc           -> jong sios-cieuc           */
651 	0xd7f0,  /* cho sios-chieuch         -> jong sios-chieuch         */
652 	0,       /* cho sios-khieukh                                     */
653 	0xd7f1,  /* cho sios-thieuth         -> jong sios-thieuth         */
654 	0,       /* cho sios-phieuph                                     */
655 	0xd7f2,  /* cho sios-hieuh           -> jong sios-hieuh           */
656 	0,       /* cho chitueumsios                                     */
657 	0,       /* cho chitueumssangsios                                */
658 	0,       /* cho ceongchieumsios                                  */
659 	0,       /* cho ceongchieumssangsios                             */
660 	0x11eb,  /* cho pansios              -> jong pansios              */
661 	0x11ec,  /* cho ieung-kiyeok         -> jong ieung-kiyeok         */
662 	0,       /* cho ieung-tikeut                                     */
663 	0,       /* cho ieung-mieum                                      */
664 	0,       /* cho ieung-pieup                                      */
665 	0,       /* cho ieung-sios                                       */
666 	0,       /* cho ieung-pansios                                    */
667 	0x11ee,  /* cho ssangieung           -> jong ssangieung           */
668 	0,       /* cho ieung-cieuc                                      */
669 	0,       /* cho ieung-chieuch                                    */
670 	0,       /* cho ieung-thieuth                                    */
671 	0,       /* cho ieung-phieuph                                    */
672 	0x11f0,  /* cho yesieung             -> jong yesieung             */
673 	0,       /* cho cieuc-ieung                                      */
674 	0,       /* cho chitueumcieuc                                    */
675 	0,       /* cho chitueumssangcieuc                               */
676 	0,       /* cho ceongchieumcieuc                                 */
677 	0,       /* cho ceongchieumssangcieuc                            */
678 	0,       /* cho chieuch-khieukh                                  */
679 	0,       /* cho chieuch-hieuh                                    */
680 	0,       /* cho chitueumchieuch                                  */
681 	0,       /* cho ceongchieumchieuch                               */
682 	0x11f3,  /* cho phieuph-pieup        -> jong phieuph-pieup        */
683 	0x11f4,  /* cho kapyeounphieuph      -> jong kapyeounphieuph      */
684 	0,       /* cho ssanghieuh                                       */
685 	0x11f9,  /* cho yeorinhieuh          -> jong yeorinhieuh          */
686 	0,       /* cho kiyeok-tikeut                                    */
687 	0x11c7,  /* cho nieun-sios           -> jong nieun-sios           */
688 	0x11ac,  /* cho nieun-cieuc          -> jong nieun-cieuc          */
689 	0x11ad,  /* cho nieun-hieuh          -> jong nieun-hieuh          */
690 	0x11cb,  /* cho tikeut-rieul         -> jong tikeut-rieul         */
691 	0,       /* cho filler                                           */
692     };
693 
694     static const ucschar table_ext_a[] = {
695 	0,       /* cho tikeut-mieum                                     */
696 	0xd7cf,  /* cho tikeut-pieup         -> jong tikeut-pieup         */
697 	0xd7d0,  /* cho tikeut-sios          -> jong tikeut-sios          */
698 	0xd7d2,  /* cho tikeut-cieuc         -> jong tikeut-cieuc         */
699 	0x11b0,  /* cho rieul-kiyeok         -> jong rieul-kiyeok         */
700 	0xd7d5,  /* cho rieul-ssangkiyeok    -> jong rieul-ssangkiyeok    */
701 	0x11ce,  /* cho rieul-tikeut         -> jong rieul-tikeut         */
702 	0,       /* cho rieul-ssangtikeut                                */
703 	0x11b1,  /* cho rieul-mieum          -> jong rieul-mieum          */
704 	0x11b2,  /* cho rieul-pieup          -> jong rieul-pieup          */
705 	0,       /* cho rieul-ssangpieup                                 */
706 	0x11d5,  /* cho rieul-kapyeounpieup  -> jong rieul-kapyeounpieup  */
707 	0x11b3,  /* cho rieul-sios           -> jong rieul-sios           */
708 	0,       /* cho rieul-cieuc                                      */
709 	0x11d8,  /* cho rieul-khieukh        -> jong rieul-khieukh        */
710 	0x11da,  /* cho mieum-kiyeok         -> jong mieum-kiyeok         */
711 	0,       /* cho mieum-tikeut                                     */
712 	0x11dd,  /* cho mieum-sios           -> jong mieum-sios           */
713 	0,       /* cho pieup-sios-thieuth                               */
714 	0,       /* cho pieup-khieukh                                    */
715 	0x11e5,  /* cho pieup-hieuh          -> jong pieup-hieuh          */
716 	0,       /* cho ssangsios-pieup                                  */
717 	0,       /* cho ieung-rieul                                      */
718 	0,       /* cho ieung-hieuh                                      */
719 	0,       /* cho ssangcieuc-hieuh                                 */
720 	0,       /* cho ssangthieuth                                     */
721 	0,       /* cho phieuph-hieuh                                    */
722 	0,       /* cho hieuh-sios                                       */
723 	0,       /* cho ssangyeorinhieuh                                 */
724     };
725 
726     if (c >= 0x1100 && c <= 0x115e)
727 	return table[c - 0x1100];
728     else if (c >= 0xa960 && c <= 0xa97c)
729 	return table_ext_a[c - 0xa960];
730 
731     return 0;
732 }
733 
734 ucschar
hangul_jongseong_to_choseong(ucschar c)735 hangul_jongseong_to_choseong(ucschar c)
736 {
737     static const ucschar table[] = {
738 	0x1100,  /* jong kiyeok               -> cho kiyeok               */
739 	0x1101,  /* jong ssangkiyeok          -> cho ssangkiyeok          */
740 	0,       /* jong kiyeok-sios                                      */
741 	0x1102,  /* jong nieun                -> cho nieun                */
742 	0x115c,  /* jong nieun-cieuc          -> cho nieun-cieuc          */
743 	0x115d,  /* jong nieun-hieuh          -> cho nieun-hieuh          */
744 	0x1103,  /* jong tikeut               -> cho tikeut               */
745 	0x1105,  /* jong rieul                -> cho rieul                */
746 	0xa964,  /* jong rieul-kiyeok         -> cho rieul-kiyeok         */
747 	0xa968,  /* jong rieul-mieum          -> cho rieul-mieum          */
748 	0xa969,  /* jong rieul-pieup          -> cho rieul-pieup          */
749 	0xa96c,  /* jong rieul-sios           -> cho rieul-sios           */
750 	0,       /* jong rieul-thieuth                                    */
751 	0,       /* jong rieul-phieuph                                    */
752 	0x111a,  /* jong rieul-hieuh          -> cho rieul-hieuh          */
753 	0x1106,  /* jong mieum                -> cho mieum                */
754 	0x1107,  /* jong pieup                -> cho pieup                */
755 	0x1121,  /* jong pieup-sios           -> cho pieup-sios           */
756 	0x1109,  /* jong sios                 -> cho sios                 */
757 	0x110a,  /* jong ssangsios            -> cho ssangsios            */
758 	0x110b,  /* jong ieung                -> cho ieung                */
759 	0x110c,  /* jong cieuc                -> cho cieuc                */
760 	0x110e,  /* jong chieuch              -> cho chieuch              */
761 	0x110f,  /* jong khieukh              -> cho khieukh              */
762 	0x1110,  /* jong thieuth              -> cho thieuth              */
763 	0x1111,  /* jong phieuph              -> cho phieuph              */
764 	0x1112,  /* jong hieuh                -> cho hieuh                */
765 	0,       /* jong kiyeok-rieul                                     */
766 	0,       /* jong kiyeok-sios-kiyeok                               */
767 	0x1113,  /* jong nieun-kiyeok         -> cho nieun-kiyeok         */
768 	0x1115,  /* jong nieun-tikeut         -> cho nieun-tikeut         */
769 	0x115b,  /* jong nieun-sios           -> cho nieun-sios           */
770 	0,       /* jong nieun-pansios                                    */
771 	0,       /* jong nieun-thieuth                                    */
772 	0x1117,  /* jong tikeut-kiyeok        -> cho tikeut-kiyeok        */
773 	0x115e,  /* jong tikeut-rieul         -> cho tikeut-rieul         */
774 	0,       /* jong rieul-kiyeok-sios                                */
775 	0x1118,  /* jong rieul-nieun          -> cho rieul-nieun          */
776 	0xa966,  /* jong rieul-tikeut         -> cho rieul-tikeut         */
777 	0,       /* jong rieul-tikeut-hieuh                               */
778 	0x1119,  /* jong ssangrieul           -> cho ssangrieul           */
779 	0,       /* jong rieul-mieum-kiyeok                               */
780 	0,       /* jong rieul-mieum-sios                                 */
781 	0,       /* jong rieul-pieup-sios                                 */
782 	0,       /* jong rieul-pieup-hieuh                                */
783 	0xa96b,  /* jong rieul-kapyeounpieup  -> cho rieul-kapyeounpieup  */
784 	0,       /* jong rieul-ssangsios                                  */
785 	0,       /* jong rieul-pansios                                    */
786 	0xa96e,  /* jong rieul-khieukh        -> cho rieul-khieukh        */
787 	0,       /* jong rieul-yeorinhieuh                                */
788 	0xa96f,  /* jong mieum-kiyeok         -> cho mieum-kiyeok         */
789 	0,       /* jong mieum-rieul                                      */
790 	0x111c,  /* jong mieum-pieup          -> cho mieum-pieup          */
791 	0xa971,  /* jong mieum-sios           -> cho mieum-sios           */
792 	0,       /* jong mieum-ssangsios                                  */
793 	0,       /* jong mieum-pansios                                    */
794 	0,       /* jong mieum-chieuch                                    */
795 	0,       /* jong mieum-hieuh                                      */
796 	0x111d,  /* jong kapyeounmieum        -> cho kapyeounmieum        */
797 	0,       /* jong pieup-rieul                                      */
798 	0x112a,  /* jong pieup-phieuph        -> cho pieup-phieuph        */
799 	0xa974,  /* jong pieup-hieuh          -> cho pieup-hieuh          */
800 	0x112b,  /* jong kapyeounpieup        -> cho kapyeounpieup        */
801 	0x112d,  /* jong sios-kiyeok          -> cho sios-kiyeok          */
802 	0x112f,  /* jong sios-tikeut          -> cho sios-tikeut          */
803 	0x1130,  /* jong sios-rieul           -> cho sios-rieul           */
804 	0x1132,  /* jong sios-pieup           -> cho sios-pieup           */
805 	0x1140,  /* jong pansios              -> cho pansios              */
806 	0x1141,  /* jong ieung-kiyeok         -> cho ieung-kiyeok         */
807 	0,       /* jong ieung-ssangkiyeok                                */
808 	0x1147,  /* jong ssangieung           -> cho ssangieung           */
809 	0,       /* jong ieung-khieukh                                    */
810 	0x114c,  /* jong yesieung             -> cho yesieung             */
811 	0,       /* jong yesieung-sios                                    */
812 	0,       /* jong yesieung-pansios                                 */
813 	0x1156,  /* jong phieuph-pieup        -> cho phieuph-pieup        */
814 	0x1157,  /* jong kapyeounphieuph      -> cho kapyeounphieuph      */
815 	0,       /* jong hieuh-nieun                                      */
816 	0,       /* jong hieuh-rieul                                      */
817 	0,       /* jong hieuh-mieum                                      */
818 	0,       /* jong hieuh-pieup                                      */
819 	0x1159,  /* jong yeorinhieuh          -> cho yeorinhieuh          */
820 	0,       /* jong kiyeok-nieun                                     */
821 	0,       /* jong kiyeok-pieup                                     */
822 	0,       /* jong kiyeok-chieuch                                   */
823 	0,       /* jong kiyeok-khieukh                                   */
824 	0,       /* jong kiyeok-hieuh                                     */
825 	0x1114,  /* jong ssangnieun           -> cho ssangnieun           */
826     };
827 
828     static const ucschar table_ext_b[] = {
829 	0,       /* jong nieun-rieul                                      */
830 	0,       /* jong nieun-chieuch                                    */
831 	0x1104,  /* jong ssangtikeut          -> cho ssangtikeut          */
832 	0,       /* jong ssangtikeut-pieup                                */
833 	0xa961,  /* jong tikeut-pieup         -> cho tikeut-pieup         */
834 	0xa962,  /* jong tikeut-sios          -> cho tikeut-sios          */
835 	0,       /* jong tikeut-sios-kiyeok                               */
836 	0xa963,  /* jong tikeut-cieuc         -> cho tikeut-cieuc         */
837 	0,       /* jong tikeut-chieuch                                   */
838 	0,       /* jong tikeut-thieuth                                   */
839 	0xa965,  /* jong rieul-ssangkiyeok    -> cho rieul-ssangkiyeok    */
840 	0,       /* jong rieul-kiyeok-hieuh                               */
841 	0,       /* jong ssangrieul-khieukh                               */
842 	0,       /* jong rieul-mieum-hieuh                                */
843 	0,       /* jong rieul-pieup-tikeut                               */
844 	0,       /* jong rieul-pieup-phieuph                              */
845 	0,       /* jong rieul-yesieung                                   */
846 	0,       /* jong rieul-yeorinhieuh-hieuh                          */
847 	0x111b,  /* jong kapyeounrieul        -> cho kapyeounrieul        */
848 	0,       /* jong mieum-nieun                                      */
849 	0,       /* jong mieum-ssangnieun                                 */
850 	0,       /* jong ssangmieum                                       */
851 	0,       /* jong mieum-pieup-sios                                 */
852 	0,       /* jong mieum-cieuc                                      */
853 	0x1120,  /* jong pieup-tikeut         -> cho pieup-tikeut         */
854 	0,       /* jong pieup-rieul-phieuph                              */
855 	0,       /* jong pieup-mieum                                      */
856 	0x1108,  /* jong ssangpieup           -> cho ssangpieup           */
857 	0x1123,  /* jong pieup-sios-tikeut    -> cho pieup-sios-tikeut    */
858 	0x1127,  /* jong pieup-cieuc          -> cho pieup-cieuc          */
859 	0x1128,  /* jong pieup-chieuch        -> cho pieup-chieuch        */
860 	0x1131,  /* jong sios-mieum           -> cho sios-mieum           */
861 	0,       /* jong sios-kapyeounpieup                               */
862 	0,       /* jong ssangsios-kiyeok                                 */
863 	0,       /* jong ssangsios-tikeut                                 */
864 	0,       /* jong sios-pansios                                     */
865 	0x1136,  /* jong sios-cieuc           -> cho sios-cieuc           */
866 	0x1137,  /* jong sios-chieuch         -> cho sios-chieuch         */
867 	0x1139,  /* jong sios-thieuth         -> cho sios-thieuth         */
868 	0x113b,  /* jong sios-hieuh           -> cho sios-hieuh           */
869 	0,       /* jong pansios-pieup                                    */
870 	0,       /* jong pansios-kapyeounpieup                            */
871 	0,       /* jong yesieung-mieum                                   */
872 	0,       /* jong yesieung-hieuh                                   */
873 	0,       /* jong cieuc-pieup                                      */
874 	0,       /* jong cieuc-ssangpieup                                 */
875 	0x110d,  /* jong ssangcieuc           -> cho ssangcieuc           */
876 	0,       /* jong phieuph-sios                                     */
877 	0,       /* jong phieuph-thieuth                                  */
878     };
879 
880     if (c >= 0x11a8 && c <= 0x11ff)
881 	return table[c - 0x11a8];
882     else if (c >= 0xd7cb && c <= 0xd7fb)
883 	return table_ext_b[c - 0xd7cb];
884 
885     return 0;
886 }
887 
/**
 * @ingroup hangulctype
 * @brief Split a jongseong into a remaining jongseong and a choseong
 * @param c jongseong code to split; only U+11A8..U+11C2 are in the table
 * @param jong receives the jongseong that stays behind, or 0 if none stays
 * @param cho receives the choseong taken from the last component of @a c
 *
 * A single-consonant jongseong leaves nothing behind (@a jong is 0) and is
 * handed over as the matching choseong.  A compound jongseong keeps its first
 * component as jongseong and hands its last component over as choseong.
 * For a code outside U+11A8..U+11C2 both @a jong and @a cho are set to 0
 * instead of indexing past the end of the table.
 */
void
hangul_jongseong_dicompose(ucschar c, ucschar* jong, ucschar* cho)
{
    /* Indexed by (c - 0x11a8): { remaining jongseong, resulting choseong } */
    static const ucschar table[][2] = {
	{ 0,      0x1100 }, /* U+11A8 kiyeok        = cho  kiyeok               */
	{ 0x11a8, 0x1100 }, /* U+11A9 ssangkiyeok   = jong kiyeok + cho kiyeok  */
	{ 0x11a8, 0x1109 }, /* U+11AA kiyeok-sios   = jong kiyeok + cho sios    */
	{ 0,      0x1102 }, /* U+11AB nieun         = cho  nieun                */
	{ 0x11ab, 0x110c }, /* U+11AC nieun-cieuc   = jong nieun  + cho cieuc   */
	{ 0x11ab, 0x1112 }, /* U+11AD nieun-hieuh   = jong nieun  + cho hieuh   */
	{ 0,      0x1103 }, /* U+11AE tikeut        = cho  tikeut               */
	{ 0,      0x1105 }, /* U+11AF rieul         = cho  rieul                */
	{ 0x11af, 0x1100 }, /* U+11B0 rieul-kiyeok  = jong rieul  + cho kiyeok  */
	{ 0x11af, 0x1106 }, /* U+11B1 rieul-mieum   = jong rieul  + cho mieum   */
	{ 0x11af, 0x1107 }, /* U+11B2 rieul-pieup   = jong rieul  + cho pieup   */
	{ 0x11af, 0x1109 }, /* U+11B3 rieul-sios    = jong rieul  + cho sios    */
	{ 0x11af, 0x1110 }, /* U+11B4 rieul-thieuth = jong rieul  + cho thieuth */
	{ 0x11af, 0x1111 }, /* U+11B5 rieul-phieuph = jong rieul  + cho phieuph */
	{ 0x11af, 0x1112 }, /* U+11B6 rieul-hieuh   = jong rieul  + cho hieuh   */
	{ 0,      0x1106 }, /* U+11B7 mieum         = cho  mieum                */
	{ 0,      0x1107 }, /* U+11B8 pieup         = cho  pieup                */
	{ 0x11b8, 0x1109 }, /* U+11B9 pieup-sios    = jong pieup  + cho sios    */
	{ 0,      0x1109 }, /* U+11BA sios          = cho  sios                 */
	{ 0x11ba, 0x1109 }, /* U+11BB ssangsios     = jong sios   + cho sios    */
	{ 0,      0x110b }, /* U+11BC ieung         = cho  ieung                */
	{ 0,      0x110c }, /* U+11BD cieuc         = cho  cieuc                */
	{ 0,      0x110e }, /* U+11BE chieuch       = cho  chieuch              */
	{ 0,      0x110f }, /* U+11BF khieukh       = cho  khieukh              */
	{ 0,      0x1110 }, /* U+11C0 thieuth       = cho  thieuth              */
	{ 0,      0x1111 }, /* U+11C1 phieuph       = cho  phieuph              */
	{ 0,      0x1112 }  /* U+11C2 hieuh         = cho  hieuh                */
    };

    /* The table only covers U+11A8..U+11C2; anything else would index
     * outside of it, so report "nothing" for such codes. */
    if (c < 0x11a8 || c > 0x11c2) {
	*jong = 0;
	*cho  = 0;
	return;
    }

    *jong = table[c - 0x11a8][0];
    *cho  = table[c - 0x11a8][1];
}
924 
/*
 * Look up how many components the jongseong @a jong is counted as having.
 *
 * Codes in U+11A8..U+11FF and U+D7CB..U+D7FB are looked up in the two tables
 * below; any other code yields 0.
 */
static int
hangul_jongseong_get_ncomponent(ucschar jong)
{
    /* Indexed by (jong - 0x11a8) */
    static const char ncomp_jamo[] = {
	1, /* U+11A8 kiyeok                     */
	2, /* U+11A9 ssangkiyeok                */
	2, /* U+11AA kiyeok-sios                */
	1, /* U+11AB nieun                      */
	2, /* U+11AC nieun-cieuc                */
	2, /* U+11AD nieun-hieuh                */
	1, /* U+11AE tikeut                     */
	1, /* U+11AF rieul                      */
	2, /* U+11B0 rieul-kiyeok               */
	2, /* U+11B1 rieul-mieum                */
	2, /* U+11B2 rieul-pieup                */
	2, /* U+11B3 rieul-sios                 */
	2, /* U+11B4 rieul-thieuth              */
	2, /* U+11B5 rieul-phieuph              */
	2, /* U+11B6 rieul-hieuh                */
	1, /* U+11B7 mieum                      */
	1, /* U+11B8 pieup                      */
	2, /* U+11B9 pieup-sios                 */
	1, /* U+11BA sios                       */
	2, /* U+11BB ssangsios                  */
	1, /* U+11BC ieung                      */
	1, /* U+11BD cieuc                      */
	1, /* U+11BE chieuch                    */
	1, /* U+11BF khieukh                    */
	1, /* U+11C0 thieuth                    */
	1, /* U+11C1 phieuph                    */
	1, /* U+11C2 hieuh                      */
	2, /* U+11C3 kiyeok-rieul               */
	3, /* U+11C4 kiyeok-sios-kiyeok         */
	2, /* U+11C5 nieun-kiyeok               */
	2, /* U+11C6 nieun-tikeut               */
	2, /* U+11C7 nieun-sios                 */
	2, /* U+11C8 nieun-pansios              */
	2, /* U+11C9 nieun-thieuth              */
	2, /* U+11CA tikeut-kiyeok              */
	2, /* U+11CB tikeut-rieul               */
	3, /* U+11CC rieul-kiyeok-sios          */
	2, /* U+11CD rieul-nieun                */
	2, /* U+11CE rieul-tikeut               */
	3, /* U+11CF rieul-tikeut-hieuh         */
	2, /* U+11D0 ssangrieul                 */
	3, /* U+11D1 rieul-mieum-kiyeok         */
	3, /* U+11D2 rieul-mieum-sios           */
	3, /* U+11D3 rieul-pieup-sios           */
	3, /* U+11D4 rieul-pieup-hieuh          */
	3, /* U+11D5 rieul-kapyeounpieup        */
	3, /* U+11D6 rieul-ssangsios            */
	2, /* U+11D7 rieul-pansios              */
	2, /* U+11D8 rieul-khieukh              */
	2, /* U+11D9 rieul-yeorinhieuh          */
	2, /* U+11DA mieum-kiyeok               */
	2, /* U+11DB mieum-rieul                */
	2, /* U+11DC mieum-pieup                */
	2, /* U+11DD mieum-sios                 */
	3, /* U+11DE mieum-ssangsios            */
	2, /* U+11DF mieum-pansios              */
	2, /* U+11E0 mieum-chieuch              */
	2, /* U+11E1 mieum-hieuh                */
	2, /* U+11E2 kapyeounmieum              */
	2, /* U+11E3 pieup-rieul                */
	2, /* U+11E4 pieup-phieuph              */
	2, /* U+11E5 pieup-hieuh                */
	2, /* U+11E6 kapyeounpieup              */
	2, /* U+11E7 sios-kiyeok                */
	2, /* U+11E8 sios-tikeut                */
	2, /* U+11E9 sios-rieul                 */
	2, /* U+11EA sios-pieup                 */
	1, /* U+11EB pansios                    */
	2, /* U+11EC ieung-kiyeok               */
	3, /* U+11ED ieung-ssangkiyeok          */
	2, /* U+11EE ssangieung                 */
	2, /* U+11EF ieung-khieukh              */
	1, /* U+11F0 yesieung                   */
	2, /* U+11F1 yesieung-sios              */
	2, /* U+11F2 yesieung-pansios           */
	2, /* U+11F3 phieuph-pieup              */
	2, /* U+11F4 kapyeounphieuph            */
	2, /* U+11F5 hieuh-nieun                */
	2, /* U+11F6 hieuh-rieul                */
	2, /* U+11F7 hieuh-mieum                */
	2, /* U+11F8 hieuh-pieup                */
	1, /* U+11F9 yeorinhieuh                */
	2, /* U+11FA kiyeok-nieun               */
	2, /* U+11FB kiyeok-pieup               */
	2, /* U+11FC kiyeok-chieuch             */
	2, /* U+11FD kiyeok-khieukh             */
	2, /* U+11FE kiyeok-hieuh               */
	2, /* U+11FF ssangnieun                 */
    };

    /* Indexed by (jong - 0xd7cb) */
    static const char ncomp_ext_b[] = {
	2, /* U+D7CB nieun-rieul                */
	2, /* U+D7CC nieun-chieuch              */
	2, /* U+D7CD ssangtikeut                */
	3, /* U+D7CE ssangtikeut-pieup          */
	2, /* U+D7CF tikeut-pieup               */
	2, /* U+D7D0 tikeut-sios                */
	3, /* U+D7D1 tikeut-sios-kiyeok         */
	2, /* U+D7D2 tikeut-cieuc               */
	2, /* U+D7D3 tikeut-chieuch             */
	2, /* U+D7D4 tikeut-thieuth             */
	3, /* U+D7D5 rieul-ssangkiyeok          */
	3, /* U+D7D6 rieul-kiyeok-hieuh         */
	3, /* U+D7D7 ssangrieul-khieukh         */
	3, /* U+D7D8 rieul-mieum-hieuh          */
	3, /* U+D7D9 rieul-pieup-tikeut         */
	3, /* U+D7DA rieul-pieup-phieuph        */
	2, /* U+D7DB rieul-yesieung             */
	3, /* U+D7DC rieul-yeorinhieuh-hieuh    */
	2, /* U+D7DD kapyeounrieul              */
	2, /* U+D7DE mieum-nieun                */
	3, /* U+D7DF mieum-ssangnieun           */
	2, /* U+D7E0 ssangmieum                 */
	3, /* U+D7E1 mieum-pieup-sios           */
	2, /* U+D7E2 mieum-cieuc                */
	2, /* U+D7E3 pieup-tikeut               */
	3, /* U+D7E4 pieup-rieul-phieuph        */
	2, /* U+D7E5 pieup-mieum                */
	2, /* U+D7E6 ssangpieup                 */
	3, /* U+D7E7 pieup-sios-tikeut          */
	2, /* U+D7E8 pieup-cieuc                */
	2, /* U+D7E9 pieup-chieuch              */
	2, /* U+D7EA sios-mieum                 */
	3, /* U+D7EB sios-kapyeounpieup         */
	3, /* U+D7EC ssangsios-kiyeok           */
	3, /* U+D7ED ssangsios-tikeut           */
	2, /* U+D7EE sios-pansios               */
	2, /* U+D7EF sios-cieuc                 */
	2, /* U+D7F0 sios-chieuch               */
	2, /* U+D7F1 sios-thieuth               */
	2, /* U+D7F2 sios-hieuh                 */
	2, /* U+D7F3 pansios-pieup              */
	3, /* U+D7F4 pansios-kapyeounpieup      */
	2, /* U+D7F5 yesieung-mieum             */
	2, /* U+D7F6 yesieung-hieuh             */
	2, /* U+D7F7 cieuc-pieup                */
	3, /* U+D7F8 cieuc-ssangpieup           */
	2, /* U+D7F9 ssangcieuc                 */
	2, /* U+D7FA phieuph-sios               */
	2, /* U+D7FB phieuph-thieuth            */
    };

    if (jong >= 0x11a8 && jong <= 0x11ff)
	return ncomp_jamo[jong - 0x11a8];

    if (jong >= 0xd7cb && jong <= 0xd7fb)
	return ncomp_ext_b[jong - 0xd7cb];

    /* not a jongseong covered by either table */
    return 0;
}
1079 
1080 ucschar
hangul_jongseong_get_diff(ucschar prevjong,ucschar jong)1081 hangul_jongseong_get_diff(ucschar prevjong, ucschar jong)
1082 {
1083     static const ucschar table[][2] = {
1084 	{ 0x1100, 0x1100 }, /* kiyeok: kiyeok, kiyeok                      */
1085 	{ 0x1100, 0x1101 }, /* ssangkiyeok: kiyeok, ssangkiyeok            */
1086 	{ 0x1109, 0      }, /* kiyeok-sios: sios                           */
1087 	{ 0x1102, 0x1102 }, /* nieun: nieun, nieun                         */
1088 	{ 0x110c, 0x115c }, /* nieun-cieuc: cieuc, nieun-cieuc             */
1089 	{ 0x1112, 0x115d }, /* nieun-hieuh: hieuh, nieun-hieuh             */
1090 	{ 0x1103, 0x1103 }, /* tikeut: tikeut, tikeut                      */
1091 	{ 0x1105, 0x1105 }, /* rieul: rieul, rieul                         */
1092 	{ 0x1100, 0xa964 }, /* rieul-kiyeok: kiyeok, rieul-kiyeok          */
1093 	{ 0x1106, 0xa968 }, /* rieul-mieum: mieum, rieul-mieum             */
1094 	{ 0x1107, 0xa969 }, /* rieul-pieup: pieup, rieul-pieup             */
1095 	{ 0x1109, 0xa96c }, /* rieul-sios: sios, rieul-sios                */
1096 	{ 0x1110, 0      }, /* rieul-thieuth: thieuth                      */
1097 	{ 0x1111, 0      }, /* rieul-phieuph: phieuph                      */
1098 	{ 0x1112, 0x111a }, /* rieul-hieuh: hieuh, rieul-hieuh             */
1099 	{ 0x1106, 0x1106 }, /* mieum: mieum, mieum                         */
1100 	{ 0x1107, 0x1107 }, /* pieup: pieup, pieup                         */
1101 	{ 0x1109, 0x1121 }, /* pieup-sios: sios, pieup-sios                */
1102 	{ 0x1109, 0x1109 }, /* sios: sios, sios                            */
1103 	{ 0x1109, 0x110a }, /* ssangsios: sios, ssangsios                  */
1104 	{ 0x110b, 0x110b }, /* ieung: ieung, ieung                         */
1105 	{ 0x110c, 0x110c }, /* cieuc: cieuc, cieuc                         */
1106 	{ 0x110e, 0x110e }, /* chieuch: chieuch, chieuch                   */
1107 	{ 0x110f, 0x110f }, /* khieukh: khieukh, khieukh                   */
1108 	{ 0x1110, 0x1110 }, /* thieuth: thieuth, thieuth                   */
1109 	{ 0x1111, 0x1111 }, /* phieuph: phieuph, phieuph                   */
1110 	{ 0x1112, 0x1112 }, /* hieuh: hieuh, hieuh                         */
1111 	{ 0x1105, 0      }, /* kiyeok-rieul: rieul                         */
1112 	{ 0x1100, 0x112d }, /* kiyeok-sios-kiyeok: kiyeok, sios-kiyeok     */
1113 	{ 0x1100, 0x1113 }, /* nieun-kiyeok: kiyeok, nieun-kiyeok          */
1114 	{ 0x1103, 0x1115 }, /* nieun-tikeut: tikeut, nieun-tikeut          */
1115 	{ 0x1109, 0x115b }, /* nieun-sios: sios, nieun-sios                */
1116 	{ 0x1140, 0      }, /* nieun-pansios: pansios                      */
1117 	{ 0x1110, 0      }, /* nieun-thieuth: thieuth                      */
1118 	{ 0x1100, 0x1117 }, /* tikeut-kiyeok: kiyeok, tikeut-kiyeok        */
1119 	{ 0x1105, 0x115e }, /* tikeut-rieul: rieul, tikeut-rieul           */
1120 	{ 0x1109, 0      }, /* rieul-kiyeok-sios: sios                     */
1121 	{ 0x1102, 0x1118 }, /* rieul-nieun: nieun, rieul-nieun             */
1122 	{ 0x1103, 0xa966 }, /* rieul-tikeut: tikeut, rieul-tikeut          */
1123 	{ 0x1112, 0      }, /* rieul-tikeut-hieuh: hieuh                   */
1124 	{ 0x1105, 0x1119 }, /* ssangrieul: rieul, ssangrieul               */
1125 	{ 0x1100, 0xa96f }, /* rieul-mieum-kiyeok: kiyeok, mieum-kiyeok    */
1126 	{ 0x1109, 0xa971 }, /* rieul-mieum-sios: sios, mieum-sios          */
1127 	{ 0x1109, 0x1121 }, /* rieul-pieup-sios: sios, pieup-sios          */
1128 	{ 0x1112, 0xa974 }, /* rieul-pieup-hieuh: hieuh, pieup-hieuh       */
1129 	{ 0x110b, 0x112b }, /* rieul-kapyeounpieup: ieung, kapyeounpieup   */
1130 	{ 0x1109, 0x110a }, /* rieul-ssangsios: sios, ssangsios            */
1131 	{ 0x1140, 0      }, /* rieul-pansios: pansios                      */
1132 	{ 0x110f, 0xa96e }, /* rieul-khieukh: khieukh, rieul-khieukh       */
1133 	{ 0x1159, 0      }, /* rieul-yeorinhieuh: yeorinhieuh              */
1134 	{ 0x1100, 0xa96f }, /* mieum-kiyeok: kiyeok, mieum-kiyeok          */
1135 	{ 0x1105, 0      }, /* mieum-rieul: rieul                          */
1136 	{ 0x1107, 0x111c }, /* mieum-pieup: pieup, mieum-pieup             */
1137 	{ 0x1109, 0xa971 }, /* mieum-sios: sios, mieum-sios                */
1138 	{ 0x1109, 0x110a }, /* mieum-ssangsios: sios, ssangsios            */
1139 	{ 0x1140, 0      }, /* mieum-pansios: pansios                      */
1140 	{ 0x110e, 0      }, /* mieum-chieuch: chieuch                      */
1141 	{ 0x1112, 0      }, /* mieum-hieuh: hieuh                          */
1142 	{ 0x110b, 0x111d }, /* kapyeounmieum: ieung, kapyeounmieum         */
1143 	{ 0x1105, 0      }, /* pieup-rieul: rieul                          */
1144 	{ 0x1111, 0x112a }, /* pieup-phieuph: phieuph, pieup-phieuph       */
1145 	{ 0x1112, 0xa974 }, /* pieup-hieuh: hieuh, pieup-hieuh             */
1146 	{ 0x110b, 0x112b }, /* kapyeounpieup: ieung, kapyeounpieup         */
1147 	{ 0x1100, 0x112d }, /* sios-kiyeok: kiyeok, sios-kiyeok            */
1148 	{ 0x1103, 0x112f }, /* sios-tikeut: tikeut, sios-tikeut            */
1149 	{ 0x1105, 0x1130 }, /* sios-rieul: rieul, sios-rieul               */
1150 	{ 0x1107, 0x1132 }, /* sios-pieup: pieup, sios-pieup               */
1151 	{ 0x1140, 0x1140 }, /* pansios: pansios, pansios                   */
1152 	{ 0x1100, 0      }, /* yesieung-kiyeok: kiyeok                     */
1153 	{ 0x1100, 0x1101 }, /* yesieung-ssangkiyeok: kiyeok, ssangkiyeok   */
1154 	{ 0x114c, 0      }, /* ssangyesieung: yesieung                     */
1155 	{ 0x110f, 0      }, /* yesieung-khieukh: khieukh                   */
1156 	{ 0x114c, 0x114c }, /* yesieung: yesieung, yesieung                */
1157 	{ 0x1109, 0      }, /* yesieung-sios: sios                         */
1158 	{ 0x1140, 0      }, /* yesieung-pansios: pansios                   */
1159 	{ 0x1107, 0x1156 }, /* phieuph-pieup: pieup, phieuph-pieup         */
1160 	{ 0x110b, 0x1157 }, /* kapyeounphieuph: ieung, kapyeounphieuph     */
1161 	{ 0x1102, 0      }, /* hieuh-nieun: nieun                          */
1162 	{ 0x1105, 0      }, /* hieuh-rieul: rieul                          */
1163 	{ 0x1106, 0      }, /* hieuh-mieum: mieum                          */
1164 	{ 0x1107, 0      }, /* hieuh-pieup: pieup                          */
1165 	{ 0x1159, 0x1159 }, /* yeorinhieuh: yeorinhieuh, yeorinhieuh       */
1166 	{ 0x1102, 0      }, /* kiyeok-nieun: nieun                         */
1167 	{ 0x1107, 0      }, /* kiyeok-pieup: pieup                         */
1168 	{ 0x110e, 0      }, /* kiyeok-chieuch: chieuch                     */
1169 	{ 0x110f, 0      }, /* kiyeok-khieukh: khieukh                     */
1170 	{ 0x1112, 0      }, /* kiyeok-hieuh: hieuh                         */
1171 	{ 0x1102, 0x1114 }, /* ssangnieun: nieun, ssangnieun               */
1172     };
1173 
1174     static const ucschar table_ext_b[][2] = {
1175 	{ 0x1105, 0      }, /* nieun-rieul: rieul                          */
1176 	{ 0x110e, 0      }, /* nieun-chieuch: chieuch                      */
1177 	{ 0x1103, 0x1104 }, /* ssangtikeut: tikeut, ssangtikeut            */
1178 	{ 0x1107, 0xa961 }, /* ssangtikeut-pieup: pieup, tikeut-pieup      */
1179 	{ 0x1107, 0xa961 }, /* tikeut-pieup: pieup, tikeut-pieup           */
1180 	{ 0x1109, 0xa962 }, /* tikeut-sios: sios, tikeut-sios              */
1181 	{ 0x1100, 0x112d }, /* tikeut-sios-kiyeok: kiyeok, sios-kiyeok     */
1182 	{ 0x110c, 0xa963 }, /* tikeut-cieuc: cieuc, tikeut-cieuc           */
1183 	{ 0x110e, 0      }, /* tikeut-chieuch: chieuch                     */
1184 	{ 0x1110, 0      }, /* tikeut-thieuth: thieuth                     */
1185 	{ 0x1100, 0x1101 }, /* rieul-ssangkiyeok: kiyeok, ssangkiyeok      */
1186 	{ 0x1112, 0      }, /* rieul-kiyeok-hieuh: hieuh                   */
1187 	{ 0x110f, 0xa96e }, /* ssangrieul-khieukh: khieukh, rieul-khieukh  */
1188 	{ 0x1112, 0      }, /* rieul-mieum-hieuh: hieuh                    */
1189 	{ 0x1103, 0x1120 }, /* rieul-pieup-tikeut: tikeut, pieup-tikeut    */
1190 	{ 0x1111, 0x112a }, /* rieul-pieup-phieuph: phieuph, pieup-phieuph */
1191 	{ 0x114c, 0      }, /* rieul-yesieung: yesieung                    */
1192 	{ 0x1112, 0      }, /* rieul-yeorinhieuh-hieuh: hieuh              */
1193 	{ 0x110b, 0x111b }, /* kapyeounrieul: ieung, kapyeounrieul         */
1194 	{ 0x1102, 0      }, /* mieum-nieun: nieun                          */
1195 	{ 0x1102, 0x1114 }, /* mieum-ssangnieun: nieun, ssangnieun         */
1196 	{ 0x1106, 0      }, /* ssangmieum: mieum                           */
1197 	{ 0x1109, 0x1121 }, /* mieum-pieup-sios: sios, pieup-sios          */
1198 	{ 0x110c, 0      }, /* mieum-cieuc: cieuc                          */
1199 	{ 0x1103, 0x1120 }, /* pieup-tikeut: tikeut, pieup-tikeut          */
1200 	{ 0x1111, 0      }, /* pieup-rieul-phieuph: phieuph                */
1201 	{ 0x1106, 0      }, /* pieup-mieum: mieum                          */
1202 	{ 0x1107, 0x1108 }, /* ssangpieup: pieup, ssangpieup               */
1203 	{ 0x1103, 0x112f }, /* pieup-sios-tikeut: tikeut, sios-tikeut      */
1204 	{ 0x110c, 0x1127 }, /* pieup-cieuc: cieuc, pieup-cieuc             */
1205 	{ 0x110e, 0x1128 }, /* pieup-chieuch: chieuch, pieup-chieuch       */
1206 	{ 0x1106, 0x1131 }, /* sios-mieum: mieum, sios-mieum               */
1207 	{ 0x110b, 0x112b }, /* sios-kapyeounpieup: ieung, kapyeounpieup    */
1208 	{ 0x1100, 0x112d }, /* ssangsios-kiyeok: kiyeok, sios-kiyeok       */
1209 	{ 0x1103, 0x112f }, /* ssangsios-tikeut: tikeut, sios-tikeut       */
1210 	{ 0x1140, 0      }, /* sios-pansios: pansios                       */
1211 	{ 0x110c, 0x1136 }, /* sios-cieuc: cieuc, sios-cieuc               */
1212 	{ 0x110e, 0x1137 }, /* sios-chieuch: chieuch, sios-chieuch         */
1213 	{ 0x1110, 0x1139 }, /* sios-thieuth: thieuth, sios-thieuth         */
1214 	{ 0x1112, 0x113b }, /* sios-hieuh: hieuh, sios-hieuh               */
1215 	{ 0x1107, 0      }, /* pansios-pieup: pieup                        */
1216 	{ 0x110b, 0x112b }, /* pansios-kapyeounpieup: ieung, kapyeounpieup */
1217 	{ 0x1106, 0      }, /* yesieung-mieum: mieum                       */
1218 	{ 0x1112, 0      }, /* yesieung-hieuh: hieuh                       */
1219 	{ 0x1107, 0      }, /* cieuc-pieup: pieup                          */
1220 	{ 0x1107, 0x1108 }, /* cieuc-ssangpieup: pieup, ssangpieup         */
1221 	{ 0x110c, 0x110d }, /* ssangcieuc: cieuc, ssangcieuc               */
1222 	{ 0x1109, 0      }, /* phieuph-sios: sios                          */
1223 	{ 0x1110, 0      }, /* phieuph-thieuth: thieuth                    */
1224     };
1225 
1226     ucschar cho = 0;
1227 
1228     if (prevjong == 0) {
1229 	cho = hangul_jongseong_to_choseong(jong);
1230     } else {
1231 	int diff;
1232 	int n1;
1233 	int n2;
1234 
1235 	n1 = hangul_jongseong_get_ncomponent(prevjong);
1236 	n2 = hangul_jongseong_get_ncomponent(jong);
1237 
1238 	diff = n2 - n1 - 1;
1239 	if (diff >= 0 && diff < 2) {
1240 	    if (jong >= 0x11a8 && jong <= 0x11ff) {
1241 		cho = table[jong - 0x11a8][diff];
1242 	    } else if (jong >= 0xd7cb && jong <= 0xd7fb) {
1243 		cho = table_ext_b[jong - 0xd7cb][diff];
1244 	    }
1245 	} else if (diff == 2) {
1246 	    cho = hangul_jongseong_to_choseong(jong);
1247 	}
1248     }
1249 
1250     return cho;
1251 }
1252 
1253 /**
1254  * @ingroup hangulctype
1255  * @brief 자모 코드를 조합하여 한글 음절로 변환
1256  * @param choseong 초성이 될 UCS4 코드 값
1257  * @param jungseong 중성이 될 UCS4 코드 값
1258  * @param jongseong 종성이 될 UCS4 코드 값
1259  * @return @a choseong @a jungseong @a jongseong을 조합한 현대 한글 음절 코드,
1260  *         또는 0
1261  *
1262  * 이 함수는 @a choseong @a jungseong @a jongseong으로 주어진 코드 값을 각각
1263  * 초성, 중성, 종성으로 하는 현대 한글 음절 코드를 구한다.
1264  * @a choseong @a jungseong @a jongseong 이 조합 가능한 코드가 아니라면
1265  * 0을 리턴한다. 종성이 없는 글자를 만들기 위해서는 jongseong에 0을 주면 된다.
1266  */
1267 ucschar
hangul_jamo_to_syllable(ucschar choseong,ucschar jungseong,ucschar jongseong)1268 hangul_jamo_to_syllable(ucschar choseong, ucschar jungseong, ucschar jongseong)
1269 {
1270     ucschar c;
1271 
1272     /* we use 0x11a7 like a Jongseong filler */
1273     if (jongseong == 0)
1274 	jongseong = 0x11a7;         /* Jongseong filler */
1275 
1276     if (!hangul_is_choseong_conjoinable(choseong))
1277 	return 0;
1278     if (!hangul_is_jungseong_conjoinable(jungseong))
1279 	return 0;
1280     if (!hangul_is_jongseong_conjoinable(jongseong))
1281 	return 0;
1282 
1283     choseong  -= choseong_base;
1284     jungseong -= jungseong_base;
1285     jongseong -= jongseong_base;
1286 
1287     c = ((choseong * njungseong) + jungseong) * njongseong + jongseong
1288 	+ syllable_base;
1289     return c;
1290 }
1291 
1292 /**
1293  * @ingroup hangulctype
1294  * @brief 음절을 자모로 분해
1295  * @param syllable 분해할 음절
1296  * @retval choseong 음절에서 초성 부분의 코드
1297  * @retval jungseong 음절에서 중성 부분의 코드
1298  * @retval jongseong 음절에서 종성 부분의 코드, 종성이 없으면 0을 반환한다
1299  * @return 없음
1300  *
1301  * 이 함수는 @a syllable 로 주어진 음절 코드를 분해하여 자모 코드를 반환한다.
1302  * 반환하는 값은 @a choseong, @a jungseong, @a jongseong 의 포인터에 대입하여
1303  * 리턴한다. 종성이 없는 음절인 경우에는 @a jongseong 에 0을 반환한다.
1304  */
1305 void
hangul_syllable_to_jamo(ucschar syllable,ucschar * choseong,ucschar * jungseong,ucschar * jongseong)1306 hangul_syllable_to_jamo(ucschar syllable,
1307 			ucschar* choseong,
1308 			ucschar* jungseong,
1309 			ucschar* jongseong)
1310 {
1311     if (jongseong != NULL)
1312 	*jongseong = 0;
1313     if (jungseong != NULL)
1314 	*jungseong = 0;
1315     if (choseong != NULL)
1316 	*choseong = 0;
1317 
1318     if (!hangul_is_syllable(syllable))
1319 	return;
1320 
1321     syllable -= syllable_base;
1322     if (jongseong != NULL) {
1323 	if (syllable % njongseong != 0)
1324 	    *jongseong = jongseong_base + syllable % njongseong;
1325     }
1326     syllable /= njongseong;
1327 
1328     if (jungseong != NULL) {
1329 	*jungseong = jungseong_base + syllable % njungseong;
1330     }
1331     syllable /= njungseong;
1332 
1333     if (choseong != NULL) {
1334 	*choseong = choseong_base + syllable;
1335     }
1336 }
1337 
1338 /** @deprecated 이 함수 대신 hangul_syllable_to_jamo함수를 사용한다. */
1339 void
hangul_syllable_to_jaso(ucschar syllable,ucschar * choseong,ucschar * jungseong,ucschar * jongseong)1340 hangul_syllable_to_jaso(ucschar syllable,
1341 			ucschar* choseong,
1342 			ucschar* jungseong,
1343 			ucschar* jongseong)
1344 {
1345     return hangul_syllable_to_jamo(syllable, choseong, jungseong, jongseong);
1346 }
1347 
1348 static inline bool
is_syllable_boundary(ucschar prev,ucschar next)1349 is_syllable_boundary(ucschar prev, ucschar next)
1350 {
1351     if (hangul_is_choseong(prev)) {
1352 	if (hangul_is_choseong(next))
1353 	    return false;
1354 	if (hangul_is_jungseong(next))
1355 	    return false;
1356 	if (hangul_is_syllable(next))
1357 	    return false;
1358 	if (hangul_is_combining_mark(next))
1359 	    return false;
1360 	if (next == HANGUL_JUNGSEONG_FILLER)
1361 	    return false;
1362     } else if (prev == HANGUL_CHOSEONG_FILLER) {
1363 	if (hangul_is_jungseong(next))
1364 	    return false;
1365 	if (next == HANGUL_JUNGSEONG_FILLER)
1366 	    return false;
1367     } else if (hangul_is_jungseong(prev)) {
1368 	if (hangul_is_jungseong(next))
1369 	    return false;
1370 	if (hangul_is_jongseong(next))
1371 	    return false;
1372 	if (hangul_is_combining_mark(next))
1373 	    return false;
1374     } else if (prev == HANGUL_JUNGSEONG_FILLER) {
1375 	if (hangul_is_jongseong(next))
1376 	    return false;
1377     } else if (hangul_is_jongseong(prev)) {
1378 	if (hangul_is_jongseong(next))
1379 	    return false;
1380 	if (hangul_is_combining_mark(next))
1381 	    return false;
1382     } else if (hangul_is_syllable(prev)) {
1383 	if ((prev - syllable_base) % njongseong == 0) {
1384 	    // 종성이 없는 음절: LV
1385 	    if (hangul_is_jungseong(next))
1386 		return false;
1387 	    if (hangul_is_jongseong(next))
1388 		return false;
1389 	} else {
1390 	    // 종성이 있는 음절: LVT
1391 	    if (hangul_is_jongseong(next))
1392 		return false;
1393 	}
1394 	if (hangul_is_combining_mark(next))
1395 	    return false;
1396     }
1397 
1398     return true;
1399 }
1400 
/*
 * Merge the choseong @a b into the choseong accumulated so far in @a a.
 *
 * With nothing accumulated (@a a is 0) the result is @a b itself.  Otherwise
 * only a doubled kiyeok, tikeut, pieup, sios or cieuc can be merged, giving
 * the matching ssang- choseong; every other pair yields 0.
 */
static inline ucschar
choseong_compress(ucschar a, ucschar b)
{
    if (a == 0)
	return b;

    /* only identical pairs combine */
    if (a != b)
	return 0;

    switch (a) {
    case 0x1100: /* kiyeok -> ssangkiyeok */
    case 0x1103: /* tikeut -> ssangtikeut */
    case 0x1107: /* pieup  -> ssangpieup  */
    case 0x1109: /* sios   -> ssangsios   */
    case 0x110c: /* cieuc  -> ssangcieuc  */
	/* each doubled form is the code right after its single form */
	return a + 1;
    default:
	return 0;
    }
}
1419 
/*
 * Merge the jungseong @a b into the jungseong accumulated so far in @a a.
 *
 * With nothing accumulated (@a a is 0) the result is @a b itself.  Otherwise
 * the pair must be one of the combinations listed below; any other pair
 * yields 0.
 */
static inline ucschar
jungseong_compress(ucschar a, ucschar b)
{
    /* { first, second, combined } */
    static const ucschar combos[][3] = {
	{ 0x1169, 0x1161, 0x116a }, /* o   + a  -> wa  */
	{ 0x1169, 0x1162, 0x116b }, /* o   + ae -> wae */
	{ 0x1169, 0x1175, 0x116c }, /* o   + i  -> oe  */
	{ 0x116e, 0x1165, 0x116f }, /* u   + eo -> weo */
	{ 0x116e, 0x1166, 0x1170 }, /* u   + e  -> we  */
	{ 0x116e, 0x1175, 0x1171 }, /* u   + i  -> wi  */
	{ 0x1173, 0x1175, 0x1174 }, /* eu  + i  -> yi  */
	{ 0x1161, 0x1175, 0x1162 }, /* a   + i  -> ae  */
	{ 0x1163, 0x1175, 0x1164 }, /* ya  + i  -> yae */
	{ 0x1165, 0x1175, 0x1166 }, /* eo  + i  -> e   */
	{ 0x1167, 0x1175, 0x1168 }, /* yeo + i  -> ye  */
    };
    unsigned int k;

    if (a == 0)
	return b;

    for (k = 0; k < sizeof(combos) / sizeof(combos[0]); k++) {
	if (combos[k][0] == a && combos[k][1] == b)
	    return combos[k][2];
    }

    return 0;
}
1457 
1458 static inline ucschar
jongseong_compress(ucschar a,ucschar b)1459 jongseong_compress(ucschar a, ucschar b)
1460 {
1461     if (a == 0)
1462 	return b;
1463 
1464     if (a == 0x11a8) {
1465 	if (b == 0x11a8)
1466 	    return 0x11a9;
1467 	if (b == 0x11ba)
1468 	    return 0x11aa;
1469     }
1470     if (a == 0x11ab) {
1471 	if (b == 0x11b0)
1472 	    return 0x11ab;
1473 	if (b == 0x11c2)
1474 	    return 0x11ad;
1475     }
1476     if (a == 0x11af) {
1477 	if (b == 0x11a8)
1478 	    return 0x11b0;
1479 	if (b == 0x11b7)
1480 	    return 0x11b1;
1481 	if (b == 0x11b8)
1482 	    return 0x11b2;
1483 	if (b == 0x11ba)
1484 	    return 0x11b3;
1485 	if (b == 0x11c0)
1486 	    return 0x11b4;
1487 	if (b == 0x11c1)
1488 	    return 0x11b5;
1489 	if (b == 0x11c2)
1490 	    return 0x11b6;
1491     }
1492     if (a == 0x11b8 && b == 0x11ba)
1493 	return 0x11b9;
1494     if (a == 0x11ba && b == 0x11ba)
1495 	return 0x11bb;
1496 
1497     return 0;
1498 }
1499 
1500 static inline ucschar
build_syllable(const ucschar * str,size_t len)1501 build_syllable(const ucschar* str, size_t len)
1502 {
1503     int i;
1504     ucschar cho = 0, jung = 0, jong = 0;
1505 
1506     i = 0;
1507     while (i < len && hangul_is_choseong_conjoinable(str[i])) {
1508 	cho = choseong_compress(cho, str[i]);
1509 	if (cho == 0)
1510 	    return 0;
1511 	i++;
1512     }
1513 
1514     while (i < len && hangul_is_jungseong_conjoinable(str[i])) {
1515 	jung = jungseong_compress(jung, str[i]);
1516 	if (jung == 0)
1517 	    return 0;
1518 	i++;
1519     }
1520 
1521     while (i < len && hangul_is_jongseong_conjoinable(str[i])) {
1522 	jong = jongseong_compress(jong, str[i]);
1523 	if (jong == 0)
1524 	    return 0;
1525 	i++;
1526     }
1527 
1528     if (i < len)
1529 	return 0;
1530 
1531     return hangul_jamo_to_syllable(cho, jung, jong);
1532 }
1533 
1534 /**
1535  * @ingroup hangulctype
1536  * @brief 한 음절에 해당하는 코드의 갯수를 구하는 함수
1537  * @param str 음절의 길이를 구할 스트링
1538  * @param max_len @a str 에서 읽을 길이의 제한값
1539  * @return 한 음절에 해당하는 코드의 갯수
1540  *
1541  * 이 함수는 @a str 에서 한 음절에 해당하는 코드의 갯수를 구한다.
1542  * 한 음절에 해당하는 코드의 갯수가 @a max_len 보다 많다면 @a max_len 을
1543  * 반환한다. 한 음절이라고 판단하는 기준은 L*V*T+ 패턴에 따른다. 이 패턴은
1544  * regular expression의 컨벤션을 따른 것으로, 1개 이상의 초성과 중성, 0개
1545  * 이상의 종성이 모인 자모 스트링을 한 음절로 인식한다는 뜻이다. 예를 들면
1546  * 다음과 같은 자모 스트링도 한 음절로 인식한다.
1547  *
1548  *  예) "ㅂ ㅂ ㅜ ㅔ ㄹ ㄱ" -> "쀍"
1549  *
1550  * 따라서 위 경우에는 6을 반환하게 된다.
1551  *
1552  * 일반적으로는 방점(U+302E, U+302F)까지 한 음절로 인식하겠지만, 이 함수는
1553  * 음절과 자모간 변환을 편리하게 하기 위해 구현된 것으로 방점은 다른 음절로
1554  * 인식한다.
1555  *
1556  * @a str 이 자모 코드에 해당하지 않는 경우에는 1을 반환한다.
1557  *
1558  * 이 함수는 자모 스트링에서 총 음절의 갯수를 구하는 함수가 아님에 주의한다.
1559  */
1560 int
hangul_syllable_len(const ucschar * str,int max_len)1561 hangul_syllable_len(const ucschar* str, int max_len)
1562 {
1563     int i = 0;
1564 
1565     if (max_len == 0)
1566 	return 0;
1567 
1568     if (str[i] != 0) {
1569 	for (i = 1; i < max_len; i++) {
1570 	    if (str[i] == 0)
1571 		break;
1572 
1573 	    if (is_syllable_boundary(str[i - 1], str[i]))
1574 		break;
1575 	}
1576     }
1577 
1578     return i;
1579 }
1580 
1581 /**
1582  * @ingroup hangulctype
1583  * @brief @a iter를 기준으로 이전 음절의 첫자모 글자에 대한 포인터를 구하는 함수
1584  * @param iter 현재 위치
1585  * @param begin 스트링의 시작위치, 포인터가 이동할 한계값
1586  * @return 이전 음절의 첫번째 자모에 대한 포인터
1587  *
1588  * 이 함수는 @a iter로 주어진 자모 스트링의 포인터를 기준으로 이전 음절의
1589  * 첫번째 자모에 대한 포인터를 리턴한다. 음절을 찾기위해서 begin보다
1590  * 앞쪽으로 이동하지 않는다.
1591  *
1592  * 한 음절이라고 판단하는 기준은 L*V*T+M? 패턴에 따른다.
1593  */
1594 const ucschar*
hangul_syllable_iterator_prev(const ucschar * iter,const ucschar * begin)1595 hangul_syllable_iterator_prev(const ucschar* iter, const ucschar* begin)
1596 {
1597     if (iter > begin)
1598 	iter--;
1599 
1600     while (iter > begin) {
1601 	ucschar prev = iter[-1];
1602 	ucschar curr = iter[0];
1603 	if (is_syllable_boundary(prev, curr))
1604 	    break;
1605 	iter--;
1606     }
1607 
1608     return iter;
1609 }
1610 
1611 /**
1612  * @ingroup hangulctype
1613  * @brief @a iter를 기준으로 다음 음절의 첫자모 글자에 대한 포인터를 구하는 함수
1614  * @param iter 현재 위치
1615  * @param end 스트링의 끝위치, 포인터가 이동할 한계값
1616  * @return 다음 음절의 첫번째 자모에 대한 포인터
1617  *
1618  * 이 함수는 @a iter로 주어진 자모 스트링의 포인터를 기준으로 다음 음절의
1619  * 첫번째 자모에 대한 포인터를 리턴한다. 음절을 찾기위해서 end를 넘어
1620  * 이동하지 않는다.
1621  *
1622  * 한 음절이라고 판단하는 기준은 L*V*T+M? 패턴에 따른다.
1623  */
1624 const ucschar*
hangul_syllable_iterator_next(const ucschar * iter,const ucschar * end)1625 hangul_syllable_iterator_next(const ucschar* iter, const ucschar* end)
1626 {
1627     if (iter < end)
1628 	iter++;
1629 
1630     while (iter < end) {
1631 	ucschar prev = iter[-1];
1632 	ucschar curr = iter[0];
1633 	if (is_syllable_boundary(prev, curr))
1634 	    break;
1635 	iter++;
1636     }
1637 
1638     return iter;
1639 }
1640 
1641 /**
1642  * @ingroup hangulctype
1643  * @brief 자모 스트링을 음절 스트링으로 변환
1644  * @param dest 음절형으로 변환된 결과가 저장될 버퍼
1645  * @param destlen 결과를 저장할 버퍼의 길이(ucschar 코드 단위)
1646  * @param src 변환할 자모 스트링
1647  * @param srclen 변환할 자모 스트링의 길이(ucschar 코드 단위)
1648  * @return @a destlen 에 저장한 코드의 갯수
1649  *
1650  * 이 함수는 L+V+T*M? 패턴에 따라 자모 스트링 변환을 시도한다. 한 음절을
1651  * 판단하는 기준은 @ref hangul_syllable_len 을 참조한다.
1652  * 만일 @a src 가 적절한 음절형태로 변환이 불가능한 경우에는 자모 스트링이
1653  * 그대로 복사된다.
1654  *
1655  * 이 함수는 자모 스트링 @a src 를 음절형으로 변환하여 @a dest 에 저장한다.
1656  * @a srclen 에 지정된 갯수만큼 읽고, @a destlen 에 지정된 길이 이상 쓰지
1657  * 않는다.  @a srclen 이 -1이라면 @a src 는 0으로 끝나는 스트링으로 가정하고
1658  * 0을 제외한 길이까지 변환을 시도한다. 따라서 변환된 결과 스트링은 0으로
1659  * 끝나지 않는다. 만일 0으로 끝나는 스트링을 만들고 싶다면 다음과 같이 한다.
1660  *
1661  * @code
1662  * int n = hangul_jamos_to_syllables(dest, destlen, src, srclen);
1663  * dest[n] = 0;
1664  * @endcode
1665  */
1666 int
hangul_jamos_to_syllables(ucschar * dest,int destlen,const ucschar * src,int srclen)1667 hangul_jamos_to_syllables(ucschar* dest, int destlen, const ucschar* src, int srclen)
1668 {
1669     ucschar* d;
1670     const ucschar* s;
1671 
1672     int inleft;
1673     int outleft;
1674     int n;
1675 
1676     if (srclen < 0) {
1677 	s = src;
1678 	while (*s != 0)
1679 	    s++;
1680 	srclen = s - src;
1681     }
1682 
1683     s = src;
1684     d = dest;
1685     inleft = srclen;
1686     outleft = destlen;
1687 
1688     n = hangul_syllable_len(s, inleft);
1689     while (n > 0 && inleft > 0 && outleft > 0) {
1690 	ucschar c = build_syllable(s, n);
1691 	if (c != 0) {
1692 	    *d = c;
1693 	    d++;
1694 	    outleft--;
1695 	} else {
1696 	    int i;
1697 	    for (i = 0; i < n && i < outleft; i++) {
1698 		d[i] = s[i];
1699 	    }
1700 	    d += i;
1701 	    outleft -= i;
1702 	}
1703 
1704 	s += n;
1705 	inleft -= n;
1706 	n = hangul_syllable_len(s, inleft);
1707     }
1708 
1709     return destlen - outleft;
1710 }
1711