1 /* libhangul
2 * Copyright (C) 2004 - 2009 Choe Hwanjin
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #ifdef HAVE_CONFIG_H
20 #include <config.h>
21 #endif
22
23 #include <stdlib.h>
24
25 #include "hangul.h"
26
27 /**
28 * @defgroup hangulctype 한글 글자 조작
29 *
30 * @section hangulctype 한글 글자 조작
31 * libhangul은 한글 각 글자를 구분하고 조작하는데 사용할 수 있는 몇가지 함수를
32 * 제공한다. libhangul의 글자 구분 함수의 인터페이스에서 글자의 기본 단위는
33 * UCS4 코드값이다.
34 */
35
36 /**
37 * @file hangulctype.c
38 */
39
40 /**
41 * @ingroup hangulctype
42 * @typedef ucschar
43 * @brief UCS4 코드 단위의 글자 코드 값
44 *
45 * UCS4 코드 값을 저장한다. libhangul에서 사용하는 문자열의 기본단위이다.
46 * preedit 문자열과 commit 문자열 모두 ucschar 포인터 형으로 전달된다.
47 * 이 스트링은 C 스트링과 유사하게 0으로 끝난다.
48 * 유니코드 값이 한글의 어떤 범주에 속하는지 확인하는 함수도 모두 ucschar 형을
49 * 사용한다.
50 */
51
52 static const ucschar syllable_base = 0xac00;
53 static const ucschar choseong_base = 0x1100;
54 static const ucschar jungseong_base = 0x1161;
55 static const ucschar jongseong_base = 0x11a7;
56 static const int njungseong = 21;
57 static const int njongseong = 28;
58
59 /**
60 * @ingroup hangulctype
61 * @brief 초성인지 확인하는 함수
62 * @param c UCS4 코드 값
63 * @return @a c 가 초성에 해당하면 true를 리턴함, 아니면 false
64 *
65 * @a c 로 주어진 UCS4 코드가 초성인지 확인한다.
66 * Unicode 5.2 지원
67 */
68 bool
hangul_is_choseong(ucschar c)69 hangul_is_choseong(ucschar c)
70 {
71 return (c >= 0x1100 && c <= 0x115f) ||
72 (c >= 0xa960 && c <= 0xa97c);
73 ;
74 }
75
76 /**
77 * @ingroup hangulctype
78 * @brief 중성인지 확인하는 함수
79 * @param c UCS4 코드 값
80 * @return @a c 가 중성에 해당하면 true를 리턴함, 아니면 false
81 *
82 * @a c 로 주어진 UCS4 코드가 중성인지 확인한다.
83 * Unicode 5.2 지원
84 */
85 bool
hangul_is_jungseong(ucschar c)86 hangul_is_jungseong(ucschar c)
87 {
88 return (c >= 0x1160 && c <= 0x11a7) ||
89 (c >= 0xd7b0 && c <= 0xd7c6);
90 }
91
92 /**
93 * @ingroup hangulctype
94 * @brief 종성인지 확인하는 함수
95 * @param c UCS4 코드 값
96 * @return @a c 가 종성에 해당하면 true를 리턴함, 아니면 false
97 *
98 * @a c 로 주어진 UCS4 코드가 종성인지 확인한다.
99 * Unicode 5.2 지원
100 */
101 bool
hangul_is_jongseong(ucschar c)102 hangul_is_jongseong(ucschar c)
103 {
104 return (c >= 0x11a8 && c <= 0x11ff) ||
105 (c >= 0xd7cb && c <= 0xd7fb);
106 }
107
108 bool
hangul_is_combining_mark(ucschar c)109 hangul_is_combining_mark(ucschar c)
110 {
111 return c == 0x302e || c == 0x302f ||
112 (c >= 0x0300 && c <= 0x036F) ||
113 (c >= 0x1dc0 && c <= 0x1dff) ||
114 (c >= 0xfe20 && c <= 0xfe2f);
115 }
116
117 /**
118 * @ingroup hangulctype
119 * @brief 초성이고 조합 가능한지 확인
120 */
121 bool
hangul_is_choseong_conjoinable(ucschar c)122 hangul_is_choseong_conjoinable(ucschar c)
123 {
124 return c >= 0x1100 && c <= 0x1112;
125 }
126
127 /**
128 * @ingroup hangulctype
129 * @brief 중성이고 조합 가능한지 확인
130 */
131 bool
hangul_is_jungseong_conjoinable(ucschar c)132 hangul_is_jungseong_conjoinable(ucschar c)
133 {
134 return c >= 0x1161 && c <= 0x1175;
135 }
136
137 /**
138 * @ingroup hangulctype
139 * @brief 종성이고 조합 가능한지 확인
140 */
141 bool
hangul_is_jongseong_conjoinable(ucschar c)142 hangul_is_jongseong_conjoinable(ucschar c)
143 {
144 return c >= 0x11a7 && c <= 0x11c2;
145 }
146
147 /**
148 * @ingroup hangulctype
149 * @brief 한글 음절 인지 확
150 * @param c UCS4 코드 값
151 * @return @a c가 한글 음절 코드이면 true, 그 외에는 false
152 *
153 * 이 함수는 @a c로 주어진 UCS4 코드가 현대 한글 음절에 해당하는지
154 * 확인한다.
155 */
156 bool
hangul_is_syllable(ucschar c)157 hangul_is_syllable(ucschar c)
158 {
159 return c >= 0xac00 && c <= 0xd7a3;
160 }
161
162 /**
163 * @ingroup hangulctype
164 * @brief 자모 인지 확인
165 * @param c UCS4 코드 값
166 * @return @a c 가 자모 코드이면 true를 리턴, 그외에는 false
167 *
168 * @a c 로 주어진 UCS4 코드가 자모 코드인지 확인한다.
169 * Unicode 5.2 지원
170 */
171 bool
hangul_is_jamo(ucschar c)172 hangul_is_jamo(ucschar c)
173 {
174 return hangul_is_choseong(c) ||
175 hangul_is_jungseong(c) ||
176 hangul_is_jongseong(c);
177 }
178
179 /**
180 * @ingroup hangulctype
181 * @brief 호환 자모인지 확인
182 * @param c UCS4 코드 값
183 * @return @a c가 호환자모이면 true, 그 외에는 false
184 *
185 * 이 함수는 @a c로 주어진 UCS4 코드가 호환 자모인지 확인한다.
186 */
187 bool
hangul_is_cjamo(ucschar c)188 hangul_is_cjamo(ucschar c)
189 {
190 return c >= 0x3131 && c <= 0x318e;
191 }
192
193 /**
194 * @ingroup hangulctype
195 * @brief 자모 코드를 대응하는 호환 자모로 변환
196 * @param c 변환할 UCS4 코드 값
197 * @return @a c 에 대응되는 호환 자모 값, or c
198 *
199 * 이 함수는 @a c 로 주어진 자모 코드와 같은 형태를 가진 호환 자모 값을
200 * 리턴한다. 자모와 같은 형태를 가진 호환 자모가 없는 경우에는 @a c 의
201 * 값을 그대로 리턴한다.
202 */
203 ucschar
hangul_jamo_to_cjamo(ucschar c)204 hangul_jamo_to_cjamo(ucschar c)
205 {
206 static unsigned short jamo_table[] = {
207 0x3131, /* 0x1100 */
208 0x3132, /* 0x1101 */
209 0x3134, /* 0x1102 */
210 0x3137, /* 0x1103 */
211 0x3138, /* 0x1104 */
212 0x3139, /* 0x1105 */
213 0x3141, /* 0x1106 */
214 0x3142, /* 0x1107 */
215 0x3143, /* 0x1108 */
216 0x3145, /* 0x1109 */
217 0x3146, /* 0x110a */
218 0x3147, /* 0x110b */
219 0x3148, /* 0x110c */
220 0x3149, /* 0x110d */
221 0x314a, /* 0x110e */
222 0x314b, /* 0x110f */
223 0x314c, /* 0x1110 */
224 0x314d, /* 0x1111 */
225 0x314e, /* 0x1112 */
226 0x0000, /* 0x1113 */
227 0x3165, /* 0x1114 */
228 0x3166, /* 0x1115 */
229 0x0000, /* 0x1116 */
230 0x0000, /* 0x1117 */
231 0x0000, /* 0x1118 */
232 0x0000, /* 0x1119 */
233 0x3140, /* 0x111a */
234 0x0000, /* 0x111b */
235 0x316e, /* 0x111c */
236 0x3171, /* 0x111d */
237 0x3172, /* 0x111e */
238 0x0000, /* 0x111f */
239 0x3173, /* 0x1120 */
240 0x3144, /* 0x1121 */
241 0x3174, /* 0x1122 */
242 0x3175, /* 0x1123 */
243 0x0000, /* 0x1124 */
244 0x0000, /* 0x1125 */
245 0x0000, /* 0x1126 */
246 0x3176, /* 0x1127 */
247 0x0000, /* 0x1128 */
248 0x3177, /* 0x1129 */
249 0x0000, /* 0x112a */
250 0x3178, /* 0x112b */
251 0x3179, /* 0x112c */
252 0x317a, /* 0x112d */
253 0x317b, /* 0x112e */
254 0x317c, /* 0x112f */
255 0x0000, /* 0x1130 */
256 0x0000, /* 0x1131 */
257 0x317d, /* 0x1132 */
258 0x0000, /* 0x1133 */
259 0x0000, /* 0x1134 */
260 0x0000, /* 0x1135 */
261 0x317e, /* 0x1136 */
262 0x0000, /* 0x1137 */
263 0x0000, /* 0x1138 */
264 0x0000, /* 0x1139 */
265 0x0000, /* 0x113a */
266 0x0000, /* 0x113b */
267 0x0000, /* 0x113c */
268 0x0000, /* 0x113d */
269 0x0000, /* 0x113e */
270 0x0000, /* 0x113f */
271 0x317f, /* 0x1140 */
272 0x0000, /* 0x1141 */
273 0x0000, /* 0x1142 */
274 0x0000, /* 0x1143 */
275 0x0000, /* 0x1144 */
276 0x0000, /* 0x1145 */
277 0x0000, /* 0x1146 */
278 0x3180, /* 0x1147 */
279 0x0000, /* 0x1148 */
280 0x0000, /* 0x1149 */
281 0x0000, /* 0x114a */
282 0x0000, /* 0x114b */
283 0x3181, /* 0x114c */
284 0x0000, /* 0x114d */
285 0x0000, /* 0x114e */
286 0x0000, /* 0x114f */
287 0x0000, /* 0x1150 */
288 0x0000, /* 0x1151 */
289 0x0000, /* 0x1152 */
290 0x0000, /* 0x1153 */
291 0x0000, /* 0x1154 */
292 0x0000, /* 0x1155 */
293 0x0000, /* 0x1156 */
294 0x3184, /* 0x1157 */
295 0x3185, /* 0x1158 */
296 0x3186, /* 0x1159 */
297 0x0000, /* 0x115a */
298 0x0000, /* 0x115b */
299 0x0000, /* 0x115c */
300 0x0000, /* 0x115d */
301 0x0000, /* 0x115e */
302 0x0000, /* 0x115f */
303 0x3164, /* 0x1160 */
304 0x314f, /* 0x1161 */
305 0x3150, /* 0x1162 */
306 0x3151, /* 0x1163 */
307 0x3152, /* 0x1164 */
308 0x3153, /* 0x1165 */
309 0x3154, /* 0x1166 */
310 0x3155, /* 0x1167 */
311 0x3156, /* 0x1168 */
312 0x3157, /* 0x1169 */
313 0x3158, /* 0x116a */
314 0x3159, /* 0x116b */
315 0x315a, /* 0x116c */
316 0x315b, /* 0x116d */
317 0x315c, /* 0x116e */
318 0x315d, /* 0x116f */
319 0x315e, /* 0x1170 */
320 0x315f, /* 0x1171 */
321 0x3160, /* 0x1172 */
322 0x3161, /* 0x1173 */
323 0x3162, /* 0x1174 */
324 0x3163, /* 0x1175 */
325 0x0000, /* 0x1176 */
326 0x0000, /* 0x1177 */
327 0x0000, /* 0x1178 */
328 0x0000, /* 0x1179 */
329 0x0000, /* 0x117a */
330 0x0000, /* 0x117b */
331 0x0000, /* 0x117c */
332 0x0000, /* 0x117d */
333 0x0000, /* 0x117e */
334 0x0000, /* 0x117f */
335 0x0000, /* 0x1180 */
336 0x0000, /* 0x1181 */
337 0x0000, /* 0x1182 */
338 0x0000, /* 0x1183 */
339 0x3187, /* 0x1184 */
340 0x3188, /* 0x1185 */
341 0x0000, /* 0x1186 */
342 0x0000, /* 0x1187 */
343 0x3189, /* 0x1188 */
344 0x0000, /* 0x1189 */
345 0x0000, /* 0x118a */
346 0x0000, /* 0x118b */
347 0x0000, /* 0x118c */
348 0x0000, /* 0x118d */
349 0x0000, /* 0x118e */
350 0x0000, /* 0x118f */
351 0x0000, /* 0x1190 */
352 0x318a, /* 0x1191 */
353 0x318b, /* 0x1192 */
354 0x0000, /* 0x1193 */
355 0x318c, /* 0x1194 */
356 0x0000, /* 0x1195 */
357 0x0000, /* 0x1196 */
358 0x0000, /* 0x1197 */
359 0x0000, /* 0x1198 */
360 0x0000, /* 0x1199 */
361 0x0000, /* 0x119a */
362 0x0000, /* 0x119b */
363 0x0000, /* 0x119c */
364 0x0000, /* 0x119d */
365 0x318d, /* 0x119e */
366 0x0000, /* 0x119f */
367 0x0000, /* 0x11a0 */
368 0x318e, /* 0x11a1 */
369 0x0000, /* 0x11a2 */
370 0x0000, /* 0x11a3 */
371 0x0000, /* 0x11a4 */
372 0x0000, /* 0x11a5 */
373 0x0000, /* 0x11a6 */
374 0x0000, /* 0x11a7 */
375 0x3131, /* 0x11a8 */
376 0x3132, /* 0x11a9 */
377 0x3133, /* 0x11aa */
378 0x3134, /* 0x11ab */
379 0x3135, /* 0x11ac */
380 0x3136, /* 0x11ad */
381 0x3137, /* 0x11ae */
382 0x3139, /* 0x11af */
383 0x313a, /* 0x11b0 */
384 0x313b, /* 0x11b1 */
385 0x313c, /* 0x11b2 */
386 0x313d, /* 0x11b3 */
387 0x313e, /* 0x11b4 */
388 0x313f, /* 0x11b5 */
389 0x3140, /* 0x11b6 */
390 0x3141, /* 0x11b7 */
391 0x3142, /* 0x11b8 */
392 0x3144, /* 0x11b9 */
393 0x3145, /* 0x11ba */
394 0x3146, /* 0x11bb */
395 0x3147, /* 0x11bc */
396 0x3148, /* 0x11bd */
397 0x314a, /* 0x11be */
398 0x314b, /* 0x11bf */
399 0x314c, /* 0x11c0 */
400 0x314d, /* 0x11c1 */
401 0x314e, /* 0x11c2 */
402 0x0000, /* 0x11c3 */
403 0x0000, /* 0x11c4 */
404 0x0000, /* 0x11c5 */
405 0x0000, /* 0x11c6 */
406 0x3167, /* 0x11c7 */
407 0x3168, /* 0x11c8 */
408 0x0000, /* 0x11c9 */
409 0x0000, /* 0x11ca */
410 0x0000, /* 0x11cb */
411 0x3169, /* 0x11cc */
412 0x0000, /* 0x11cd */
413 0x316a, /* 0x11ce */
414 0x0000, /* 0x11cf */
415 0x0000, /* 0x11d0 */
416 0x0000, /* 0x11d1 */
417 0x0000, /* 0x11d2 */
418 0x316b, /* 0x11d3 */
419 0x0000, /* 0x11d4 */
420 0x0000, /* 0x11d5 */
421 0x0000, /* 0x11d6 */
422 0x316c, /* 0x11d7 */
423 0x0000, /* 0x11d8 */
424 0x316d, /* 0x11d9 */
425 0x0000, /* 0x11da */
426 0x0000, /* 0x11db */
427 0x0000, /* 0x11dc */
428 0x316f, /* 0x11dd */
429 0x0000, /* 0x11de */
430 0x3170, /* 0x11df */
431 0x0000, /* 0x11e0 */
432 0x0000, /* 0x11e1 */
433 0x0000, /* 0x11e2 */
434 0x0000, /* 0x11e3 */
435 0x0000, /* 0x11e4 */
436 0x0000, /* 0x11e5 */
437 0x0000, /* 0x11e6 */
438 0x0000, /* 0x11e7 */
439 0x0000, /* 0x11e8 */
440 0x0000, /* 0x11e9 */
441 0x0000, /* 0x11ea */
442 0x0000, /* 0x11eb */
443 0x0000, /* 0x11ec */
444 0x0000, /* 0x11ed */
445 0x0000, /* 0x11ee */
446 0x0000, /* 0x11ef */
447 0x0000, /* 0x11f0 */
448 0x3182, /* 0x11f1 */
449 0x3183, /* 0x11f2 */
450 0x0000, /* 0x11f3 */
451 0x0000, /* 0x11f4 */
452 0x0000, /* 0x11f5 */
453 0x0000, /* 0x11f6 */
454 0x0000, /* 0x11f7 */
455 0x0000, /* 0x11f8 */
456 0x0000, /* 0x11f9 */
457 0x0000, /* 0x11fa */
458 0x0000, /* 0x11fb */
459 0x0000, /* 0x11fc */
460 0x0000, /* 0x11fd */
461 0x0000, /* 0x11fe */
462 0x0000, /* 0x11ff */
463 };
464
465 static unsigned short jamo_ext_A_table[] = {
466 0x0000, /* 0xa960 */
467 0x0000, /* 0xa961 */
468 0x0000, /* 0xa962 */
469 0x0000, /* 0xa963 */
470 0x313a, /* 0xa964 */
471 0x0000, /* 0xa965 */
472 0x316a, /* 0xa966 */
473 0x0000, /* 0xa967 */
474 0x313b, /* 0xa968 */
475 0x313c, /* 0xa969 */
476 0x0000, /* 0xa96a */
477 0x0000, /* 0xa96b */
478 0x313d, /* 0xa96c */
479 0x0000, /* 0xa96d */
480 0x0000, /* 0xa96e */
481 0x0000, /* 0xa96f */
482 0x0000, /* 0xa970 */
483 0x316f, /* 0xa971 */
484 0x0000, /* 0xa972 */
485 0x0000, /* 0xa973 */
486 0x0000, /* 0xa974 */
487 0x0000, /* 0xa975 */
488 0x0000, /* 0xa976 */
489 0x0000, /* 0xa977 */
490 0x0000, /* 0xa978 */
491 0x0000, /* 0xa979 */
492 0x0000, /* 0xa97a */
493 0x0000, /* 0xa97b */
494 0x0000, /* 0xa97c */
495 };
496
497 static unsigned short jamo_ext_B_table[] = {
498 0x0000, /* 0xd7b0 */
499 0x0000, /* 0xd7b1 */
500 0x0000, /* 0xd7b2 */
501 0x0000, /* 0xd7b3 */
502 0x0000, /* 0xd7b4 */
503 0x0000, /* 0xd7b5 */
504 0x0000, /* 0xd7b6 */
505 0x0000, /* 0xd7b7 */
506 0x0000, /* 0xd7b8 */
507 0x0000, /* 0xd7b9 */
508 0x0000, /* 0xd7ba */
509 0x0000, /* 0xd7bb */
510 0x0000, /* 0xd7bc */
511 0x0000, /* 0xd7bd */
512 0x0000, /* 0xd7be */
513 0x0000, /* 0xd7bf */
514 0x0000, /* 0xd7c0 */
515 0x0000, /* 0xd7c1 */
516 0x0000, /* 0xd7c2 */
517 0x0000, /* 0xd7c3 */
518 0x0000, /* 0xd7c4 */
519 0x0000, /* 0xd7c5 */
520 0x0000, /* 0xd7c6 */
521 0x0000, /* 0xd7c7 */
522 0x0000, /* 0xd7c8 */
523 0x0000, /* 0xd7c9 */
524 0x0000, /* 0xd7ca */
525 0x0000, /* 0xd7cb */
526 0x0000, /* 0xd7cc */
527 0x3138, /* 0xd7cd */
528 0x0000, /* 0xd7ce */
529 0x0000, /* 0xd7cf */
530 0x0000, /* 0xd7d0 */
531 0x0000, /* 0xd7d1 */
532 0x0000, /* 0xd7d2 */
533 0x0000, /* 0xd7d3 */
534 0x0000, /* 0xd7d4 */
535 0x0000, /* 0xd7d5 */
536 0x0000, /* 0xd7d6 */
537 0x0000, /* 0xd7d7 */
538 0x0000, /* 0xd7d8 */
539 0x0000, /* 0xd7d9 */
540 0x0000, /* 0xd7da */
541 0x0000, /* 0xd7db */
542 0x0000, /* 0xd7dc */
543 0x0000, /* 0xd7dd */
544 0x0000, /* 0xd7de */
545 0x0000, /* 0xd7df */
546 0x0000, /* 0xd7e0 */
547 0x0000, /* 0xd7e1 */
548 0x0000, /* 0xd7e2 */
549 0x3173, /* 0xd7e3 */
550 0x0000, /* 0xd7e4 */
551 0x0000, /* 0xd7e5 */
552 0x3143, /* 0xd7e6 */
553 0x3175, /* 0xd7e7 */
554 0x3176, /* 0xd7e8 */
555 0x0000, /* 0xd7e9 */
556 0x0000, /* 0xd7ea */
557 0x0000, /* 0xd7eb */
558 0x0000, /* 0xd7ec */
559 0x0000, /* 0xd7ed */
560 0x0000, /* 0xd7ee */
561 0x317e, /* 0xd7ef */
562 0x0000, /* 0xd7f0 */
563 0x0000, /* 0xd7f1 */
564 0x0000, /* 0xd7f2 */
565 0x0000, /* 0xd7f3 */
566 0x0000, /* 0xd7f4 */
567 0x0000, /* 0xd7f5 */
568 0x0000, /* 0xd7f6 */
569 0x0000, /* 0xd7f7 */
570 0x0000, /* 0xd7f8 */
571 0x3149, /* 0xd7f9 */
572 0x0000, /* 0xd7fa */
573 0x0000, /* 0xd7fb */
574 };
575
576 ucschar ret = 0;
577
578 if (c >= 0x1100 && c <= 0x11ff) {
579 ret = jamo_table[c - 0x1100];
580 } else if (c >= 0xa960 && c <= 0xa97c) {
581 ret = jamo_ext_A_table[c - 0xa960];
582 } else if (c >= 0xd7b0 && c <= 0xd7fb) {
583 ret = jamo_ext_B_table[c - 0xd7b0];
584 }
585
586 if (ret == 0)
587 ret = c;
588
589 return ret;
590 }
591
592 ucschar
hangul_choseong_to_jongseong(ucschar c)593 hangul_choseong_to_jongseong(ucschar c)
594 {
595 static const ucschar table[] = {
596 0x11a8, /* cho kiyeok -> jong kiyeok */
597 0x11a9, /* cho ssangkiyeok -> jong ssangkiyeok */
598 0x11ab, /* cho nieun -> jong nieun */
599 0x11ae, /* cho tikeut -> jong tikeut */
600 0xd7cd, /* cho ssangtikeut -> jong ssangtikeut */
601 0x11af, /* cho rieul -> jong rieul */
602 0x11b7, /* cho mieum -> jong mieum */
603 0x11b8, /* cho pieup -> jong pieup */
604 0xd7e6, /* cho ssangpieup -> jong ssangpieup */
605 0x11ba, /* cho sios -> jong sios */
606 0x11bb, /* cho ssangsios -> jong ssangsios */
607 0x11bc, /* cho ieung -> jong ieung */
608 0x11bd, /* cho cieuc -> jong cieuc */
609 0xd7f9, /* cho ssangcieuc -> jong ssangcieuc */
610 0x11be, /* cho chieuch -> jong chieuch */
611 0x11bf, /* cho khieukh -> jong khieukh */
612 0x11c0, /* cho thieuth -> jong thieuth */
613 0x11c1, /* cho phieuph -> jong phieuph */
614 0x11c2, /* cho hieuh -> jong hieuh */
615 0x11c5, /* cho nieun-kiyeok -> jong nieun-kiyeok */
616 0x11ff, /* cho ssangnieun -> jong ssangnieun */
617 0x11c6, /* cho nieun-tikeut -> jong nieun-tikeut */
618 0, /* cho nieun-pieup */
619 0x11ca, /* cho tikeut-kiyeok -> jong tikeut-kiyeok */
620 0x11cd, /* cho rieul-nieun -> jong rieul-nieun */
621 0x11d0, /* cho ssangrieul -> jong ssangrieul */
622 0x11b6, /* cho rieul-hieuh -> jong rieul-hieuh */
623 0xd7dd, /* cho kapyeounrieul -> jong kapyeounrieul */
624 0x11dc, /* cho mieum-pieup -> jong mieum-pieup */
625 0x11e2, /* cho kapyeounmieum -> jong kapyeounmieum */
626 0, /* cho pieup-kiyeok */
627 0, /* cho pieup-nieun */
628 0xd7e3, /* cho pieup-tikeut -> jong pieup-tikeut */
629 0x11b9, /* cho pieup-sios -> jong pieup-sios */
630 0, /* cho pieup-sios-kiyeok */
631 0xd7e7, /* cho pieup-sios-tikeut -> jong pieup-sios-tikeut */
632 0, /* cho pieup-sios-pieup */
633 0, /* cho pieup-ssangsios */
634 0, /* cho pieup-sios-cieuc */
635 0xd7e8, /* cho pieup-cieuc -> jong pieup-cieuc */
636 0xd7e9, /* cho pieup-chieuch -> jong pieup-chieuch */
637 0, /* cho pieup-thieuth */
638 0x11e4, /* cho pieup-phieuph -> jong pieup-phieuph */
639 0x11e6, /* cho kapyeounpieup -> jong kapyeounpieup */
640 0, /* cho kapyeounssangpieup */
641 0x11e7, /* cho sios-kiyeok -> jong sios-kiyeok */
642 0, /* cho sios-nieun */
643 0x11e8, /* cho sios-tikeut -> jong sios-tikeut */
644 0x11e9, /* cho sios-rieul -> jong sios-rieul */
645 0xd7ea, /* cho sios-mieum -> jong sios-mieum */
646 0x11ea, /* cho sios-pieup -> jong sios-pieup */
647 0, /* cho sios-pieup-kiyeok */
648 0, /* cho sios-ssangsios */
649 0, /* cho sios-ieung */
650 0xd7ef, /* cho sios-cieuc -> jong sios-cieuc */
651 0xd7f0, /* cho sios-chieuch -> jong sios-chieuch */
652 0, /* cho sios-khieukh */
653 0xd7f1, /* cho sios-thieuth -> jong sios-thieuth */
654 0, /* cho sios-phieuph */
655 0xd7f2, /* cho sios-hieuh -> jong sios-hieuh */
656 0, /* cho chitueumsios */
657 0, /* cho chitueumssangsios */
658 0, /* cho ceongchieumsios */
659 0, /* cho ceongchieumssangsios */
660 0x11eb, /* cho pansios -> jong pansios */
661 0x11ec, /* cho ieung-kiyeok -> jong ieung-kiyeok */
662 0, /* cho ieung-tikeut */
663 0, /* cho ieung-mieum */
664 0, /* cho ieung-pieup */
665 0, /* cho ieung-sios */
666 0, /* cho ieung-pansios */
667 0x11ee, /* cho ssangieung -> jong ssangieung */
668 0, /* cho ieung-cieuc */
669 0, /* cho ieung-chieuch */
670 0, /* cho ieung-thieuth */
671 0, /* cho ieung-phieuph */
672 0x11f0, /* cho yesieung -> jong yesieung */
673 0, /* cho cieuc-ieung */
674 0, /* cho chitueumcieuc */
675 0, /* cho chitueumssangcieuc */
676 0, /* cho ceongchieumcieuc */
677 0, /* cho ceongchieumssangcieuc */
678 0, /* cho chieuch-khieukh */
679 0, /* cho chieuch-hieuh */
680 0, /* cho chitueumchieuch */
681 0, /* cho ceongchieumchieuch */
682 0x11f3, /* cho phieuph-pieup -> jong phieuph-pieup */
683 0x11f4, /* cho kapyeounphieuph -> jong kapyeounphieuph */
684 0, /* cho ssanghieuh */
685 0x11f9, /* cho yeorinhieuh -> jong yeorinhieuh */
686 0, /* cho kiyeok-tikeut */
687 0x11c7, /* cho nieun-sios -> jong nieun-sios */
688 0x11ac, /* cho nieun-cieuc -> jong nieun-cieuc */
689 0x11ad, /* cho nieun-hieuh -> jong nieun-hieuh */
690 0x11cb, /* cho tikeut-rieul -> jong tikeut-rieul */
691 0, /* cho filler */
692 };
693
694 static const ucschar table_ext_a[] = {
695 0, /* cho tikeut-mieum */
696 0xd7cf, /* cho tikeut-pieup -> jong tikeut-pieup */
697 0xd7d0, /* cho tikeut-sios -> jong tikeut-sios */
698 0xd7d2, /* cho tikeut-cieuc -> jong tikeut-cieuc */
699 0x11b0, /* cho rieul-kiyeok -> jong rieul-kiyeok */
700 0xd7d5, /* cho rieul-ssangkiyeok -> jong rieul-ssangkiyeok */
701 0x11ce, /* cho rieul-tikeut -> jong rieul-tikeut */
702 0, /* cho rieul-ssangtikeut */
703 0x11b1, /* cho rieul-mieum -> jong rieul-mieum */
704 0x11b2, /* cho rieul-pieup -> jong rieul-pieup */
705 0, /* cho rieul-ssangpieup */
706 0x11d5, /* cho rieul-kapyeounpieup -> jong rieul-kapyeounpieup */
707 0x11b3, /* cho rieul-sios -> jong rieul-sios */
708 0, /* cho rieul-cieuc */
709 0x11d8, /* cho rieul-khieukh -> jong rieul-khieukh */
710 0x11da, /* cho mieum-kiyeok -> jong mieum-kiyeok */
711 0, /* cho mieum-tikeut */
712 0x11dd, /* cho mieum-sios -> jong mieum-sios */
713 0, /* cho pieup-sios-thieuth */
714 0, /* cho pieup-khieukh */
715 0x11e5, /* cho pieup-hieuh -> jong pieup-hieuh */
716 0, /* cho ssangsios-pieup */
717 0, /* cho ieung-rieul */
718 0, /* cho ieung-hieuh */
719 0, /* cho ssangcieuc-hieuh */
720 0, /* cho ssangthieuth */
721 0, /* cho phieuph-hieuh */
722 0, /* cho hieuh-sios */
723 0, /* cho ssangyeorinhieuh */
724 };
725
726 if (c >= 0x1100 && c <= 0x115e)
727 return table[c - 0x1100];
728 else if (c >= 0xa960 && c <= 0xa97c)
729 return table_ext_a[c - 0xa960];
730
731 return 0;
732 }
733
734 ucschar
hangul_jongseong_to_choseong(ucschar c)735 hangul_jongseong_to_choseong(ucschar c)
736 {
737 static const ucschar table[] = {
738 0x1100, /* jong kiyeok -> cho kiyeok */
739 0x1101, /* jong ssangkiyeok -> cho ssangkiyeok */
740 0, /* jong kiyeok-sios */
741 0x1102, /* jong nieun -> cho nieun */
742 0x115c, /* jong nieun-cieuc -> cho nieun-cieuc */
743 0x115d, /* jong nieun-hieuh -> cho nieun-hieuh */
744 0x1103, /* jong tikeut -> cho tikeut */
745 0x1105, /* jong rieul -> cho rieul */
746 0xa964, /* jong rieul-kiyeok -> cho rieul-kiyeok */
747 0xa968, /* jong rieul-mieum -> cho rieul-mieum */
748 0xa969, /* jong rieul-pieup -> cho rieul-pieup */
749 0xa96c, /* jong rieul-sios -> cho rieul-sios */
750 0, /* jong rieul-thieuth */
751 0, /* jong rieul-phieuph */
752 0x111a, /* jong rieul-hieuh -> cho rieul-hieuh */
753 0x1106, /* jong mieum -> cho mieum */
754 0x1107, /* jong pieup -> cho pieup */
755 0x1121, /* jong pieup-sios -> cho pieup-sios */
756 0x1109, /* jong sios -> cho sios */
757 0x110a, /* jong ssangsios -> cho ssangsios */
758 0x110b, /* jong ieung -> cho ieung */
759 0x110c, /* jong cieuc -> cho cieuc */
760 0x110e, /* jong chieuch -> cho chieuch */
761 0x110f, /* jong khieukh -> cho khieukh */
762 0x1110, /* jong thieuth -> cho thieuth */
763 0x1111, /* jong phieuph -> cho phieuph */
764 0x1112, /* jong hieuh -> cho hieuh */
765 0, /* jong kiyeok-rieul */
766 0, /* jong kiyeok-sios-kiyeok */
767 0x1113, /* jong nieun-kiyeok -> cho nieun-kiyeok */
768 0x1115, /* jong nieun-tikeut -> cho nieun-tikeut */
769 0x115b, /* jong nieun-sios -> cho nieun-sios */
770 0, /* jong nieun-pansios */
771 0, /* jong nieun-thieuth */
772 0x1117, /* jong tikeut-kiyeok -> cho tikeut-kiyeok */
773 0x115e, /* jong tikeut-rieul -> cho tikeut-rieul */
774 0, /* jong rieul-kiyeok-sios */
775 0x1118, /* jong rieul-nieun -> cho rieul-nieun */
776 0xa966, /* jong rieul-tikeut -> cho rieul-tikeut */
777 0, /* jong rieul-tikeut-hieuh */
778 0x1119, /* jong ssangrieul -> cho ssangrieul */
779 0, /* jong rieul-mieum-kiyeok */
780 0, /* jong rieul-mieum-sios */
781 0, /* jong rieul-pieup-sios */
782 0, /* jong rieul-pieup-hieuh */
783 0xa96b, /* jong rieul-kapyeounpieup -> cho rieul-kapyeounpieup */
784 0, /* jong rieul-ssangsios */
785 0, /* jong rieul-pansios */
786 0xa96e, /* jong rieul-khieukh -> cho rieul-khieukh */
787 0, /* jong rieul-yeorinhieuh */
788 0xa96f, /* jong mieum-kiyeok -> cho mieum-kiyeok */
789 0, /* jong mieum-rieul */
790 0x111c, /* jong mieum-pieup -> cho mieum-pieup */
791 0xa971, /* jong mieum-sios -> cho mieum-sios */
792 0, /* jong mieum-ssangsios */
793 0, /* jong mieum-pansios */
794 0, /* jong mieum-chieuch */
795 0, /* jong mieum-hieuh */
796 0x111d, /* jong kapyeounmieum -> cho kapyeounmieum */
797 0, /* jong pieup-rieul */
798 0x112a, /* jong pieup-phieuph -> cho pieup-phieuph */
799 0xa974, /* jong pieup-hieuh -> cho pieup-hieuh */
800 0x112b, /* jong kapyeounpieup -> cho kapyeounpieup */
801 0x112d, /* jong sios-kiyeok -> cho sios-kiyeok */
802 0x112f, /* jong sios-tikeut -> cho sios-tikeut */
803 0x1130, /* jong sios-rieul -> cho sios-rieul */
804 0x1132, /* jong sios-pieup -> cho sios-pieup */
805 0x1140, /* jong pansios -> cho pansios */
806 0x1141, /* jong ieung-kiyeok -> cho ieung-kiyeok */
807 0, /* jong ieung-ssangkiyeok */
808 0x1147, /* jong ssangieung -> cho ssangieung */
809 0, /* jong ieung-khieukh */
810 0x114c, /* jong yesieung -> cho yesieung */
811 0, /* jong yesieung-sios */
812 0, /* jong yesieung-pansios */
813 0x1156, /* jong phieuph-pieup -> cho phieuph-pieup */
814 0x1157, /* jong kapyeounphieuph -> cho kapyeounphieuph */
815 0, /* jong hieuh-nieun */
816 0, /* jong hieuh-rieul */
817 0, /* jong hieuh-mieum */
818 0, /* jong hieuh-pieup */
819 0x1159, /* jong yeorinhieuh -> cho yeorinhieuh */
820 0, /* jong kiyeok-nieun */
821 0, /* jong kiyeok-pieup */
822 0, /* jong kiyeok-chieuch */
823 0, /* jong kiyeok-khieukh */
824 0, /* jong kiyeok-hieuh */
825 0x1114, /* jong ssangnieun -> cho ssangnieun */
826 };
827
828 static const ucschar table_ext_b[] = {
829 0, /* jong nieun-rieul */
830 0, /* jong nieun-chieuch */
831 0x1104, /* jong ssangtikeut -> cho ssangtikeut */
832 0, /* jong ssangtikeut-pieup */
833 0xa961, /* jong tikeut-pieup -> cho tikeut-pieup */
834 0xa962, /* jong tikeut-sios -> cho tikeut-sios */
835 0, /* jong tikeut-sios-kiyeok */
836 0xa963, /* jong tikeut-cieuc -> cho tikeut-cieuc */
837 0, /* jong tikeut-chieuch */
838 0, /* jong tikeut-thieuth */
839 0xa965, /* jong rieul-ssangkiyeok -> cho rieul-ssangkiyeok */
840 0, /* jong rieul-kiyeok-hieuh */
841 0, /* jong ssangrieul-khieukh */
842 0, /* jong rieul-mieum-hieuh */
843 0, /* jong rieul-pieup-tikeut */
844 0, /* jong rieul-pieup-phieuph */
845 0, /* jong rieul-yesieung */
846 0, /* jong rieul-yeorinhieuh-hieuh */
847 0x111b, /* jong kapyeounrieul -> cho kapyeounrieul */
848 0, /* jong mieum-nieun */
849 0, /* jong mieum-ssangnieun */
850 0, /* jong ssangmieum */
851 0, /* jong mieum-pieup-sios */
852 0, /* jong mieum-cieuc */
853 0x1120, /* jong pieup-tikeut -> cho pieup-tikeut */
854 0, /* jong pieup-rieul-phieuph */
855 0, /* jong pieup-mieum */
856 0x1108, /* jong ssangpieup -> cho ssangpieup */
857 0x1123, /* jong pieup-sios-tikeut -> cho pieup-sios-tikeut */
858 0x1127, /* jong pieup-cieuc -> cho pieup-cieuc */
859 0x1128, /* jong pieup-chieuch -> cho pieup-chieuch */
860 0x1131, /* jong sios-mieum -> cho sios-mieum */
861 0, /* jong sios-kapyeounpieup */
862 0, /* jong ssangsios-kiyeok */
863 0, /* jong ssangsios-tikeut */
864 0, /* jong sios-pansios */
865 0x1136, /* jong sios-cieuc -> cho sios-cieuc */
866 0x1137, /* jong sios-chieuch -> cho sios-chieuch */
867 0x1139, /* jong sios-thieuth -> cho sios-thieuth */
868 0x113b, /* jong sios-hieuh -> cho sios-hieuh */
869 0, /* jong pansios-pieup */
870 0, /* jong pansios-kapyeounpieup */
871 0, /* jong yesieung-mieum */
872 0, /* jong yesieung-hieuh */
873 0, /* jong cieuc-pieup */
874 0, /* jong cieuc-ssangpieup */
875 0x110d, /* jong ssangcieuc -> cho ssangcieuc */
876 0, /* jong phieuph-sios */
877 0, /* jong phieuph-thieuth */
878 };
879
880 if (c >= 0x11a8 && c <= 0x11ff)
881 return table[c - 0x11a8];
882 else if (c >= 0xd7cb && c <= 0xd7fb)
883 return table_ext_b[c - 0xd7cb];
884
885 return 0;
886 }
887
888 void
hangul_jongseong_dicompose(ucschar c,ucschar * jong,ucschar * cho)889 hangul_jongseong_dicompose(ucschar c, ucschar* jong, ucschar* cho)
890 {
891 static ucschar table[][2] = {
892 { 0, 0x1100 }, /* jong kiyeok = cho kiyeok */
893 { 0x11a8, 0x1100 }, /* jong ssangkiyeok = jong kiyeok + cho kiyeok */
894 { 0x11a8, 0x1109 }, /* jong kiyeok-sios = jong kiyeok + cho sios */
895 { 0, 0x1102 }, /* jong nieun = cho nieun */
896 { 0x11ab, 0x110c }, /* jong nieun-cieuc = jong nieun + cho cieuc */
897 { 0x11ab, 0x1112 }, /* jong nieun-hieuh = jong nieun + cho hieuh */
898 { 0, 0x1103 }, /* jong tikeut = cho tikeut */
899 { 0, 0x1105 }, /* jong rieul = cho rieul */
900 { 0x11af, 0x1100 }, /* jong rieul-kiyeok = jong rieul + cho kiyeok */
901 { 0x11af, 0x1106 }, /* jong rieul-mieum = jong rieul + cho mieum */
902 { 0x11af, 0x1107 }, /* jong rieul-pieup = jong rieul + cho pieup */
903 { 0x11af, 0x1109 }, /* jong rieul-sios = jong rieul + cho sios */
904 { 0x11af, 0x1110 }, /* jong rieul-thieuth = jong rieul + cho thieuth */
905 { 0x11af, 0x1111 }, /* jong rieul-phieuph = jong rieul + cho phieuph */
906 { 0x11af, 0x1112 }, /* jong rieul-hieuh = jong rieul + cho hieuh */
907 { 0, 0x1106 }, /* jong mieum = cho mieum */
908 { 0, 0x1107 }, /* jong pieup = cho pieup */
909 { 0x11b8, 0x1109 }, /* jong pieup-sios = jong pieup + cho sios */
910 { 0, 0x1109 }, /* jong sios = cho sios */
911 { 0x11ba, 0x1109 }, /* jong ssangsios = jong sios + cho sios */
912 { 0, 0x110b }, /* jong ieung = cho ieung */
913 { 0, 0x110c }, /* jong cieuc = cho cieuc */
914 { 0, 0x110e }, /* jong chieuch = cho chieuch */
915 { 0, 0x110f }, /* jong khieukh = cho khieukh */
916 { 0, 0x1110 }, /* jong thieuth = cho thieuth */
917 { 0, 0x1111 }, /* jong phieuph = cho phieuph */
918 { 0, 0x1112 } /* jong hieuh = cho hieuh */
919 };
920
921 *jong = table[c - 0x11a8][0];
922 *cho = table[c - 0x11a8][1];
923 }
924
925 static int
hangul_jongseong_get_ncomponent(ucschar jong)926 hangul_jongseong_get_ncomponent(ucschar jong)
927 {
928 static const char table[] = {
929 1, /* kiyeok */
930 2, /* ssangkiyeok */
931 2, /* kiyeok-sios */
932 1, /* nieun */
933 2, /* nieun-cieuc */
934 2, /* nieun-hieuh */
935 1, /* tikeut */
936 1, /* rieul */
937 2, /* rieul-kiyeok */
938 2, /* rieul-mieum */
939 2, /* rieul-pieup */
940 2, /* rieul-sios */
941 2, /* rieul-thieuth */
942 2, /* rieul-phieuph */
943 2, /* rieul-hieuh */
944 1, /* mieum */
945 1, /* pieup */
946 2, /* pieup-sios */
947 1, /* sios */
948 2, /* ssangsios */
949 1, /* ieung */
950 1, /* cieuc */
951 1, /* chieuch */
952 1, /* khieukh */
953 1, /* thieuth */
954 1, /* phieuph */
955 1, /* hieuh */
956 2, /* kiyeok-rieul */
957 3, /* kiyeok-sios-kiyeok */
958 2, /* nieun-kiyeok */
959 2, /* nieun-tikeut */
960 2, /* nieun-sios */
961 2, /* nieun-pansios */
962 2, /* nieun-thieuth */
963 2, /* tikeut-kiyeok */
964 2, /* tikeut-rieul */
965 3, /* rieul-kiyeok-sios */
966 2, /* rieul-nieun */
967 2, /* rieul-tikeut */
968 3, /* rieul-tikeut-hieuh */
969 2, /* ssangrieul */
970 3, /* rieul-mieum-kiyeok */
971 3, /* rieul-mieum-sios */
972 3, /* rieul-pieup-sios */
973 3, /* rieul-pieup-hieuh */
974 3, /* rieul-kapyeounpieup */
975 3, /* rieul-ssangsios */
976 2, /* rieul-pansios */
977 2, /* rieul-khieukh */
978 2, /* rieul-yeorinhieuh */
979 2, /* mieum-kiyeok */
980 2, /* mieum-rieul */
981 2, /* mieum-pieup */
982 2, /* mieum-sios */
983 3, /* mieum-ssangsios */
984 2, /* mieum-pansios */
985 2, /* mieum-chieuch */
986 2, /* mieum-hieuh */
987 2, /* kapyeounmieum */
988 2, /* pieup-rieul */
989 2, /* pieup-phieuph */
990 2, /* pieup-hieuh */
991 2, /* kapyeounpieup */
992 2, /* sios-kiyeok */
993 2, /* sios-tikeut */
994 2, /* sios-rieul */
995 2, /* sios-pieup */
996 1, /* pansios */
997 2, /* ieung-kiyeok */
998 3, /* ieung-ssangkiyeok */
999 2, /* ssangieung */
1000 2, /* ieung-khieukh */
1001 1, /* yesieung */
1002 2, /* yesieung-sios */
1003 2, /* yesieung-pansios */
1004 2, /* phieuph-pieup */
1005 2, /* kapyeounphieuph */
1006 2, /* hieuh-nieun */
1007 2, /* hieuh-rieul */
1008 2, /* hieuh-mieum */
1009 2, /* hieuh-pieup */
1010 1, /* yeorinhieuh */
1011 2, /* kiyeok-nieun */
1012 2, /* kiyeok-pieup */
1013 2, /* kiyeok-chieuch */
1014 2, /* kiyeok-khieukh */
1015 2, /* kiyeok-hieuh */
1016 2, /* ssangnieun */
1017 };
1018
1019 static const char table_ext_b[] = {
1020 2, /* nieun-rieul */
1021 2, /* nieun-chieuch */
1022 2, /* ssangtikeut */
1023 3, /* ssangtikeut-pieup */
1024 2, /* tikeut-pieup */
1025 2, /* tikeut-sios */
1026 3, /* tikeut-sios-kiyeok */
1027 2, /* tikeut-cieuc */
1028 2, /* tikeut-chieuch */
1029 2, /* tikeut-thieuth */
1030 3, /* rieul-ssangkiyeok */
1031 3, /* rieul-kiyeok-hieuh */
1032 3, /* ssangrieul-khieukh */
1033 3, /* rieul-mieum-hieuh */
1034 3, /* rieul-pieup-tikeut */
1035 3, /* rieul-pieup-phieuph */
1036 2, /* rieul-yesieung */
1037 3, /* rieul-yeorinhieuh-hieuh */
1038 2, /* kapyeounrieul */
1039 2, /* mieum-nieun */
1040 3, /* mieum-ssangnieun */
1041 2, /* ssangmieum */
1042 3, /* mieum-pieup-sios */
1043 2, /* mieum-cieuc */
1044 2, /* pieup-tikeut */
1045 3, /* pieup-rieul-phieuph */
1046 2, /* pieup-mieum */
1047 2, /* ssangpieup */
1048 3, /* pieup-sios-tikeut */
1049 2, /* pieup-cieuc */
1050 2, /* pieup-chieuch */
1051 2, /* sios-mieum */
1052 3, /* sios-kapyeounpieup */
1053 3, /* ssangsios-kiyeok */
1054 3, /* ssangsios-tikeut */
1055 2, /* sios-pansios */
1056 2, /* sios-cieuc */
1057 2, /* sios-chieuch */
1058 2, /* sios-thieuth */
1059 2, /* sios-hieuh */
1060 2, /* pansios-pieup */
1061 3, /* pansios-kapyeounpieup */
1062 2, /* yesieung-mieum */
1063 2, /* yesieung-hieuh */
1064 2, /* cieuc-pieup */
1065 3, /* cieuc-ssangpieup */
1066 2, /* ssangcieuc */
1067 2, /* phieuph-sios */
1068 2, /* phieuph-thieuth */
1069 };
1070
1071 if (jong >= 0x11a8 && jong <= 0x11ff) {
1072 return table[jong - 0x11a8];
1073 } else if (jong >= 0xd7cb && jong <= 0xd7fb) {
1074 return table_ext_b[jong - 0xd7cb];
1075 }
1076
1077 return 0;
1078 }
1079
1080 ucschar
hangul_jongseong_get_diff(ucschar prevjong,ucschar jong)1081 hangul_jongseong_get_diff(ucschar prevjong, ucschar jong)
1082 {
1083 static const ucschar table[][2] = {
1084 { 0x1100, 0x1100 }, /* kiyeok: kiyeok, kiyeok */
1085 { 0x1100, 0x1101 }, /* ssangkiyeok: kiyeok, ssangkiyeok */
1086 { 0x1109, 0 }, /* kiyeok-sios: sios */
1087 { 0x1102, 0x1102 }, /* nieun: nieun, nieun */
1088 { 0x110c, 0x115c }, /* nieun-cieuc: cieuc, nieun-cieuc */
1089 { 0x1112, 0x115d }, /* nieun-hieuh: hieuh, nieun-hieuh */
1090 { 0x1103, 0x1103 }, /* tikeut: tikeut, tikeut */
1091 { 0x1105, 0x1105 }, /* rieul: rieul, rieul */
1092 { 0x1100, 0xa964 }, /* rieul-kiyeok: kiyeok, rieul-kiyeok */
1093 { 0x1106, 0xa968 }, /* rieul-mieum: mieum, rieul-mieum */
1094 { 0x1107, 0xa969 }, /* rieul-pieup: pieup, rieul-pieup */
1095 { 0x1109, 0xa96c }, /* rieul-sios: sios, rieul-sios */
1096 { 0x1110, 0 }, /* rieul-thieuth: thieuth */
1097 { 0x1111, 0 }, /* rieul-phieuph: phieuph */
1098 { 0x1112, 0x111a }, /* rieul-hieuh: hieuh, rieul-hieuh */
1099 { 0x1106, 0x1106 }, /* mieum: mieum, mieum */
1100 { 0x1107, 0x1107 }, /* pieup: pieup, pieup */
1101 { 0x1109, 0x1121 }, /* pieup-sios: sios, pieup-sios */
1102 { 0x1109, 0x1109 }, /* sios: sios, sios */
1103 { 0x1109, 0x110a }, /* ssangsios: sios, ssangsios */
1104 { 0x110b, 0x110b }, /* ieung: ieung, ieung */
1105 { 0x110c, 0x110c }, /* cieuc: cieuc, cieuc */
1106 { 0x110e, 0x110e }, /* chieuch: chieuch, chieuch */
1107 { 0x110f, 0x110f }, /* khieukh: khieukh, khieukh */
1108 { 0x1110, 0x1110 }, /* thieuth: thieuth, thieuth */
1109 { 0x1111, 0x1111 }, /* phieuph: phieuph, phieuph */
1110 { 0x1112, 0x1112 }, /* hieuh: hieuh, hieuh */
1111 { 0x1105, 0 }, /* kiyeok-rieul: rieul */
1112 { 0x1100, 0x112d }, /* kiyeok-sios-kiyeok: kiyeok, sios-kiyeok */
1113 { 0x1100, 0x1113 }, /* nieun-kiyeok: kiyeok, nieun-kiyeok */
1114 { 0x1103, 0x1115 }, /* nieun-tikeut: tikeut, nieun-tikeut */
1115 { 0x1109, 0x115b }, /* nieun-sios: sios, nieun-sios */
1116 { 0x1140, 0 }, /* nieun-pansios: pansios */
1117 { 0x1110, 0 }, /* nieun-thieuth: thieuth */
1118 { 0x1100, 0x1117 }, /* tikeut-kiyeok: kiyeok, tikeut-kiyeok */
1119 { 0x1105, 0x115e }, /* tikeut-rieul: rieul, tikeut-rieul */
1120 { 0x1109, 0 }, /* rieul-kiyeok-sios: sios */
1121 { 0x1102, 0x1118 }, /* rieul-nieun: nieun, rieul-nieun */
1122 { 0x1103, 0xa966 }, /* rieul-tikeut: tikeut, rieul-tikeut */
1123 { 0x1112, 0 }, /* rieul-tikeut-hieuh: hieuh */
1124 { 0x1105, 0x1119 }, /* ssangrieul: rieul, ssangrieul */
1125 { 0x1100, 0xa96f }, /* rieul-mieum-kiyeok: kiyeok, mieum-kiyeok */
1126 { 0x1109, 0xa971 }, /* rieul-mieum-sios: sios, mieum-sios */
1127 { 0x1109, 0x1121 }, /* rieul-pieup-sios: sios, pieup-sios */
1128 { 0x1112, 0xa974 }, /* rieul-pieup-hieuh: hieuh, pieup-hieuh */
1129 { 0x110b, 0x112b }, /* rieul-kapyeounpieup: ieung, kapyeounpieup */
1130 { 0x1109, 0x110a }, /* rieul-ssangsios: sios, ssangsios */
1131 { 0x1140, 0 }, /* rieul-pansios: pansios */
1132 { 0x110f, 0xa96e }, /* rieul-khieukh: khieukh, rieul-khieukh */
1133 { 0x1159, 0 }, /* rieul-yeorinhieuh: yeorinhieuh */
1134 { 0x1100, 0xa96f }, /* mieum-kiyeok: kiyeok, mieum-kiyeok */
1135 { 0x1105, 0 }, /* mieum-rieul: rieul */
1136 { 0x1107, 0x111c }, /* mieum-pieup: pieup, mieum-pieup */
1137 { 0x1109, 0xa971 }, /* mieum-sios: sios, mieum-sios */
1138 { 0x1109, 0x110a }, /* mieum-ssangsios: sios, ssangsios */
1139 { 0x1140, 0 }, /* mieum-pansios: pansios */
1140 { 0x110e, 0 }, /* mieum-chieuch: chieuch */
1141 { 0x1112, 0 }, /* mieum-hieuh: hieuh */
1142 { 0x110b, 0x111d }, /* kapyeounmieum: ieung, kapyeounmieum */
1143 { 0x1105, 0 }, /* pieup-rieul: rieul */
1144 { 0x1111, 0x112a }, /* pieup-phieuph: phieuph, pieup-phieuph */
1145 { 0x1112, 0xa974 }, /* pieup-hieuh: hieuh, pieup-hieuh */
1146 { 0x110b, 0x112b }, /* kapyeounpieup: ieung, kapyeounpieup */
1147 { 0x1100, 0x112d }, /* sios-kiyeok: kiyeok, sios-kiyeok */
1148 { 0x1103, 0x112f }, /* sios-tikeut: tikeut, sios-tikeut */
1149 { 0x1105, 0x1130 }, /* sios-rieul: rieul, sios-rieul */
1150 { 0x1107, 0x1132 }, /* sios-pieup: pieup, sios-pieup */
1151 { 0x1140, 0x1140 }, /* pansios: pansios, pansios */
1152 { 0x1100, 0 }, /* yesieung-kiyeok: kiyeok */
1153 { 0x1100, 0x1101 }, /* yesieung-ssangkiyeok: kiyeok, ssangkiyeok */
1154 { 0x114c, 0 }, /* ssangyesieung: yesieung */
1155 { 0x110f, 0 }, /* yesieung-khieukh: khieukh */
1156 { 0x114c, 0x114c }, /* yesieung: yesieung, yesieung */
1157 { 0x1109, 0 }, /* yesieung-sios: sios */
1158 { 0x1140, 0 }, /* yesieung-pansios: pansios */
1159 { 0x1107, 0x1156 }, /* phieuph-pieup: pieup, phieuph-pieup */
1160 { 0x110b, 0x1157 }, /* kapyeounphieuph: ieung, kapyeounphieuph */
1161 { 0x1102, 0 }, /* hieuh-nieun: nieun */
1162 { 0x1105, 0 }, /* hieuh-rieul: rieul */
1163 { 0x1106, 0 }, /* hieuh-mieum: mieum */
1164 { 0x1107, 0 }, /* hieuh-pieup: pieup */
1165 { 0x1159, 0x1159 }, /* yeorinhieuh: yeorinhieuh, yeorinhieuh */
1166 { 0x1102, 0 }, /* kiyeok-nieun: nieun */
1167 { 0x1107, 0 }, /* kiyeok-pieup: pieup */
1168 { 0x110e, 0 }, /* kiyeok-chieuch: chieuch */
1169 { 0x110f, 0 }, /* kiyeok-khieukh: khieukh */
1170 { 0x1112, 0 }, /* kiyeok-hieuh: hieuh */
1171 { 0x1102, 0x1114 }, /* ssangnieun: nieun, ssangnieun */
1172 };
1173
1174 static const ucschar table_ext_b[][2] = {
1175 { 0x1105, 0 }, /* nieun-rieul: rieul */
1176 { 0x110e, 0 }, /* nieun-chieuch: chieuch */
1177 { 0x1103, 0x1104 }, /* ssangtikeut: tikeut, ssangtikeut */
1178 { 0x1107, 0xa961 }, /* ssangtikeut-pieup: pieup, tikeut-pieup */
1179 { 0x1107, 0xa961 }, /* tikeut-pieup: pieup, tikeut-pieup */
1180 { 0x1109, 0xa962 }, /* tikeut-sios: sios, tikeut-sios */
1181 { 0x1100, 0x112d }, /* tikeut-sios-kiyeok: kiyeok, sios-kiyeok */
1182 { 0x110c, 0xa963 }, /* tikeut-cieuc: cieuc, tikeut-cieuc */
1183 { 0x110e, 0 }, /* tikeut-chieuch: chieuch */
1184 { 0x1110, 0 }, /* tikeut-thieuth: thieuth */
1185 { 0x1100, 0x1101 }, /* rieul-ssangkiyeok: kiyeok, ssangkiyeok */
1186 { 0x1112, 0 }, /* rieul-kiyeok-hieuh: hieuh */
1187 { 0x110f, 0xa96e }, /* ssangrieul-khieukh: khieukh, rieul-khieukh */
1188 { 0x1112, 0 }, /* rieul-mieum-hieuh: hieuh */
1189 { 0x1103, 0x1120 }, /* rieul-pieup-tikeut: tikeut, pieup-tikeut */
1190 { 0x1111, 0x112a }, /* rieul-pieup-phieuph: phieuph, pieup-phieuph */
1191 { 0x114c, 0 }, /* rieul-yesieung: yesieung */
1192 { 0x1112, 0 }, /* rieul-yeorinhieuh-hieuh: hieuh */
1193 { 0x110b, 0x111b }, /* kapyeounrieul: ieung, kapyeounrieul */
1194 { 0x1102, 0 }, /* mieum-nieun: nieun */
1195 { 0x1102, 0x1114 }, /* mieum-ssangnieun: nieun, ssangnieun */
1196 { 0x1106, 0 }, /* ssangmieum: mieum */
1197 { 0x1109, 0x1121 }, /* mieum-pieup-sios: sios, pieup-sios */
1198 { 0x110c, 0 }, /* mieum-cieuc: cieuc */
1199 { 0x1103, 0x1120 }, /* pieup-tikeut: tikeut, pieup-tikeut */
1200 { 0x1111, 0 }, /* pieup-rieul-phieuph: phieuph */
1201 { 0x1106, 0 }, /* pieup-mieum: mieum */
1202 { 0x1107, 0x1108 }, /* ssangpieup: pieup, ssangpieup */
1203 { 0x1103, 0x112f }, /* pieup-sios-tikeut: tikeut, sios-tikeut */
1204 { 0x110c, 0x1127 }, /* pieup-cieuc: cieuc, pieup-cieuc */
1205 { 0x110e, 0x1128 }, /* pieup-chieuch: chieuch, pieup-chieuch */
1206 { 0x1106, 0x1131 }, /* sios-mieum: mieum, sios-mieum */
1207 { 0x110b, 0x112b }, /* sios-kapyeounpieup: ieung, kapyeounpieup */
1208 { 0x1100, 0x112d }, /* ssangsios-kiyeok: kiyeok, sios-kiyeok */
1209 { 0x1103, 0x112f }, /* ssangsios-tikeut: tikeut, sios-tikeut */
1210 { 0x1140, 0 }, /* sios-pansios: pansios */
1211 { 0x110c, 0x1136 }, /* sios-cieuc: cieuc, sios-cieuc */
1212 { 0x110e, 0x1137 }, /* sios-chieuch: chieuch, sios-chieuch */
1213 { 0x1110, 0x1139 }, /* sios-thieuth: thieuth, sios-thieuth */
1214 { 0x1112, 0x113b }, /* sios-hieuh: hieuh, sios-hieuh */
1215 { 0x1107, 0 }, /* pansios-pieup: pieup */
1216 { 0x110b, 0x112b }, /* pansios-kapyeounpieup: ieung, kapyeounpieup */
1217 { 0x1106, 0 }, /* yesieung-mieum: mieum */
1218 { 0x1112, 0 }, /* yesieung-hieuh: hieuh */
1219 { 0x1107, 0 }, /* cieuc-pieup: pieup */
1220 { 0x1107, 0x1108 }, /* cieuc-ssangpieup: pieup, ssangpieup */
1221 { 0x110c, 0x110d }, /* ssangcieuc: cieuc, ssangcieuc */
1222 { 0x1109, 0 }, /* phieuph-sios: sios */
1223 { 0x1110, 0 }, /* phieuph-thieuth: thieuth */
1224 };
1225
1226 ucschar cho = 0;
1227
1228 if (prevjong == 0) {
1229 cho = hangul_jongseong_to_choseong(jong);
1230 } else {
1231 int diff;
1232 int n1;
1233 int n2;
1234
1235 n1 = hangul_jongseong_get_ncomponent(prevjong);
1236 n2 = hangul_jongseong_get_ncomponent(jong);
1237
1238 diff = n2 - n1 - 1;
1239 if (diff >= 0 && diff < 2) {
1240 if (jong >= 0x11a8 && jong <= 0x11ff) {
1241 cho = table[jong - 0x11a8][diff];
1242 } else if (jong >= 0xd7cb && jong <= 0xd7fb) {
1243 cho = table_ext_b[jong - 0xd7cb][diff];
1244 }
1245 } else if (diff == 2) {
1246 cho = hangul_jongseong_to_choseong(jong);
1247 }
1248 }
1249
1250 return cho;
1251 }
1252
1253 /**
1254 * @ingroup hangulctype
1255 * @brief 자모 코드를 조합하여 한글 음절로 변환
1256 * @param choseong 초성이 될 UCS4 코드 값
1257 * @param jungseong 중성이 될 UCS4 코드 값
1258 * @param jongseong 종성이 될 UCS4 코드 값
1259 * @return @a choseong @a jungseong @a jongseong을 조합한 현대 한글 음절 코드,
1260 * 또는 0
1261 *
1262 * 이 함수는 @a choseong @a jungseong @a jongseong으로 주어진 코드 값을 각각
1263 * 초성, 중성, 종성으로 하는 현대 한글 음절 코드를 구한다.
1264 * @a choseong @a jungseong @a jongseong 이 조합 가능한 코드가 아니라면
1265 * 0을 리턴한다. 종성이 없는 글자를 만들기 위해서는 jongseong에 0을 주면 된다.
1266 */
1267 ucschar
hangul_jamo_to_syllable(ucschar choseong,ucschar jungseong,ucschar jongseong)1268 hangul_jamo_to_syllable(ucschar choseong, ucschar jungseong, ucschar jongseong)
1269 {
1270 ucschar c;
1271
1272 /* we use 0x11a7 like a Jongseong filler */
1273 if (jongseong == 0)
1274 jongseong = 0x11a7; /* Jongseong filler */
1275
1276 if (!hangul_is_choseong_conjoinable(choseong))
1277 return 0;
1278 if (!hangul_is_jungseong_conjoinable(jungseong))
1279 return 0;
1280 if (!hangul_is_jongseong_conjoinable(jongseong))
1281 return 0;
1282
1283 choseong -= choseong_base;
1284 jungseong -= jungseong_base;
1285 jongseong -= jongseong_base;
1286
1287 c = ((choseong * njungseong) + jungseong) * njongseong + jongseong
1288 + syllable_base;
1289 return c;
1290 }
1291
1292 /**
1293 * @ingroup hangulctype
1294 * @brief 음절을 자모로 분해
1295 * @param syllable 분해할 음절
1296 * @retval choseong 음절에서 초성 부분의 코드
1297 * @retval jungseong 음절에서 중성 부분의 코드
1298 * @retval jongseong 음절에서 종성 부분의 코드, 종성이 없으면 0을 반환한다
1299 * @return 없음
1300 *
1301 * 이 함수는 @a syllable 로 주어진 음절 코드를 분해하여 자모 코드를 반환한다.
1302 * 반환하는 값은 @a choseong, @a jungseong, @a jongseong 의 포인터에 대입하여
1303 * 리턴한다. 종성이 없는 음절인 경우에는 @a jongseong 에 0을 반환한다.
1304 */
1305 void
hangul_syllable_to_jamo(ucschar syllable,ucschar * choseong,ucschar * jungseong,ucschar * jongseong)1306 hangul_syllable_to_jamo(ucschar syllable,
1307 ucschar* choseong,
1308 ucschar* jungseong,
1309 ucschar* jongseong)
1310 {
1311 if (jongseong != NULL)
1312 *jongseong = 0;
1313 if (jungseong != NULL)
1314 *jungseong = 0;
1315 if (choseong != NULL)
1316 *choseong = 0;
1317
1318 if (!hangul_is_syllable(syllable))
1319 return;
1320
1321 syllable -= syllable_base;
1322 if (jongseong != NULL) {
1323 if (syllable % njongseong != 0)
1324 *jongseong = jongseong_base + syllable % njongseong;
1325 }
1326 syllable /= njongseong;
1327
1328 if (jungseong != NULL) {
1329 *jungseong = jungseong_base + syllable % njungseong;
1330 }
1331 syllable /= njungseong;
1332
1333 if (choseong != NULL) {
1334 *choseong = choseong_base + syllable;
1335 }
1336 }
1337
1338 /** @deprecated 이 함수 대신 hangul_syllable_to_jamo함수를 사용한다. */
1339 void
hangul_syllable_to_jaso(ucschar syllable,ucschar * choseong,ucschar * jungseong,ucschar * jongseong)1340 hangul_syllable_to_jaso(ucschar syllable,
1341 ucschar* choseong,
1342 ucschar* jungseong,
1343 ucschar* jongseong)
1344 {
1345 return hangul_syllable_to_jamo(syllable, choseong, jungseong, jongseong);
1346 }
1347
1348 static inline bool
is_syllable_boundary(ucschar prev,ucschar next)1349 is_syllable_boundary(ucschar prev, ucschar next)
1350 {
1351 if (hangul_is_choseong(prev)) {
1352 if (hangul_is_choseong(next))
1353 return false;
1354 if (hangul_is_jungseong(next))
1355 return false;
1356 if (hangul_is_syllable(next))
1357 return false;
1358 if (hangul_is_combining_mark(next))
1359 return false;
1360 if (next == HANGUL_JUNGSEONG_FILLER)
1361 return false;
1362 } else if (prev == HANGUL_CHOSEONG_FILLER) {
1363 if (hangul_is_jungseong(next))
1364 return false;
1365 if (next == HANGUL_JUNGSEONG_FILLER)
1366 return false;
1367 } else if (hangul_is_jungseong(prev)) {
1368 if (hangul_is_jungseong(next))
1369 return false;
1370 if (hangul_is_jongseong(next))
1371 return false;
1372 if (hangul_is_combining_mark(next))
1373 return false;
1374 } else if (prev == HANGUL_JUNGSEONG_FILLER) {
1375 if (hangul_is_jongseong(next))
1376 return false;
1377 } else if (hangul_is_jongseong(prev)) {
1378 if (hangul_is_jongseong(next))
1379 return false;
1380 if (hangul_is_combining_mark(next))
1381 return false;
1382 } else if (hangul_is_syllable(prev)) {
1383 if ((prev - syllable_base) % njongseong == 0) {
1384 // 종성이 없는 음절: LV
1385 if (hangul_is_jungseong(next))
1386 return false;
1387 if (hangul_is_jongseong(next))
1388 return false;
1389 } else {
1390 // 종성이 있는 음절: LVT
1391 if (hangul_is_jongseong(next))
1392 return false;
1393 }
1394 if (hangul_is_combining_mark(next))
1395 return false;
1396 }
1397
1398 return true;
1399 }
1400
1401 static inline ucschar
choseong_compress(ucschar a,ucschar b)1402 choseong_compress(ucschar a, ucschar b)
1403 {
1404 if (a == 0)
1405 return b;
1406
1407 if (a == 0x1100 && b == 0x1100)
1408 return 0x1101;
1409 if (a == 0x1103 && b == 0x1103)
1410 return 0x1104;
1411 if (a == 0x1107 && b == 0x1107)
1412 return 0x1108;
1413 if (a == 0x1109 && b == 0x1109)
1414 return 0x110A;
1415 if (a == 0x110c && b == 0x110c)
1416 return 0x110d;
1417 return 0;
1418 }
1419
1420 static inline ucschar
jungseong_compress(ucschar a,ucschar b)1421 jungseong_compress(ucschar a, ucschar b)
1422 {
1423 if (a == 0)
1424 return b;
1425
1426 if (a == 0x1169) {
1427 if (b == 0x1161)
1428 return 0x116a;
1429 if (b == 0x1162)
1430 return 0x116b;
1431 if (b == 0x1175)
1432 return 0x116c;
1433 }
1434 if (a == 0x116e) {
1435 if (b == 0x1165)
1436 return 0x116f;
1437 if (b == 0x1166)
1438 return 0x1170;
1439 if (b == 0x1175)
1440 return 0x1171;
1441 }
1442 if (b == 0x1175) {
1443 if (a == 0x1173)
1444 return 0x1174;
1445 if (a == 0x1161)
1446 return 0x1162;
1447 if (a == 0x1163)
1448 return 0x1164;
1449 if (a == 0x1165)
1450 return 0x1166;
1451 if (a == 0x1167)
1452 return 0x1168;
1453 }
1454
1455 return 0;
1456 }
1457
1458 static inline ucschar
jongseong_compress(ucschar a,ucschar b)1459 jongseong_compress(ucschar a, ucschar b)
1460 {
1461 if (a == 0)
1462 return b;
1463
1464 if (a == 0x11a8) {
1465 if (b == 0x11a8)
1466 return 0x11a9;
1467 if (b == 0x11ba)
1468 return 0x11aa;
1469 }
1470 if (a == 0x11ab) {
1471 if (b == 0x11b0)
1472 return 0x11ab;
1473 if (b == 0x11c2)
1474 return 0x11ad;
1475 }
1476 if (a == 0x11af) {
1477 if (b == 0x11a8)
1478 return 0x11b0;
1479 if (b == 0x11b7)
1480 return 0x11b1;
1481 if (b == 0x11b8)
1482 return 0x11b2;
1483 if (b == 0x11ba)
1484 return 0x11b3;
1485 if (b == 0x11c0)
1486 return 0x11b4;
1487 if (b == 0x11c1)
1488 return 0x11b5;
1489 if (b == 0x11c2)
1490 return 0x11b6;
1491 }
1492 if (a == 0x11b8 && b == 0x11ba)
1493 return 0x11b9;
1494 if (a == 0x11ba && b == 0x11ba)
1495 return 0x11bb;
1496
1497 return 0;
1498 }
1499
1500 static inline ucschar
build_syllable(const ucschar * str,size_t len)1501 build_syllable(const ucschar* str, size_t len)
1502 {
1503 int i;
1504 ucschar cho = 0, jung = 0, jong = 0;
1505
1506 i = 0;
1507 while (i < len && hangul_is_choseong_conjoinable(str[i])) {
1508 cho = choseong_compress(cho, str[i]);
1509 if (cho == 0)
1510 return 0;
1511 i++;
1512 }
1513
1514 while (i < len && hangul_is_jungseong_conjoinable(str[i])) {
1515 jung = jungseong_compress(jung, str[i]);
1516 if (jung == 0)
1517 return 0;
1518 i++;
1519 }
1520
1521 while (i < len && hangul_is_jongseong_conjoinable(str[i])) {
1522 jong = jongseong_compress(jong, str[i]);
1523 if (jong == 0)
1524 return 0;
1525 i++;
1526 }
1527
1528 if (i < len)
1529 return 0;
1530
1531 return hangul_jamo_to_syllable(cho, jung, jong);
1532 }
1533
1534 /**
1535 * @ingroup hangulctype
1536 * @brief 한 음절에 해당하는 코드의 갯수를 구하는 함수
1537 * @param str 음절의 길이를 구할 스트링
1538 * @param max_len @a str 에서 읽을 길이의 제한값
1539 * @return 한 음절에 해당하는 코드의 갯수
1540 *
1541 * 이 함수는 @a str 에서 한 음절에 해당하는 코드의 갯수를 구한다.
1542 * 한 음절에 해당하는 코드의 갯수가 @a max_len 보다 많다면 @a max_len 을
1543 * 반환한다. 한 음절이라고 판단하는 기준은 L*V*T+ 패턴에 따른다. 이 패턴은
1544 * regular expression의 컨벤션을 따른 것으로, 1개 이상의 초성과 중성, 0개
1545 * 이상의 종성이 모인 자모 스트링을 한 음절로 인식한다는 뜻이다. 예를 들면
1546 * 다음과 같은 자모 스트링도 한 음절로 인식한다.
1547 *
1548 * 예) "ㅂ ㅂ ㅜ ㅔ ㄹ ㄱ" -> "쀍"
1549 *
1550 * 따라서 위 경우에는 6을 반환하게 된다.
1551 *
1552 * 일반적으로는 방점(U+302E, U+302F)까지 한 음절로 인식하겠지만, 이 함수는
1553 * 음절과 자모간 변환을 편리하게 하기 위해 구현된 것으로 방점은 다른 음절로
1554 * 인식한다.
1555 *
1556 * @a str 이 자모 코드에 해당하지 않는 경우에는 1을 반환한다.
1557 *
1558 * 이 함수는 자모 스트링에서 총 음절의 갯수를 구하는 함수가 아님에 주의한다.
1559 */
1560 int
hangul_syllable_len(const ucschar * str,int max_len)1561 hangul_syllable_len(const ucschar* str, int max_len)
1562 {
1563 int i = 0;
1564
1565 if (max_len == 0)
1566 return 0;
1567
1568 if (str[i] != 0) {
1569 for (i = 1; i < max_len; i++) {
1570 if (str[i] == 0)
1571 break;
1572
1573 if (is_syllable_boundary(str[i - 1], str[i]))
1574 break;
1575 }
1576 }
1577
1578 return i;
1579 }
1580
1581 /**
1582 * @ingroup hangulctype
1583 * @brief @a iter를 기준으로 이전 음절의 첫자모 글자에 대한 포인터를 구하는 함수
1584 * @param iter 현재 위치
1585 * @param begin 스트링의 시작위치, 포인터가 이동할 한계값
1586 * @return 이전 음절의 첫번째 자모에 대한 포인터
1587 *
1588 * 이 함수는 @a iter로 주어진 자모 스트링의 포인터를 기준으로 이전 음절의
1589 * 첫번째 자모에 대한 포인터를 리턴한다. 음절을 찾기위해서 begin보다
1590 * 앞쪽으로 이동하지 않는다.
1591 *
1592 * 한 음절이라고 판단하는 기준은 L*V*T+M? 패턴에 따른다.
1593 */
1594 const ucschar*
hangul_syllable_iterator_prev(const ucschar * iter,const ucschar * begin)1595 hangul_syllable_iterator_prev(const ucschar* iter, const ucschar* begin)
1596 {
1597 if (iter > begin)
1598 iter--;
1599
1600 while (iter > begin) {
1601 ucschar prev = iter[-1];
1602 ucschar curr = iter[0];
1603 if (is_syllable_boundary(prev, curr))
1604 break;
1605 iter--;
1606 }
1607
1608 return iter;
1609 }
1610
1611 /**
1612 * @ingroup hangulctype
1613 * @brief @a iter를 기준으로 다음 음절의 첫자모 글자에 대한 포인터를 구하는 함수
1614 * @param iter 현재 위치
1615 * @param end 스트링의 끝위치, 포인터가 이동할 한계값
1616 * @return 다음 음절의 첫번째 자모에 대한 포인터
1617 *
1618 * 이 함수는 @a iter로 주어진 자모 스트링의 포인터를 기준으로 다음 음절의
1619 * 첫번째 자모에 대한 포인터를 리턴한다. 음절을 찾기위해서 end를 넘어
1620 * 이동하지 않는다.
1621 *
1622 * 한 음절이라고 판단하는 기준은 L*V*T+M? 패턴에 따른다.
1623 */
1624 const ucschar*
hangul_syllable_iterator_next(const ucschar * iter,const ucschar * end)1625 hangul_syllable_iterator_next(const ucschar* iter, const ucschar* end)
1626 {
1627 if (iter < end)
1628 iter++;
1629
1630 while (iter < end) {
1631 ucschar prev = iter[-1];
1632 ucschar curr = iter[0];
1633 if (is_syllable_boundary(prev, curr))
1634 break;
1635 iter++;
1636 }
1637
1638 return iter;
1639 }
1640
1641 /**
1642 * @ingroup hangulctype
1643 * @brief 자모 스트링을 음절 스트링으로 변환
1644 * @param dest 음절형으로 변환된 결과가 저장될 버퍼
1645 * @param destlen 결과를 저장할 버퍼의 길이(ucschar 코드 단위)
1646 * @param src 변환할 자모 스트링
1647 * @param srclen 변환할 자모 스트링의 길이(ucschar 코드 단위)
1648 * @return @a destlen 에 저장한 코드의 갯수
1649 *
1650 * 이 함수는 L+V+T*M? 패턴에 따라 자모 스트링 변환을 시도한다. 한 음절을
1651 * 판단하는 기준은 @ref hangul_syllable_len 을 참조한다.
1652 * 만일 @a src 가 적절한 음절형태로 변환이 불가능한 경우에는 자모 스트링이
1653 * 그대로 복사된다.
1654 *
1655 * 이 함수는 자모 스트링 @a src 를 음절형으로 변환하여 @a dest 에 저장한다.
1656 * @a srclen 에 지정된 갯수만큼 읽고, @a destlen 에 지정된 길이 이상 쓰지
1657 * 않는다. @a srclen 이 -1이라면 @a src 는 0으로 끝나는 스트링으로 가정하고
1658 * 0을 제외한 길이까지 변환을 시도한다. 따라서 변환된 결과 스트링은 0으로
1659 * 끝나지 않는다. 만일 0으로 끝나는 스트링을 만들고 싶다면 다음과 같이 한다.
1660 *
1661 * @code
1662 * int n = hangul_jamos_to_syllables(dest, destlen, src, srclen);
1663 * dest[n] = 0;
1664 * @endcode
1665 */
1666 int
hangul_jamos_to_syllables(ucschar * dest,int destlen,const ucschar * src,int srclen)1667 hangul_jamos_to_syllables(ucschar* dest, int destlen, const ucschar* src, int srclen)
1668 {
1669 ucschar* d;
1670 const ucschar* s;
1671
1672 int inleft;
1673 int outleft;
1674 int n;
1675
1676 if (srclen < 0) {
1677 s = src;
1678 while (*s != 0)
1679 s++;
1680 srclen = s - src;
1681 }
1682
1683 s = src;
1684 d = dest;
1685 inleft = srclen;
1686 outleft = destlen;
1687
1688 n = hangul_syllable_len(s, inleft);
1689 while (n > 0 && inleft > 0 && outleft > 0) {
1690 ucschar c = build_syllable(s, n);
1691 if (c != 0) {
1692 *d = c;
1693 d++;
1694 outleft--;
1695 } else {
1696 int i;
1697 for (i = 0; i < n && i < outleft; i++) {
1698 d[i] = s[i];
1699 }
1700 d += i;
1701 outleft -= i;
1702 }
1703
1704 s += n;
1705 inleft -= n;
1706 n = hangul_syllable_len(s, inleft);
1707 }
1708
1709 return destlen - outleft;
1710 }
1711