1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2
3 #include "ef_johab_conv.h"
4
5 #include <pobl/bl_mem.h>
6 #include <pobl/bl_debug.h>
7
8 #include "ef_ko_kr_map.h"
9
10 #if 0
11 #define __DEBUG
12 #endif
13
14 /* --- static functions --- */
15
/*
 * Rewrites *ch in place into a charset that convert_to_johab() can emit.
 *
 *  - ISO10646_UCS4_1 characters are first passed to ef_map_ucs4_to_ko_kr();
 *    if that mapping fails the character is left untouched.
 *  - KSC5601_1987 characters are passed to ef_map_ksc5601_1987_to_uhc() so
 *    that all Korean characters go through the same UHC path below.
 *  - UHC characters are passed to ef_map_uhc_to_johab() (Hangul).  Only when
 *    that mapping fails is ef_map_uhc_to_ksc5601_1987() tried for the rest.
 *
 * Characters in any other charset, and characters for which a mapping
 * fails, are left as they are.
 */
static void remap_unsupported_charset(ef_char_t *ch) {
  ef_char_t c;

  if (ch->cs == ISO10646_UCS4_1) {
    if (!ef_map_ucs4_to_ko_kr(&c, ch)) {
      /* Mapping failed: nothing more can be done with this character. */
      return;
    }

    *ch = c;
  }

  /*
   * All Korean characters are converted to UHC once.
   */
  if (ch->cs == KSC5601_1987) {
    if (ef_map_ksc5601_1987_to_uhc(&c, ch)) {
      *ch = c;
    }
  }

  if (ch->cs == UHC) {
    if (ef_map_uhc_to_johab(&c, ch)) {
      /*
       * Hangul is converted to johab.
       * *ch now holds the result of the johab mapping, so it must not be
       * handed to the UHC -> KSC5601 mapper below.
       */
      *ch = c;
    } else if (ef_map_uhc_to_ksc5601_1987(&c, ch)) {
      /*
       * The rest may be Chinese characters or the like, and they are all
       * converted to ksc5601.
       */
      *ch = c;
    }
  }
}
53
/*
 * Computes the two output bytes for a KSC5601_1987 character that was not
 * remapped to JOHAB, from its first byte (row) and second byte (cell).
 *
 * Stores the result in johab[0] and johab[1] and returns 1.  Returns 0
 * without touching johab[] when row matches none of the handled ranges
 * (<= 0x2c, 0x49, 0x4a-0x7d, 0x7e).
 */
static int ksc5601_to_johab_bytes(u_char row, u_char cell, u_char *johab) {
  u_char lead;
  int lower_trail; /* non-zero: trail byte is placed below 0xa1 */

  if (row <= 0x2c) {
    /* Two consecutive rows share one lead byte; the odd row comes first. */
    lead = (row + 0x191) / 2;
    lower_trail = (row % 2 == 1);
  } else if (row == 0x49) {
    lead = 0xd8;
    lower_trail = 1;
  } else if (row == 0x7e) {
    lead = 0xd8;
    lower_trail = 0;
  } else if (0x4a <= row && row <= 0x7d) {
    /* Two consecutive rows share one lead byte; the even row comes first. */
    lead = (row + 0x176) / 2;
    lower_trail = (row % 2 == 0);
  } else {
    return 0;
  }

  johab[0] = lead;

  if (!lower_trail) {
    johab[1] = cell + 0x80;
  } else if (cell <= 0x6e) {
    johab[1] = cell + 0x10;
  } else {
    /* Cells above 0x6e are shifted further, leaving a gap in the trail bytes. */
    johab[1] = cell + 0x22;
  }

  return 1;
}

/*
 * Reads characters from parser and writes their johab byte sequence to dst.
 *
 * Each character is first passed through remap_unsupported_charset().
 * JOHAB characters are copied as two bytes, KSC5601_1987 characters are
 * re-encoded arithmetically (characters that cannot be re-encoded are
 * reported and dropped), US_ASCII characters are copied as one byte, and
 * anything else is handed to conv->illegal_char when that hook is set
 * (otherwise it is skipped).
 *
 * When the next character does not fit in the remaining space of dst,
 * ef_parser_full_reset() is called on parser and conversion stops.
 *
 * Returns the number of bytes written to dst.
 */
static size_t convert_to_johab(ef_conv_t *conv, u_char *dst, size_t dst_size,
                               ef_parser_t *parser) {
  size_t filled_size = 0;
  ef_char_t ch;

  while (ef_parser_next_char(parser, &ch)) {
    remap_unsupported_charset(&ch);

    if (ch.cs == JOHAB) {
      /* Hangul */

      if (dst_size <= filled_size + 1) {
        goto buffer_full;
      }

      dst[0] = ch.ch[0];
      dst[1] = ch.ch[1];
      dst += 2;

      filled_size += 2;
    } else if (ch.cs == KSC5601_1987) {
      /*
       * Not Hangul.
       * KSC5601_1987 Hangul chars were remapped to JOHAB in
       * remap_unsupported_charset().
       */

      u_char johab[2];

      if (dst_size <= filled_size + 1) {
        goto buffer_full;
      }

#ifdef __DEBUG
      bl_debug_printf("0x%.2x%.2x -> ", ch.ch[0], ch.ch[1]);
#endif

      if (!ksc5601_to_johab_bytes(ch.ch[0], ch.ch[1], johab)) {
#ifdef DEBUG
        bl_warn_printf(BL_DEBUG_TAG " illegal johab format. cs(%x)/char(%x) is discarded.\n", ch.cs,
                       ef_char_to_int(&ch));
#endif

        bl_msg_printf("conversion failed.\n");

        continue;
      }

#ifdef __DEBUG
      bl_debug_printf("0x%.2x%.2x\n", johab[0], johab[1]);
#endif

      dst[0] = johab[0];
      dst[1] = johab[1];
      dst += 2;

      filled_size += 2;
    } else if (ch.cs == US_ASCII) {
      if (dst_size <= filled_size) {
        goto buffer_full;
      }

      dst[0] = ch.ch[0];
      dst += 1;

      filled_size += 1;
    } else if (conv->illegal_char) {
      int is_full;
      size_t size;

      /* Let the hook decide what to emit for an unsupported character. */
      size = (*conv->illegal_char)(conv, dst, dst_size - filled_size, &is_full, &ch);
      if (is_full) {
        goto buffer_full;
      }

      dst += size;
      filled_size += size;
    }
  }

  return filled_size;

buffer_full:
  /* The character just read did not fit into dst. */
  ef_parser_full_reset(parser);

  return filled_size;
}
184
/*
 * init hook of the johab converter.  Intentionally does nothing.
 */
static void conv_init(ef_conv_t *conv) {
  (void)conv;
}
186
/*
 * destroy hook of the johab converter: releases the converter object
 * allocated by ef_johab_conv_new().
 */
static void conv_destroy(ef_conv_t *conv) {
  free(conv);
}
188
189 /* --- global functions --- */
190
ef_johab_conv_new(void)191 ef_conv_t *ef_johab_conv_new(void) {
192 ef_conv_t *conv;
193
194 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
195 return NULL;
196 }
197
198 conv->convert = convert_to_johab;
199 conv->init = conv_init;
200 conv->destroy = conv_destroy;
201 conv->illegal_char = NULL;
202
203 return conv;
204 }
205