1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2
3 #include "ef_iso2022jp_conv.h"
4
5 #include <stdio.h> /* NULL */
6 #include <pobl/bl_mem.h>
7 #include <pobl/bl_debug.h>
8
9 #include "ef_iso2022_conv.h"
10 #include "ef_iso2022_intern.h"
11 #include "ef_ucs4_map.h"
12 #include "ef_ja_jp_map.h"
13
14 /* --- static functions --- */
15
/*
 * Rewrite *ch in place so that its charset is one the ISO-2022-JP family
 * converter below knows how to designate.
 *
 * ch      : character to remap (modified in place).
 * version : ISO-2022-JP variant; 3 prefers JISX0213_2000_1 over
 *           JISX0208_1983, any other value prefers the reverse.
 *
 * A character that cannot be mapped is left with its current charset.
 */
static void remap_unsupported_charset(ef_char_t *ch, int version) {
  ef_char_t mapped;

  /* UCS4 => some Japanese charset, when a mapping exists. */
  if (ch->cs == ISO10646_UCS4_1 && ef_map_ucs4_to_ja_jp(&mapped, ch)) {
    *ch = mapped;
  }

  /* Generic ISO-2022 remapping shared with the other ISO-2022 converters. */
  ef_iso2022_remap_unsupported_charset(ch);

  /*
   * various gaiji chars => jis
   */
  if (ch->cs == SJIS_IBM_EXT) {
    /*
     * IBM extension characters cannot be regarded as
     * jisc6226_1978/jisx0208_1983 gaiji (which is based on an iso2022 94n
     * charset), so they are really remapped here: jisx0208_1983 is tried
     * first, then jisx0212_1990.
     */
    if (ef_map_sjis_ibm_ext_to_jisx0208_1983(&mapped, ch) ||
        ef_map_sjis_ibm_ext_to_jisx0212_1990(&mapped, ch)) {
      *ch = mapped;
    } else {
      /* No mapping: leave the character untouched and stop here. */
      return;
    }
  } else if (ch->cs == JISC6226_1978_NEC_EXT || ch->cs == JISC6226_1978_NECIBM_EXT) {
    /*
     * NEC special characters and NEC selected IBM characters are exactly in
     * the gaiji area of jisc6226_1978, so only the charset tag is switched;
     * the code bytes are kept as they are.
     */
    ch->cs = JISC6226_1978;
  } else if (ch->cs == JISX0208_1983_MAC_EXT) {
    /* Likewise, MAC extension characters are in the gaiji area of jisx0208_1983. */
    ch->cs = JISX0208_1983;
  }

  /*
   * conversion between JIS charsets.
   */
  if (version == 3) {
    if (ch->cs == JISX0208_1983 && ef_map_jisx0208_1983_to_jisx0213_2000_1(&mapped, ch)) {
      *ch = mapped;
    }
  } else if (ch->cs == JISX0213_2000_1 &&
             ef_map_jisx0213_2000_1_to_jisx0208_1983(&mapped, ch)) {
    *ch = mapped;
  }
}
74
/*
 * Core ISO-2022-JP family encoder shared by all variants in this file.
 *
 * Pulls characters from `parser`, remaps them with
 * remap_unsupported_charset(), and writes them to `dst`, emitting a
 * designation escape sequence whenever the character's charset differs from
 * the one currently recorded in the converter's g0 slot.
 *
 * conv     : converter state; must really be an ef_iso2022_conv_t.
 * dst      : output buffer.
 * dst_size : capacity of `dst` in bytes.
 * parser   : source of characters.
 * is_7     : when zero, JISX0201_KATA characters are written as one byte
 *            passed through MAP_TO_GR, with no escape sequence.
 * version  : 1, 2 or 3; selects which charsets may be designated
 *            (see the individual branches below).
 *
 * Returns the number of bytes written to `dst`.  When the next character
 * (plus its escape sequence, if any) does not fit, ef_parser_full_reset() is
 * called on the parser and the count written so far is returned.
 * Characters whose charset cannot be designated are handed to
 * conv->illegal_char if it is set, and silently skipped otherwise.
 */
static size_t convert_to_iso2022jp(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                   ef_parser_t *parser, int is_7, int version) {
  ef_iso2022_conv_t *iso2022_conv = (ef_iso2022_conv_t *)conv;
  size_t filled_size = 0;
  ef_char_t ch;

  while (ef_parser_next_char(parser, &ch)) {
    u_char esc_seq[4]; /* designation sequence to emit before the character */
    size_t esc_len = 0;
    size_t count;

    remap_unsupported_charset(&ch, version);

    if ((!is_7) && ch.cs == JISX0201_KATA) {
      /* 8-bit variant: kana goes out as a single GR byte. */
      if (filled_size >= dst_size) {
        ef_parser_full_reset(parser);

        return filled_size;
      }

      *(dst++) = MAP_TO_GR(*ch.ch);
      filled_size++;

      continue;
    }

    if (ch.cs != iso2022_conv->g0) {
      /* A charset switch is needed: work out which escape sequence to use. */
      if (ch.cs == JISX0208_1983 || (version <= 2 && ch.cs == JISC6226_1978) ||
          /* GB2312_80 for ISO2022JP-2(rfc1554) */
          (version == 2 && ch.cs == GB2312_80)) {
        /*
         * Based on old iso2022: ESC MB_CS F, i.e. without the CS94_TO_G0
         * intermediate byte that the newer form would insert.
         */
        esc_seq[0] = ESC;
        esc_seq[1] = MB_CS;
        esc_seq[2] = CS94MB_FT(ch.cs);
        esc_len = 3;
      } else if (ch.cs == JISX0212_1990 ||
                 /* KSC5601_1987 for ISO2022JP-2(rfc1554) */
                 (version == 2 && ch.cs == KSC5601_1987) ||
                 (version >= 3 && (ch.cs == JISX0213_2000_1 || ch.cs == JISX0213_2000_2))) {
        esc_seq[0] = ESC;
        esc_seq[1] = MB_CS;
        esc_seq[2] = CS94_TO_G0;
        esc_seq[3] = CS94MB_FT(ch.cs);
        esc_len = 4;
      } else if (ch.cs == US_ASCII ||
                 (version <= 2 && (ch.cs == JISX0201_ROMAN || ch.cs == JISX0201_KATA))) {
        esc_seq[0] = ESC;
        esc_seq[1] = CS94_TO_G0;
        esc_seq[2] = CS94SB_FT(ch.cs);
        esc_len = 3;
      } else if (version >= 2 && (ch.cs == ISO8859_1_R || ch.cs == ISO8859_7_R)) {
        /*
         * for ISO2022JP-2(rfc1554)
         *
         * NOTE(review): this sequence designates to G2 (CS96_TO_G2), yet the
         * charset is recorded in g0 below and no single-shift is written
         * here. Behavior kept as is; confirm against RFC 1554.
         */
        esc_seq[0] = ESC;
        esc_seq[1] = CS96_TO_G2;
        esc_seq[2] = CS96SB_FT(ch.cs);
        esc_len = 3;
      } else if (conv->illegal_char) {
        /*
         * Initialized so that a callback which only stores to is_full on
         * overflow cannot make us read an indeterminate value.
         */
        int is_full = 0;
        size_t size;

        size = (*conv->illegal_char)(conv, dst, dst_size - filled_size, &is_full, &ch);
        if (is_full) {
          ef_parser_full_reset(parser);

          return filled_size;
        }

        dst += size;
        filled_size += size;

        continue;
      } else {
        /* Unsupported charset and nobody to report it to: drop it. */
        continue;
      }
    }

    /* The escape sequence (if any) and the character must fit together. */
    if (filled_size + esc_len + ch.size > dst_size) {
      ef_parser_full_reset(parser);

      return filled_size;
    }

    for (count = 0; count < esc_len; count++) {
      *(dst++) = esc_seq[count];
    }

    /* No-op when the charset was already current (esc_len == 0). */
    iso2022_conv->g0 = ch.cs;

    for (count = 0; count < ch.size; count++) {
      *(dst++) = ch.ch[count];
    }

    filled_size += esc_len + ch.size;
  }

  return filled_size;
}
215
/*
 * `convert` callback for the 8-bit ISO-2022-JP converter: forwards to
 * convert_to_iso2022jp() with is_7 = 0 and version = 1.
 */
static size_t convert_to_iso2022jp_8(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                     ef_parser_t *parser) {
  const int is_7 = 0;
  const int version = 1;

  return convert_to_iso2022jp(conv, dst, dst_size, parser, is_7, version);
}
220
/*
 * `convert` callback for the 7-bit ISO-2022-JP converter: forwards to
 * convert_to_iso2022jp() with is_7 = 1 and version = 1.
 */
static size_t convert_to_iso2022jp_7(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                     ef_parser_t *parser) {
  const int is_7 = 1;
  const int version = 1;

  return convert_to_iso2022jp(conv, dst, dst_size, parser, is_7, version);
}
225
/*
 * `convert` callback for the ISO-2022-JP-2 converter: forwards to
 * convert_to_iso2022jp() with is_7 = 1 and version = 2.
 */
static size_t convert_to_iso2022jp2(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                    ef_parser_t *parser) {
  const int is_7 = 1;
  const int version = 2;

  return convert_to_iso2022jp(conv, dst, dst_size, parser, is_7, version);
}
230
/*
 * `convert` callback for the ISO-2022-JP-3 converter: forwards to
 * convert_to_iso2022jp() with is_7 = 1 and version = 3.
 */
static size_t convert_to_iso2022jp3(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                    ef_parser_t *parser) {
  const int is_7 = 1;
  const int version = 3;

  return convert_to_iso2022jp(conv, dst, dst_size, parser, is_7, version);
}
235
/*
 * Put a 7-bit converter into its initial designation state:
 * G0 = US_ASCII, G1..G3 unknown, GL pointing at G0 and no GR.
 *
 * conv must really be an ef_iso2022_conv_t.
 */
static void iso2022jp_7_conv_init(ef_conv_t *conv) {
  ef_iso2022_conv_t *state = (ef_iso2022_conv_t *)conv;

  /* Designated charsets. */
  state->g0 = US_ASCII;
  state->g1 = UNKNOWN_CS;
  state->g2 = UNKNOWN_CS;
  state->g3 = UNKNOWN_CS;

  /* Invocations. */
  state->gl = &state->g0;
  state->gr = NULL;
}
248
/*
 * Put an 8-bit converter into its initial designation state:
 * G0 = US_ASCII, G1 = JISX0201_KATA, G2/G3 unknown, GL pointing at G0 and
 * GR pointing at G1.
 *
 * conv must really be an ef_iso2022_conv_t.
 */
static void iso2022jp_8_conv_init(ef_conv_t *conv) {
  ef_iso2022_conv_t *state = (ef_iso2022_conv_t *)conv;

  /* Designated charsets. */
  state->g0 = US_ASCII;
  state->g1 = JISX0201_KATA;
  state->g2 = UNKNOWN_CS;
  state->g3 = UNKNOWN_CS;

  /* Invocations. */
  state->gl = &state->g0;
  state->gr = &state->g1;
}
261
/* `destroy` callback shared by every converter in this file: frees conv. */
static void conv_destroy(ef_conv_t *conv) {
  free(conv);
}
263
264 /* --- global functions --- */
265
ef_iso2022jp_8_conv_new(void)266 ef_conv_t *ef_iso2022jp_8_conv_new(void) {
267 ef_iso2022_conv_t *iso2022_conv;
268
269 if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
270 return NULL;
271 }
272
273 iso2022jp_8_conv_init((ef_conv_t*)iso2022_conv);
274
275 iso2022_conv->conv.convert = convert_to_iso2022jp_8;
276 iso2022_conv->conv.init = iso2022jp_8_conv_init;
277 iso2022_conv->conv.destroy = conv_destroy;
278 iso2022_conv->conv.illegal_char = NULL;
279
280 return (ef_conv_t*)iso2022_conv;
281 }
282
ef_iso2022jp_7_conv_new(void)283 ef_conv_t *ef_iso2022jp_7_conv_new(void) {
284 ef_iso2022_conv_t *iso2022_conv;
285
286 if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
287 return NULL;
288 }
289
290 iso2022jp_7_conv_init((ef_conv_t*)iso2022_conv);
291
292 iso2022_conv->conv.convert = convert_to_iso2022jp_7;
293 iso2022_conv->conv.init = iso2022jp_7_conv_init;
294 iso2022_conv->conv.destroy = conv_destroy;
295 iso2022_conv->conv.illegal_char = NULL;
296
297 return (ef_conv_t*)iso2022_conv;
298 }
299
ef_iso2022jp2_conv_new(void)300 ef_conv_t *ef_iso2022jp2_conv_new(void) {
301 ef_iso2022_conv_t *iso2022_conv;
302
303 if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
304 return NULL;
305 }
306
307 iso2022jp_7_conv_init((ef_conv_t*)iso2022_conv);
308
309 iso2022_conv->conv.convert = convert_to_iso2022jp2;
310 iso2022_conv->conv.init = iso2022jp_7_conv_init;
311 iso2022_conv->conv.destroy = conv_destroy;
312 iso2022_conv->conv.illegal_char = NULL;
313
314 return (ef_conv_t*)iso2022_conv;
315 }
316
ef_iso2022jp3_conv_new(void)317 ef_conv_t *ef_iso2022jp3_conv_new(void) {
318 ef_iso2022_conv_t *iso2022_conv;
319
320 if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
321 return NULL;
322 }
323
324 iso2022jp_7_conv_init((ef_conv_t*)iso2022_conv);
325
326 iso2022_conv->conv.convert = convert_to_iso2022jp3;
327 iso2022_conv->conv.init = iso2022jp_7_conv_init;
328 iso2022_conv->conv.destroy = conv_destroy;
329 iso2022_conv->conv.illegal_char = NULL;
330
331 return (ef_conv_t*)iso2022_conv;
332 }
333