1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2 
3 #include "ef_iso2022jp_conv.h"
4 
5 #include <stdio.h> /* NULL */
6 #include <pobl/bl_mem.h>
7 #include <pobl/bl_debug.h>
8 
9 #include "ef_iso2022_conv.h"
10 #include "ef_iso2022_intern.h"
11 #include "ef_ucs4_map.h"
12 #include "ef_ja_jp_map.h"
13 
14 /* --- static functions --- */
15 
/*
 * Rewrites *ch in place so that it belongs to a charset the ISO-2022-JP
 * family encoder below knows how to designate.
 *
 * ch      : character to remap (modified in place).
 * version : 3 prefers JISX0213_2000_1 over JISX0208_1983; any other value
 *           maps JISX0213_2000_1 back to JISX0208_1983 when possible.
 *
 * Whenever a mapping function fails, the character is left as it is at that
 * point.
 */
static void remap_unsupported_charset(ef_char_t *ch, int version) {
  ef_char_t mapped;

  /* Step 1: unicode => one of the japanese charsets, if a mapping exists. */
  if (ch->cs == ISO10646_UCS4_1 && ef_map_ucs4_to_ja_jp(&mapped, ch)) {
    *ch = mapped;
  }

  /* Step 2: remapping shared by all iso2022 based converters. */
  ef_iso2022_remap_unsupported_charset(ch);

  /* Step 3: vendor specific gaiji charsets => plain JIS charsets. */
  if (ch->cs == SJIS_IBM_EXT) {
    /*
     * IBM extension characters do not live in the gaiji area of a 94n
     * iso2022 charset, so they have to be converted explicitly: try
     * jisx0208_1983 first and fall back to jisx0212_1990.
     */
    if (ef_map_sjis_ibm_ext_to_jisx0208_1983(&mapped, ch) ||
        ef_map_sjis_ibm_ext_to_jisx0212_1990(&mapped, ch)) {
      *ch = mapped;
    } else {
      /* Neither mapping succeeded: give up and leave ch untouched. */
      return;
    }
  } else if (ch->cs == JISC6226_1978_NEC_EXT || ch->cs == JISC6226_1978_NECIBM_EXT) {
    /*
     * NEC special characters and NEC selected IBM characters sit exactly in
     * the gaiji area of jisc6226_1978, so only the charset id is replaced.
     */
    ch->cs = JISC6226_1978;
  } else if (ch->cs == JISX0208_1983_MAC_EXT) {
    /* Likewise, MAC extension characters sit in the jisx0208_1983 gaiji area. */
    ch->cs = JISX0208_1983;
  }

  /* Step 4: conversion between JIS charsets, depending on the version. */
  if (version == 3) {
    if (ch->cs == JISX0208_1983 && ef_map_jisx0208_1983_to_jisx0213_2000_1(&mapped, ch)) {
      *ch = mapped;
    }
  } else if (ch->cs == JISX0213_2000_1 &&
             ef_map_jisx0213_2000_1_to_jisx0208_1983(&mapped, ch)) {
    *ch = mapped;
  }
}
74 
/*
 * Encodes the characters supplied by `parser` into `dst` as
 * ISO-2022-JP (version 1), ISO-2022-JP-2 (version 2, RFC 1554) or
 * ISO-2022-JP-3 (version 3).
 *
 * conv     : converter; it is accessed as an ef_iso2022_conv_t, whose g0 and
 *            g2 members remember the designations already written.
 * dst      : output buffer.
 * dst_size : capacity of `dst` in bytes.
 * parser   : source of characters.
 * is_7     : if zero, JISX0201_KATA is written as one GR (8bit) byte instead
 *            of being designated to G0.
 * version  : 1, 2 or 3 (see above).
 *
 * Returns the number of bytes stored in `dst`. If the next character together
 * with the escape sequence it requires does not fit, ef_parser_full_reset()
 * is called on `parser` (presumably so the character is parsed again by the
 * next call - confirm against the parser) and the count so far is returned.
 *
 * A character whose charset cannot be designated is handed to
 * conv->illegal_char if that is set; otherwise it is silently skipped.
 */
static size_t convert_to_iso2022jp(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                   ef_parser_t *parser, int is_7, int version) {
  ef_iso2022_conv_t *iso2022_conv = (ef_iso2022_conv_t*)conv;
  size_t filled_size = 0;
  ef_char_t ch;

  while (ef_parser_next_char(parser, &ch)) {
    u_char seq[5]; /* escape sequence(s) that must precede this character */
    size_t seq_len = 0;
    size_t count;
    int via_g2 = 0;

    remap_unsupported_charset(&ch, version);

    if ((!is_7) && ch.cs == JISX0201_KATA) {
      /* 8bit variant: half-width katakana is a single GR byte, no escape. */
      if (filled_size >= dst_size) {
        ef_parser_full_reset(parser);

        return filled_size;
      }

      *(dst++) = MAP_TO_GR(*ch.ch);
      filled_size++;

      continue;
    }

    if (version >= 2 && (ch.cs == ISO8859_1_R || ch.cs == ISO8859_7_R)) {
      /*
       * for ISO2022JP-2(rfc1554)
       *
       * These 96 charsets are designated to G2, not G0, and every character
       * is invoked individually with the 7bit single shift 2 (ESC N).
       * The G0 designation is therefore left untouched.
       */
      via_g2 = 1;

      if (ch.cs != iso2022_conv->g2) {
        seq[seq_len++] = ESC;
        seq[seq_len++] = CS96_TO_G2;
        seq[seq_len++] = CS96SB_FT(ch.cs);
      }

      seq[seq_len++] = ESC;
      seq[seq_len++] = 'N'; /* SS2 in its 7bit form */
    } else if (ch.cs != iso2022_conv->g0) {
      if (ch.cs == JISX0208_1983 || (version <= 2 && ch.cs == JISC6226_1978) ||
          /* GB2312_80 for ISO2022JP-2(rfc1554) */
          (version == 2 && ch.cs == GB2312_80)) {
        /* old iso2022 style 3 byte designation: ESC $ F */
        seq[seq_len++] = ESC;
        seq[seq_len++] = MB_CS;
        seq[seq_len++] = CS94MB_FT(ch.cs);
      } else if (ch.cs == JISX0212_1990 ||
                 /* KSC5601_1987 for ISO2022JP-2(rfc1554) */
                 (version == 2 && ch.cs == KSC5601_1987) ||
                 (version >= 3 && (ch.cs == JISX0213_2000_1 || ch.cs == JISX0213_2000_2))) {
        /* 4 byte designation: ESC $ ( F */
        seq[seq_len++] = ESC;
        seq[seq_len++] = MB_CS;
        seq[seq_len++] = CS94_TO_G0;
        seq[seq_len++] = CS94MB_FT(ch.cs);
      } else if (ch.cs == US_ASCII ||
                 (version <= 2 && (ch.cs == JISX0201_ROMAN || ch.cs == JISX0201_KATA))) {
        /* single byte 94 charset: ESC ( F */
        seq[seq_len++] = ESC;
        seq[seq_len++] = CS94_TO_G0;
        seq[seq_len++] = CS94SB_FT(ch.cs);
      } else if (conv->illegal_char) {
        size_t size;
        int is_full = 0;

        size = (*conv->illegal_char)(conv, dst, dst_size - filled_size, &is_full, &ch);
        if (is_full) {
          ef_parser_full_reset(parser);

          return filled_size;
        }

        dst += size;
        filled_size += size;

        continue;
      } else {
        /* No way to represent this character: drop it. */
        continue;
      }
    }

    /* The escape sequence and the character are written together or not at all. */
    if (filled_size + seq_len + ch.size > dst_size) {
      ef_parser_full_reset(parser);

      return filled_size;
    }

    for (count = 0; count < seq_len; count++) {
      *(dst++) = seq[count];
    }

    for (count = 0; count < ch.size; count++) {
      *(dst++) = ch.ch[count];
    }

    filled_size += seq_len + ch.size;

    if (via_g2) {
      iso2022_conv->g2 = ch.cs;
    } else {
      iso2022_conv->g0 = ch.cs;

      /*
       * RFC 1554 requires the G2 designation to be redone on every line on
       * which it is used, so forget it once a line terminator was written.
       */
      if (ch.size == 1 && (ch.ch[0] == '\n' || ch.ch[0] == '\r')) {
        iso2022_conv->g2 = UNKNOWN_CS;
      }
    }
  }

  return filled_size;
}
215 
/*
 * Convert callback of the 8bit ISO-2022-JP converter: forwards to
 * convert_to_iso2022jp() with is_7 = 0 and version = 1 and returns its result.
 */
static size_t convert_to_iso2022jp_8(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                     ef_parser_t *parser) {
  const int is_7 = 0;
  const int version = 1;

  return convert_to_iso2022jp(conv, dst, dst_size, parser, is_7, version);
}
220 
/*
 * Convert callback of the 7bit ISO-2022-JP converter: forwards to
 * convert_to_iso2022jp() with is_7 = 1 and version = 1 and returns its result.
 */
static size_t convert_to_iso2022jp_7(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                     ef_parser_t *parser) {
  const int is_7 = 1;
  const int version = 1;

  return convert_to_iso2022jp(conv, dst, dst_size, parser, is_7, version);
}
225 
/*
 * Convert callback of the ISO-2022-JP-2 converter: forwards to
 * convert_to_iso2022jp() with is_7 = 1 and version = 2 and returns its result.
 */
static size_t convert_to_iso2022jp2(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                    ef_parser_t *parser) {
  const int is_7 = 1;
  const int version = 2;

  return convert_to_iso2022jp(conv, dst, dst_size, parser, is_7, version);
}
230 
/*
 * Convert callback of the ISO-2022-JP-3 converter: forwards to
 * convert_to_iso2022jp() with is_7 = 1 and version = 3 and returns its result.
 */
static size_t convert_to_iso2022jp3(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                    ef_parser_t *parser) {
  const int is_7 = 1;
  const int version = 3;

  return convert_to_iso2022jp(conv, dst, dst_size, parser, is_7, version);
}
235 
/*
 * Puts a 7bit converter into its initial designation state:
 * G0 = US_ASCII invoked into GL, nothing invoked into GR, and G1..G3 unknown.
 *
 * conv is accessed as an ef_iso2022_conv_t.
 */
static void iso2022jp_7_conv_init(ef_conv_t *conv) {
  ef_iso2022_conv_t *self = (ef_iso2022_conv_t*)conv;

  /* designations */
  self->g0 = US_ASCII;
  self->g1 = UNKNOWN_CS;
  self->g2 = UNKNOWN_CS;
  self->g3 = UNKNOWN_CS;

  /* invocations */
  self->gl = &self->g0;
  self->gr = NULL;
}
248 
/*
 * Puts an 8bit converter into its initial designation state:
 * G0 = US_ASCII invoked into GL, G1 = JISX0201_KATA invoked into GR,
 * and G2/G3 unknown.
 *
 * conv is accessed as an ef_iso2022_conv_t.
 */
static void iso2022jp_8_conv_init(ef_conv_t *conv) {
  ef_iso2022_conv_t *self = (ef_iso2022_conv_t*)conv;

  /* designations */
  self->g0 = US_ASCII;
  self->g1 = JISX0201_KATA;
  self->g2 = UNKNOWN_CS;
  self->g3 = UNKNOWN_CS;

  /* invocations */
  self->gl = &self->g0;
  self->gr = &self->g1;
}
261 
/* Destroy callback shared by all converters of this file: releases conv with free(). */
static void conv_destroy(ef_conv_t *conv) {
  free(conv);
}
263 
264 /* --- global functions --- */
265 
ef_iso2022jp_8_conv_new(void)266 ef_conv_t *ef_iso2022jp_8_conv_new(void) {
267   ef_iso2022_conv_t *iso2022_conv;
268 
269   if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
270     return NULL;
271   }
272 
273   iso2022jp_8_conv_init((ef_conv_t*)iso2022_conv);
274 
275   iso2022_conv->conv.convert = convert_to_iso2022jp_8;
276   iso2022_conv->conv.init = iso2022jp_8_conv_init;
277   iso2022_conv->conv.destroy = conv_destroy;
278   iso2022_conv->conv.illegal_char = NULL;
279 
280   return (ef_conv_t*)iso2022_conv;
281 }
282 
ef_iso2022jp_7_conv_new(void)283 ef_conv_t *ef_iso2022jp_7_conv_new(void) {
284   ef_iso2022_conv_t *iso2022_conv;
285 
286   if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
287     return NULL;
288   }
289 
290   iso2022jp_7_conv_init((ef_conv_t*)iso2022_conv);
291 
292   iso2022_conv->conv.convert = convert_to_iso2022jp_7;
293   iso2022_conv->conv.init = iso2022jp_7_conv_init;
294   iso2022_conv->conv.destroy = conv_destroy;
295   iso2022_conv->conv.illegal_char = NULL;
296 
297   return (ef_conv_t*)iso2022_conv;
298 }
299 
ef_iso2022jp2_conv_new(void)300 ef_conv_t *ef_iso2022jp2_conv_new(void) {
301   ef_iso2022_conv_t *iso2022_conv;
302 
303   if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
304     return NULL;
305   }
306 
307   iso2022jp_7_conv_init((ef_conv_t*)iso2022_conv);
308 
309   iso2022_conv->conv.convert = convert_to_iso2022jp2;
310   iso2022_conv->conv.init = iso2022jp_7_conv_init;
311   iso2022_conv->conv.destroy = conv_destroy;
312   iso2022_conv->conv.illegal_char = NULL;
313 
314   return (ef_conv_t*)iso2022_conv;
315 }
316 
ef_iso2022jp3_conv_new(void)317 ef_conv_t *ef_iso2022jp3_conv_new(void) {
318   ef_iso2022_conv_t *iso2022_conv;
319 
320   if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
321     return NULL;
322   }
323 
324   iso2022jp_7_conv_init((ef_conv_t*)iso2022_conv);
325 
326   iso2022_conv->conv.convert = convert_to_iso2022jp3;
327   iso2022_conv->conv.init = iso2022jp_7_conv_init;
328   iso2022_conv->conv.destroy = conv_destroy;
329   iso2022_conv->conv.illegal_char = NULL;
330 
331   return (ef_conv_t*)iso2022_conv;
332 }
333