1
2 #include "wc.h"
3 #include "iso2022.h"
4 #include "jis.h"
5 #include "big5.h"
6 #include "johab.h"
7 #include "wtf.h"
8 #ifdef USE_UNICODE
9 #include "ucs.h"
10 #endif
11
12 #define C0 WC_ISO_MAP_C0
13 #define C1 WC_ISO_MAP_C1
14 #define GL WC_ISO_MAP_GL
15 #define GR WC_ISO_MAP_GR
16 #define GL2 WC_ISO_MAP_GL96
17 #define GR2 WC_ISO_MAP_GR96
18 #define SO WC_ISO_MAP_SO
19 #define SI WC_ISO_MAP_SI
20 #define ESC WC_ISO_MAP_ESC
21 #define SS2 WC_ISO_MAP_SS2
22 #define SS3 WC_ISO_MAP_SS3
23
/*
 * Byte classification table for the ISO 2022 / EUC decoders in this file,
 * indexed by the raw byte value (0x00-0xFF).
 *
 *   0x00-0x1F  C0, except 0x0E = SO, 0x0F = SI, 0x1B = ESC
 *   0x20, 0x7F GL96 (GL2): the decoders treat these as graphic characters
 *              only when the invoked set has the CS96 type bit
 *   0x21-0x7E  GL
 *   0x80-0x9F  C1, except 0x8E = SS2, 0x8F = SS3
 *   0xA0, 0xFF GR96 (GR2): same CS96 rule as GL96, on the GR side
 *   0xA1-0xFE  GR
 */
wc_uint8 WC_ISO_MAP[ 0x100 ] = {
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, SO, SI,
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, ESC,C0, C0, C0, C0,
    GL2,GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL2,

    C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, SS2,SS3,
    C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
    GR2,GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR2,
};
43
/*
 * Designator lookup tables, one per character-set type, indexed by
 * (final byte - WC_F_ISO_BASE).  They are filled by wc_create_gmap() and
 * read by wc_push_to_iso2022(), which passes the stored value to
 * wc_push_iso2022_esc() as the designator byte `g`.  An entry of 0 is
 * treated by wc_push_to_iso2022() as "this set cannot be designated".
 */
static wc_uchar cs94_gmap[ 0x80 - WC_F_ISO_BASE ];
static wc_uchar cs94w_gmap[ 0x80 - WC_F_ISO_BASE ];
static wc_uchar cs96_gmap[ 0x80 - WC_F_ISO_BASE ];
static wc_uchar cs96w_gmap[ 0x80 - WC_F_ISO_BASE ];
static wc_uchar cs942_gmap[ 0x80 - WC_F_ISO_BASE ];
49
50 static void
wtf_push_iso2022(Str os,wc_ccs ccs,wc_uint32 code)51 wtf_push_iso2022(Str os, wc_ccs ccs, wc_uint32 code)
52 {
53 switch (ccs) {
54 case WC_CCS_JIS_C_6226:
55 case WC_CCS_JIS_X_0208:
56 case WC_CCS_JIS_X_0213_1:
57 ccs = wc_jisx0208_or_jisx02131(code);
58 break;
59 case WC_CCS_JIS_X_0212:
60 case WC_CCS_JIS_X_0213_2:
61 ccs = wc_jisx0212_or_jisx02132(code);
62 break;
63 case WC_CCS_JIS_X_0201:
64 case WC_CCS_GB_1988:
65 ccs = WC_CCS_US_ASCII;
66 break;
67 }
68 wtf_push(os, ccs, code);
69 }
70
71 Str
wc_conv_from_iso2022(Str is,wc_ces ces)72 wc_conv_from_iso2022(Str is, wc_ces ces)
73 {
74 Str os;
75 wc_uchar *sp = (wc_uchar *)is->ptr;
76 wc_uchar *ep = sp + is->length;
77 wc_uchar *p, *q = NULL;
78 int state = WC_ISO_NOSTATE;
79 wc_status st;
80 wc_ccs gl_ccs, gr_ccs;
81
82 for (p = sp; p < ep && !(WC_ISO_MAP[*p] & WC_ISO_MAP_DETECT); p++)
83 ;
84 if (p == ep)
85 return is;
86 os = Strnew_size(is->length);
87 if (p > sp)
88 Strcat_charp_n(os, is->ptr, (int)(p - sp));
89
90 wc_input_init(ces, &st);
91 gl_ccs = st.design[st.gl];
92 gr_ccs = st.design[st.gr];
93
94 for (; p < ep; p++) {
95 switch (state) {
96 case WC_ISO_NOSTATE:
97 switch (WC_ISO_MAP[*p]) {
98 case GL2:
99 gl_ccs = st.ss ? st.design[st.ss]
100 : st.design[st.gl];
101 if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) {
102 Strcat_char(os, (char)*p);
103 break;
104 }
105 case GL:
106 gl_ccs = st.ss ? st.design[st.ss]
107 : st.design[st.gl];
108 if (WC_CCS_IS_WIDE(gl_ccs)) {
109 q = p;
110 state = WC_ISO_MBYTE1;
111 continue;
112 } else if (gl_ccs == WC_CES_US_ASCII)
113 Strcat_char(os, (char)*p);
114 else
115 wtf_push_iso2022(os, gl_ccs, (wc_uint32)*p);
116 break;
117 case GR2:
118 gr_ccs = st.ss ? st.design[st.ss]
119 : st.design[st.gr];
120 if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) {
121 wtf_push_unknown(os, p, 1);
122 break;
123 }
124 case GR:
125 gr_ccs = st.ss ? st.design[st.ss]
126 : st.design[st.gr];
127 if (WC_CCS_IS_WIDE(gr_ccs)) {
128 q = p;
129 state = WC_EUC_MBYTE1;
130 continue;
131 } else if (gr_ccs)
132 wtf_push_iso2022(os, gr_ccs, (wc_uint32)*p);
133 else
134 wtf_push_unknown(os, p, 1);
135 break;
136 case C0:
137 Strcat_char(os, (char)*p);
138 break;
139 case C1:
140 wtf_push(os, WC_CCS_C1, (wc_uint32)*p);
141 break;
142 case ESC:
143 st.ss = 0;
144 if (wc_parse_iso2022_esc(&p, &st))
145 state = st.state;
146 else
147 Strcat_char(os, (char)*p);
148 continue;
149 case SI:
150 st.gl = 0;
151 break;
152 case SO:
153 st.gl = 1;
154 break;
155 case SS2:
156 if (! st.design[2]) {
157 wtf_push_unknown(os, p, 1);
158 break;
159 }
160 st.ss = 2;
161 continue;
162 case SS3:
163 if (! st.design[3]) {
164 wtf_push_unknown(os, p, 1);
165 break;
166 }
167 st.ss = 3;
168 continue;
169 }
170 break;
171 case WC_ISO_MBYTE1:
172 switch (WC_ISO_MAP[*p]) {
173 case GL2:
174 if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) {
175 Strcat_char(os, (char)*q);
176 Strcat_char(os, (char)*p);
177 break;
178 }
179 case GL:
180 wtf_push_iso2022(os, gl_ccs, ((wc_uint32)*q << 8) | *p);
181 break;
182 default:
183 wtf_push_unknown(os, q, 2);
184 break;
185 }
186 break;
187 case WC_EUC_MBYTE1:
188 switch (WC_ISO_MAP[*p]) {
189 case GR2:
190 if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) {
191 wtf_push_unknown(os, q, 2);
192 break;
193 }
194 case GR:
195 if (gr_ccs == WC_CCS_CNS_11643_X) {
196 state = WC_EUC_TW_MBYTE2;
197 continue;
198 }
199 wtf_push_iso2022(os, gr_ccs, ((wc_uint32)*q << 8) | *p);
200 break;
201 default:
202 wtf_push_unknown(os, q, 2);
203 break;
204 }
205 break;
206 case WC_EUC_TW_MBYTE2:
207 if (WC_ISO_MAP[*p] == GR) {
208 if (0xa1 <= *q && *q <= 0xa7) {
209 wtf_push_iso2022(os, WC_CCS_CNS_11643_1 + (*q - 0xa1),
210 ((wc_uint32)*(q+1) << 8) | *p);
211 break;
212 }
213 if (0xa8 <= *q && *q <= 0xb0) {
214 wtf_push_iso2022(os, WC_CCS_CNS_11643_8 + (*q - 0xa8),
215 ((wc_uint32)*(q+1) << 8) | *p);
216 break;
217 }
218 }
219 wtf_push_unknown(os, q, 3);
220 break;
221 case WC_ISO_CSWSR:
222 if (*p == WC_C_ESC && *(p+1) == WC_C_CSWSR) {
223 if (*(p+2) == WC_F_ISO_BASE) {
224 state = st.state = WC_ISO_NOSTATE;
225 p += 2;
226 continue;
227 } else if (*(p+2) > WC_F_ISO_BASE && *(p+2) <= 0x7e) {
228 p += 2;
229 continue;
230 }
231 }
232 wtf_push_unknown(os, p, 1);
233 continue;
234 case WC_ISO_CSWOSR:
235 wtf_push_unknown(os, p, ep - p);
236 return os;
237 break;
238 }
239 st.ss = 0;
240 state = WC_ISO_NOSTATE;
241 }
242 switch (state) {
243 case WC_ISO_MBYTE1:
244 case WC_EUC_MBYTE1:
245 wtf_push_unknown(os, p-1, 1);
246 break;
247 case WC_EUC_TW_MBYTE1:
248 wtf_push_unknown(os, p-2, 2);
249 break;
250 }
251 return os;
252 }
253
254 int
wc_parse_iso2022_esc(wc_uchar ** ptr,wc_status * st)255 wc_parse_iso2022_esc(wc_uchar **ptr, wc_status *st)
256 {
257 wc_uchar *p = *ptr, state, f = 0, g = 0, cs = 0;
258
259 if (*p != WC_C_ESC)
260 return 0;
261 state = *p;
262 for (p++; *p && state; p++) {
263 switch (state) {
264 case WC_C_ESC: /* ESC */
265 switch (*p) {
266 case WC_C_MBCS: /* ESC '$' */
267 state = *p;
268 continue;
269 case WC_C_G0_CS94: /* ESC '(' */
270 case WC_C_G1_CS94: /* ESC ')' */
271 case WC_C_G2_CS94: /* ESC '*' */
272 case WC_C_G3_CS94: /* ESC '+' */
273 state = cs = WC_C_G0_CS94;
274 g = *p & 0x03;
275 continue;
276 case WC_C_G0_CS96: /* ESC ',' */ /* ISO 2022 does not permit */
277 case WC_C_G1_CS96: /* ESC '-' */
278 case WC_C_G2_CS96: /* ESC '.' */
279 case WC_C_G3_CS96: /* ESC '/' */
280 state = cs = WC_C_G0_CS96;
281 g = *p & 0x03;
282 continue;
283 case WC_C_C0: /* ESC '!' */ /* not supported */
284 case WC_C_C1: /* ESC '"' */ /* not supported */
285 case WC_C_REP: /* ESC '&' */ /* not supported */
286 state = cs = WC_C_C0;
287 continue;
288 case WC_C_CSWSR: /* ESC '%' */ /* not supported */
289 state = cs = WC_C_CSWSR;
290 continue;
291 case WC_C_SS2: /* ESC 'N' */
292 st->ss = 2; *ptr = p; return 1;
293 case WC_C_SS3: /* ESC 'O' */
294 st->ss = 3; *ptr = p; return 1;
295 case WC_C_LS2: /* ESC 'n' */
296 st->gl = 2; *ptr = p; return 1;
297 case WC_C_LS3: /* ESC 'o' */
298 st->gl = 3; *ptr = p; return 1;
299 case WC_C_LS1R: /* ESC '~' */
300 st->gr = 1; *ptr = p; return 1;
301 case WC_C_LS2R: /* ESC '}' */
302 st->gr = 2; *ptr = p; return 1;
303 case WC_C_LS3R: /* ESC '|' */
304 st->gr = 3; *ptr = p; return 1;
305 default:
306 return 0;
307 }
308 break;
309 case WC_C_MBCS: /* ESC '$' */
310 switch (*p) {
311 case WC_F_JIS_C_6226: /* ESC '$' @ */
312 case WC_F_JIS_X_0208: /* ESC '$' B */
313 case WC_F_GB_2312: /* ESC '$' A */
314 state = 0;
315 cs = WC_C_G0_CS94 | 0x80;
316 g = 0;
317 f = *p;
318 break;
319 case WC_C_G0_CS94: /* ESC '$' '(' */
320 case WC_C_G1_CS94: /* ESC '$' ')' */
321 case WC_C_G2_CS94: /* ESC '$' '*' */
322 case WC_C_G3_CS94: /* ESC '$' '+' */
323 state = cs = WC_C_G0_CS94 | 0x80;
324 g = *p & 0x03;
325 continue;
326 case WC_C_G0_CS96: /* ESC '$' ',' */ /* ISO 2022 does not permit */
327 case WC_C_G1_CS96: /* ESC '$' '-' */
328 case WC_C_G2_CS96: /* ESC '$' '.' */
329 case WC_C_G3_CS96: /* ESC '$' '/' */
330 state = cs = WC_C_G0_CS96 | 0x80;
331 g = *p & 0x03;
332 continue;
333 default:
334 return 0;
335 }
336 break;
337 case WC_C_G0_CS94: /* ESC [()*+] F */
338 if (*p == WC_C_CS942) { /* ESC [()*+] '!' */
339 state = cs = WC_C_CS942 | 0x80;
340 g = *p & 0x03;
341 continue;
342 }
343 case WC_C_G0_CS96: /* ESC [,-./] F */
344 case WC_C_G0_CS94 | 0x80: /* ESC '$' [()*+] F */
345 case WC_C_G0_CS96 | 0x80: /* ESC '$' [,-./] F */
346 case WC_C_CS942 | 0x80: /* ESC [()*+] '!' F */
347 case WC_C_C0: /* ESC [!"&] F */
348 case WC_C_CSWSR | 0x80: /* ESC '%' '/' F */
349 state = 0;
350 f = *p;
351 break;
352 case WC_C_CSWSR: /* ESC '%' F */
353 if (*p == WC_C_CSWOSR) { /* ESC '%' '/' */
354 state = cs = WC_C_CSWSR | 0x80;
355 continue;
356 }
357 state = 0;
358 f = *p;
359 break;
360 default:
361 return 0;
362 }
363 }
364 if (f < WC_F_ISO_BASE || f > 0x7e)
365 return 0;
366 switch (cs) {
367 case WC_C_G0_CS94:
368 st->design[g] = WC_CCS_SET_CS94(f);
369 break;
370 case WC_C_G0_CS94 | 0x80:
371 st->design[g] = WC_CCS_SET_CS94W(f);
372 break;
373 case WC_C_G0_CS96:
374 st->design[g] = WC_CCS_SET_CS96(f);
375 break;
376 case WC_C_G0_CS96 | 0x80:
377 st->design[g] = WC_CCS_SET_CS96W(f);
378 break;
379 case WC_C_CS942 | 0x80:
380 st->design[g] = WC_CCS_SET_CS942(f);
381 break;
382 case WC_C_CSWSR:
383 if (f == WC_F_ISO_BASE)
384 st->state = WC_ISO_NOSTATE;
385 else
386 st->state = WC_ISO_CSWSR;
387 break;
388 case WC_C_CSWOSR:
389 st->state = WC_ISO_CSWOSR;
390 break;
391 }
392 *ptr = p - 1;
393 return 1;
394 }
395
396 void
wc_push_to_iso2022(Str os,wc_wchar_t cc,wc_status * st)397 wc_push_to_iso2022(Str os, wc_wchar_t cc, wc_status *st)
398 {
399 wc_uchar g = 0;
400 wc_bool is_wide = WC_FALSE, retry = WC_FALSE;
401 wc_wchar_t cc2;
402
403 while (1) {
404 switch (WC_CCS_TYPE(cc.ccs)) {
405 case WC_CCS_A_CS94:
406 if (cc.ccs == WC_CCS_US_ASCII)
407 cc.ccs = st->g0_ccs;
408 if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
409 g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
410 break;
411 case WC_CCS_A_CS94W:
412 is_wide = 1;
413 switch (cc.ccs) {
414 #ifdef USE_UNICODE
415 case WC_CCS_JIS_X_0212:
416 if (!WcOption.use_jisx0212 && WcOption.use_jisx0213 &&
417 WcOption.ucs_conv) {
418 cc2 = wc_jisx0212_to_jisx0213(cc);
419 if (cc2.ccs == WC_CCS_JIS_X_0213_1 ||
420 cc2.ccs == WC_CCS_JIS_X_0213_2) {
421 cc = cc2;
422 continue;
423 }
424 }
425 break;
426 case WC_CCS_JIS_X_0213_1:
427 case WC_CCS_JIS_X_0213_2:
428 if (!WcOption.use_jisx0213 && WcOption.use_jisx0212 &&
429 WcOption.ucs_conv) {
430 cc2 = wc_jisx0213_to_jisx0212(cc);
431 if (cc2.ccs == WC_CCS_JIS_X_0212) {
432 cc = cc2;
433 continue;
434 }
435 }
436 break;
437 #endif
438 }
439 if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
440 g = cs94w_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
441 break;
442 case WC_CCS_A_CS96:
443 if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
444 g = cs96_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
445 break;
446 case WC_CCS_A_CS96W:
447 is_wide = 1;
448 if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
449 g = cs96w_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
450 break;
451 case WC_CCS_A_CS942:
452 if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
453 g = cs942_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
454 break;
455 case WC_CCS_A_UNKNOWN_W:
456 if (WcOption.no_replace)
457 return;
458 is_wide = 1;
459 cc.ccs = WC_CCS_US_ASCII;
460 if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
461 g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
462 cc.code = ((wc_uint32)WC_REPLACE_W[0] << 8) | WC_REPLACE_W[1];
463 break;
464 case WC_CCS_A_UNKNOWN:
465 if (WcOption.no_replace)
466 return;
467 cc.ccs = WC_CCS_US_ASCII;
468 if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
469 g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
470 cc.code = (wc_uint32)WC_REPLACE[0];
471 break;
472 default:
473 if ((cc.ccs == WC_CCS_JOHAB || cc.ccs == WC_CCS_JOHAB_1 ||
474 cc.ccs == WC_CCS_JOHAB_2 || cc.ccs == WC_CCS_JOHAB_3) &&
475 cs94w_gmap[WC_F_KS_X_1001 - WC_F_ISO_BASE]) {
476 wc_wchar_t cc2 = wc_johab_to_ksx1001(cc);
477 if (cc2.ccs == WC_CCS_KS_X_1001) {
478 cc = cc2;
479 continue;
480 }
481 }
482 #ifdef USE_UNICODE
483 if (WcOption.ucs_conv)
484 cc = wc_any_to_iso2022(cc, st);
485 else
486 #endif
487 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
488 continue;
489 }
490 if (! g) {
491 #ifdef USE_UNICODE
492 if (WcOption.ucs_conv && ! retry)
493 cc = wc_any_to_any_ces(cc, st);
494 else
495 #endif
496 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
497 retry = WC_TRUE;
498 continue;
499 }
500
501 wc_push_iso2022_esc(os, cc.ccs, g, 1, st);
502 if (is_wide)
503 Strcat_char(os, (char)((cc.code >> 8) & 0x7f));
504 Strcat_char(os, (char)(cc.code & 0x7f));
505 return;
506 }
507 }
508
509 void
wc_push_to_iso2022_end(Str os,wc_status * st)510 wc_push_to_iso2022_end(Str os, wc_status *st)
511 {
512 if (st->design[1] != 0 && st->design[1] != st->g1_ccs)
513 wc_push_iso2022_esc(os, st->g1_ccs, WC_C_G1_CS94, 0, st);
514 wc_push_iso2022_esc(os, st->g0_ccs, WC_C_G0_CS94, 1, st);
515 }
516
517 void
wc_push_iso2022_esc(Str os,wc_ccs ccs,wc_uchar g,wc_uint8 invoke,wc_status * st)518 wc_push_iso2022_esc(Str os, wc_ccs ccs, wc_uchar g, wc_uint8 invoke, wc_status *st)
519 {
520 wc_uint8 g_invoke = g & 0x03;
521
522 if (st->design[g_invoke] != ccs) {
523 Strcat_char(os, WC_C_ESC);
524 if (WC_CCS_IS_WIDE(ccs)) {
525 Strcat_char(os, WC_C_MBCS);
526 if (g_invoke != 0 ||
527 (ccs != WC_CCS_JIS_C_6226 &&
528 ccs != WC_CCS_JIS_X_0208 &&
529 ccs != WC_CCS_GB_2312))
530 Strcat_char(os, (char)g);
531 } else {
532 Strcat_char(os, (char)g);
533 if ((ccs & WC_CCS_A_ISO_2022) == WC_CCS_A_CS942)
534 Strcat_char(os, WC_C_CS942);
535 }
536 Strcat_char(os, (char)WC_CCS_GET_F(ccs));
537 st->design[g_invoke] = ccs;
538 }
539 if (! invoke)
540 return;
541
542 switch (g_invoke) {
543 case 0:
544 if (st->gl != 0) {
545 Strcat_char(os, WC_C_SI);
546 st->gl = 0;
547 }
548 break;
549 case 1:
550 if (st->gl != 1) {
551 Strcat_char(os, WC_C_SO);
552 st->gl = 1;
553 }
554 break;
555 case 2:
556 Strcat_char(os, WC_C_ESC);
557 Strcat_char(os, WC_C_SS2);
558 break;
559 case 3:
560 Strcat_char(os, WC_C_ESC);
561 Strcat_char(os, WC_C_SS3);
562 break;
563 }
564 }
565
566 void
wc_push_to_euc(Str os,wc_wchar_t cc,wc_status * st)567 wc_push_to_euc(Str os, wc_wchar_t cc, wc_status *st)
568 {
569 wc_ccs g1_ccs = st->ces_info->gset[1].ccs;
570
571 while (1) {
572 if (cc.ccs == g1_ccs) {
573 Strcat_char(os, (char)((cc.code >> 8) | 0x80));
574 Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
575 return;
576 }
577 switch (cc.ccs) {
578 case WC_CCS_US_ASCII:
579 Strcat_char(os, (char)cc.code);
580 return;
581 case WC_CCS_C1:
582 Strcat_char(os, (char)(cc.code | 0x80));
583 return;
584 case WC_CCS_UNKNOWN_W:
585 if (!WcOption.no_replace)
586 Strcat_charp(os, WC_REPLACE_W);
587 return;
588 case WC_CCS_UNKNOWN:
589 if (!WcOption.no_replace)
590 Strcat_charp(os, WC_REPLACE);
591 return;
592 case WC_CCS_JOHAB:
593 case WC_CCS_JOHAB_1:
594 case WC_CCS_JOHAB_2:
595 case WC_CCS_JOHAB_3:
596 if (st->ces_info->id == WC_CES_EUC_KR) {
597 cc = wc_johab_to_ksx1001(cc);
598 continue;
599 }
600 default:
601 #ifdef USE_UNICODE
602 if (WcOption.ucs_conv)
603 cc = wc_any_to_any_ces(cc, st);
604 else
605 #endif
606 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
607 continue;
608 }
609 }
610 }
611
612 void
wc_push_to_eucjp(Str os,wc_wchar_t cc,wc_status * st)613 wc_push_to_eucjp(Str os, wc_wchar_t cc, wc_status *st)
614 {
615 while (1) {
616 switch (cc.ccs) {
617 case WC_CCS_US_ASCII:
618 Strcat_char(os, (char)cc.code);
619 return;
620 case WC_CCS_JIS_X_0201K:
621 if (WcOption.use_jisx0201k) {
622 Strcat_char(os, WC_C_SS2R);
623 Strcat_char(os, (char)(cc.code | 0x80));
624 return;
625 } else if (WcOption.fix_width_conv)
626 cc.ccs = WC_CCS_UNKNOWN;
627 else
628 cc = wc_jisx0201k_to_jisx0208(cc);
629 continue;
630 case WC_CCS_JIS_X_0208:
631 break;
632 case WC_CCS_JIS_X_0213_1:
633 if (WcOption.use_jisx0213)
634 break;
635 #ifdef USE_UNICODE
636 else if (WcOption.ucs_conv && WcOption.use_jisx0212)
637 cc = wc_jisx0213_to_jisx0212(cc);
638 #endif
639 else
640 cc.ccs = WC_CCS_UNKNOWN_W;
641 continue;
642 case WC_CCS_JIS_X_0212:
643 if (WcOption.use_jisx0212) {
644 Strcat_char(os, WC_C_SS3R);
645 break;
646 }
647 #ifdef USE_UNICODE
648 else if (WcOption.ucs_conv && WcOption.use_jisx0213)
649 cc = wc_jisx0212_to_jisx0213(cc);
650 #endif
651 else
652 cc.ccs = WC_CCS_UNKNOWN_W;
653 continue;
654 case WC_CCS_JIS_X_0213_2:
655 if (WcOption.use_jisx0213) {
656 Strcat_char(os, WC_C_SS3R);
657 break;
658 }
659 #ifdef USE_UNICODE
660 else if (WcOption.ucs_conv && WcOption.use_jisx0212)
661 cc = wc_jisx0213_to_jisx0212(cc);
662 #endif
663 else
664 cc.ccs = WC_CCS_UNKNOWN_W;
665 continue;
666 case WC_CCS_C1:
667 Strcat_char(os, (char)(cc.code | 0x80));
668 return;
669 case WC_CCS_UNKNOWN_W:
670 if (!WcOption.no_replace)
671 Strcat_charp(os, WC_REPLACE_W);
672 return;
673 case WC_CCS_UNKNOWN:
674 if (!WcOption.no_replace)
675 Strcat_charp(os, WC_REPLACE);
676 return;
677 default:
678 #ifdef USE_UNICODE
679 if (WcOption.ucs_conv)
680 cc = wc_any_to_any_ces(cc, st);
681 else
682 #endif
683 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
684 continue;
685 }
686 Strcat_char(os, (char)((cc.code >> 8) | 0x80));
687 Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
688 return;
689 }
690 }
691
692 void
wc_push_to_euctw(Str os,wc_wchar_t cc,wc_status * st)693 wc_push_to_euctw(Str os, wc_wchar_t cc, wc_status *st)
694 {
695 while (1) {
696 switch (cc.ccs) {
697 case WC_CCS_US_ASCII:
698 Strcat_char(os, (char)cc.code);
699 return;
700 case WC_CCS_CNS_11643_1:
701 break;
702 case WC_CCS_CNS_11643_2:
703 case WC_CCS_CNS_11643_3:
704 case WC_CCS_CNS_11643_4:
705 case WC_CCS_CNS_11643_5:
706 case WC_CCS_CNS_11643_6:
707 case WC_CCS_CNS_11643_7:
708 Strcat_char(os, WC_C_SS2R);
709 Strcat_char(os, (char)(0xA1 + (cc.ccs - WC_CCS_CNS_11643_1)));
710 break;
711 case WC_CCS_CNS_11643_8:
712 case WC_CCS_CNS_11643_9:
713 case WC_CCS_CNS_11643_10:
714 case WC_CCS_CNS_11643_11:
715 case WC_CCS_CNS_11643_12:
716 case WC_CCS_CNS_11643_13:
717 case WC_CCS_CNS_11643_14:
718 case WC_CCS_CNS_11643_15:
719 case WC_CCS_CNS_11643_16:
720 Strcat_char(os, WC_C_SS2R);
721 Strcat_char(os, (char)(0xA8 + (cc.ccs - WC_CCS_CNS_11643_8)));
722 break;
723 case WC_CCS_C1:
724 Strcat_char(os, (char)(cc.code | 0x80));
725 return;
726 case WC_CCS_UNKNOWN_W:
727 if (!WcOption.no_replace)
728 Strcat_charp(os, WC_REPLACE_W);
729 return;
730 case WC_CCS_UNKNOWN:
731 if (!WcOption.no_replace)
732 Strcat_charp(os, WC_REPLACE);
733 return;
734 default:
735 #ifdef USE_UNICODE
736 if (WcOption.ucs_conv)
737 cc = wc_any_to_any_ces(cc, st);
738 else
739 #endif
740 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
741 continue;
742 }
743 Strcat_char(os, (char)((cc.code >> 8) | 0x80));
744 Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
745 return;
746 }
747 }
748
749 void
wc_push_to_iso8859(Str os,wc_wchar_t cc,wc_status * st)750 wc_push_to_iso8859(Str os, wc_wchar_t cc, wc_status *st)
751 {
752 wc_ccs g1_ccs = st->ces_info->gset[1].ccs;
753
754 while (1) {
755 if (cc.ccs == g1_ccs) {
756 Strcat_char(os, (char)(cc.code | 0x80));
757 return;
758 }
759 switch (cc.ccs) {
760 case WC_CCS_US_ASCII:
761 Strcat_char(os, (char)cc.code);
762 return;
763 case WC_CCS_C1:
764 Strcat_char(os, (char)(cc.code | 0x80));
765 return;
766 case WC_CCS_UNKNOWN_W:
767 if (!WcOption.no_replace)
768 Strcat_charp(os, WC_REPLACE_W);
769 return;
770 case WC_CCS_UNKNOWN:
771 if (!WcOption.no_replace)
772 Strcat_charp(os, WC_REPLACE);
773 return;
774 default:
775 #ifdef USE_UNICODE
776 if (WcOption.ucs_conv)
777 cc = wc_any_to_any_ces(cc, st);
778 else
779 #endif
780 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
781 continue;
782 }
783 }
784 }
785
786 void
wc_create_gmap(wc_status * st)787 wc_create_gmap(wc_status *st)
788 {
789 wc_gset *gset = st->ces_info->gset;
790 wc_uchar *gset_ext = st->ces_info->gset_ext;
791 int i, f;
792
793 if (WcOption.strict_iso2022) {
794 for (i = 0; i < WC_F_ISO_BASE; i++) {
795 cs94_gmap[i] = 0;
796 cs96_gmap[i] = 0;
797 cs94w_gmap[i] = 0;
798 cs96w_gmap[i] = 0;
799 cs942_gmap[i] = 0;
800 }
801 } else {
802 for (i = 0; i < WC_F_ISO_BASE; i++) {
803 cs94_gmap[i] = gset_ext[0];
804 cs96_gmap[i] = gset_ext[1];
805 cs94w_gmap[i] = gset_ext[2];
806 cs96w_gmap[i] = gset_ext[3];
807 cs942_gmap[i] = gset_ext[0];
808 }
809 }
810 for (i = 0; gset[i].ccs; i++) {
811 f = WC_CCS_GET_F(gset[i].ccs) - WC_F_ISO_BASE;
812 switch (WC_CCS_TYPE(gset[i].ccs)) {
813 case WC_CCS_A_CS94:
814 switch (gset[i].ccs) {
815 case WC_CCS_JIS_X_0201K:
816 if (!WcOption.use_jisx0201k)
817 continue;
818 break;
819 }
820 cs94_gmap[f] = gset[i].g;
821 break;
822 case WC_CCS_A_CS94W:
823 switch (gset[i].ccs) {
824 case WC_CCS_JIS_X_0212:
825 if (!WcOption.use_jisx0212)
826 continue;
827 break;
828 case WC_CCS_JIS_X_0213_1:
829 case WC_CCS_JIS_X_0213_2:
830 if (!WcOption.use_jisx0213)
831 continue;
832 break;
833 }
834 cs94w_gmap[f] = gset[i].g;
835 break;
836 case WC_CCS_A_CS96:
837 cs96_gmap[f] = gset[i].g;
838 break;
839 case WC_CCS_A_CS96W:
840 cs96w_gmap[f] = gset[i].g;
841 break;
842 case WC_CCS_A_CS942:
843 cs942_gmap[f] = gset[i].g;
844 break;
845 }
846 }
847 }
848
849 Str
wc_char_conv_from_iso2022(wc_uchar c,wc_status * st)850 wc_char_conv_from_iso2022(wc_uchar c, wc_status *st)
851 {
852 static Str os;
853 static wc_uchar buf[4];
854 static size_t nbuf;
855 wc_uchar *p;
856 wc_ccs gl_ccs, gr_ccs;
857
858 if (st->state == -1) {
859 st->state = WC_ISO_NOSTATE;
860 os = Strnew_size(8);
861 nbuf = 0;
862 }
863
864 gl_ccs = st->ss ? st->design[st->ss] : st->design[st->gl];
865 gr_ccs = st->ss ? st->design[st->ss] : st->design[st->gr];
866
867 switch (st->state) {
868 case WC_ISO_NOSTATE:
869 switch (WC_ISO_MAP[c]) {
870 case GL2:
871 if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) {
872 Strcat_char(os, (char)c);
873 break;
874 }
875 case GL:
876 if (WC_CCS_IS_WIDE(gl_ccs)) {
877 buf[nbuf++] = c;
878 st->state = WC_ISO_MBYTE1;
879 return NULL;
880 } else if (gl_ccs == WC_CES_US_ASCII)
881 Strcat_char(os, (char)c);
882 else
883 wtf_push_iso2022(os, gl_ccs, (wc_uint32)c);
884 break;
885 case GR2:
886 if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96))
887 break;
888 case GR:
889 if (WC_CCS_IS_WIDE(gr_ccs)) {
890 buf[nbuf++] = c;
891 st->state = WC_EUC_MBYTE1;
892 return NULL;
893 } else if (gr_ccs)
894 wtf_push_iso2022(os, gr_ccs, (wc_uint32)c);
895 break;
896 case C0:
897 Strcat_char(os, (char)c);
898 break;
899 case C1:
900 break;
901 case ESC:
902 buf[nbuf++] = c;
903 st->state = WC_C_ESC;
904 return NULL;
905 case SI:
906 st->gl = 0;
907 break;
908 case SO:
909 st->gl = 1;
910 break;
911 case SS2:
912 if (! st->design[2])
913 return os;
914 st->ss = 2;
915 return NULL;
916 case SS3:
917 if (! st->design[3])
918 return os;
919 st->ss = 3;
920 return NULL;
921 }
922 break;
923 case WC_ISO_MBYTE1:
924 switch (WC_ISO_MAP[c]) {
925 case GL2:
926 if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96))
927 break;
928 case GL:
929 buf[nbuf++] = c;
930 wtf_push_iso2022(os, gl_ccs, ((wc_uint32)buf[0] << 8) | buf[1]);
931 break;
932 }
933 st->state = WC_ISO_NOSTATE;
934 break;
935 case WC_EUC_MBYTE1:
936 switch (WC_ISO_MAP[c]) {
937 case GR2:
938 if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96))
939 break;
940 case GR:
941 if (gr_ccs == WC_CCS_CNS_11643_X) {
942 buf[nbuf++] = c;
943 st->state = WC_EUC_TW_MBYTE2;
944 return NULL;
945 }
946 buf[nbuf++] = c;
947 wtf_push_iso2022(os, gr_ccs, ((wc_uint32)buf[0] << 8) | buf[1]);
948 break;
949 }
950 st->state = WC_ISO_NOSTATE;
951 break;
952 case WC_EUC_TW_MBYTE2:
953 if (WC_ISO_MAP[c] == GR) {
954 buf[nbuf++] = c;
955 c = buf[0];
956 if (0xa1 <= c && c <= 0xa7) {
957 wtf_push_iso2022(os, WC_CCS_CNS_11643_1 + (c - 0xa1),
958 ((wc_uint32)buf[1] << 8) | buf[2]);
959 break;
960 }
961 if (0xa8 <= c && c <= 0xb0) {
962 wtf_push_iso2022(os, WC_CCS_CNS_11643_8 + (c - 0xa8),
963 ((wc_uint32)buf[1] << 8) | buf[2]);
964 break;
965 }
966 }
967 st->state = WC_ISO_NOSTATE;
968 break;
969 case WC_C_ESC:
970 switch (c) {
971 case WC_C_G0_CS94:
972 case WC_C_G1_CS94:
973 case WC_C_G2_CS94:
974 case WC_C_G3_CS94:
975 buf[nbuf++] = c;
976 st->state = WC_C_G0_CS94;
977 return NULL;
978 case WC_C_G0_CS96:
979 case WC_C_G1_CS96:
980 case WC_C_G2_CS96:
981 case WC_C_G3_CS96:
982 case WC_C_C0:
983 case WC_C_C1:
984 case WC_C_REP:
985 buf[nbuf++] = c;
986 st->state = WC_C_G0_CS96;
987 return NULL;
988 case WC_C_MBCS:
989 case WC_C_CSWSR:
990 buf[nbuf++] = c;
991 st->state = c;
992 return NULL;
993 case WC_C_SS2:
994 st->ss = 2;
995 st->state = WC_ISO_NOSTATE;
996 return NULL;
997 case WC_C_SS3:
998 st->ss = 3;
999 st->state = WC_ISO_NOSTATE;
1000 return NULL;
1001 case WC_C_LS2:
1002 st->gl = 2;
1003 break;
1004 case WC_C_LS3:
1005 st->gl = 3;
1006 break;
1007 case WC_C_LS2R:
1008 st->gr = 2;
1009 break;
1010 case WC_C_LS3R:
1011 st->gr = 3;
1012 break;
1013 default:
1014 break;
1015 }
1016 break;
1017 case WC_C_MBCS:
1018 switch (c) {
1019 case WC_F_JIS_C_6226:
1020 case WC_F_JIS_X_0208:
1021 case WC_F_GB_2312:
1022 buf[nbuf++] = c;
1023 p = buf;
1024 wc_parse_iso2022_esc(&p, st);
1025 break;
1026 case WC_C_G0_CS94:
1027 case WC_C_G1_CS94:
1028 case WC_C_G2_CS94:
1029 case WC_C_G3_CS94:
1030 case WC_C_G0_CS96:
1031 case WC_C_G1_CS96:
1032 case WC_C_G2_CS96:
1033 case WC_C_G3_CS96:
1034 buf[nbuf++] = c;
1035 st->state = WC_C_G0_CS96;
1036 return NULL;
1037 }
1038 break;
1039 case WC_C_CSWSR:
1040 switch (c) {
1041 case WC_C_CSWOSR:
1042 buf[nbuf++] = c;
1043 st->state = WC_C_G1_CS94;
1044 return NULL;
1045 }
1046 buf[nbuf++] = c;
1047 p = buf;
1048 wc_parse_iso2022_esc(&p, st);
1049 break;
1050 case WC_C_G0_CS94:
1051 switch (c) {
1052 case WC_C_CS942:
1053 buf[nbuf++] = c;
1054 st->state = WC_C_G0_CS96;
1055 return NULL;
1056 }
1057 case WC_C_G0_CS96:
1058 buf[nbuf++] = c;
1059 p = buf;
1060 wc_parse_iso2022_esc(&p, st);
1061 break;
1062 }
1063 st->ss = 0;
1064 st->state = -1;
1065 return os;
1066 }
1067