1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2
3 #include "vt_char_encoding.h"
4
5 #include <stdio.h> /* sscanf */
6
7 #include <pobl/bl_str.h> /* bl_str_sep */
8 #include <pobl/bl_debug.h>
9 #include <pobl/bl_mem.h> /* alloca */
10 #include <pobl/bl_locale.h> /* bl_get_codeset */
11
12 #include <mef/ef_iso8859_parser.h>
13 #include <mef/ef_8bit_parser.h>
14 #include <mef/ef_eucjp_parser.h>
15 #include <mef/ef_euckr_parser.h>
16 #include <mef/ef_euccn_parser.h>
17 #include <mef/ef_euctw_parser.h>
18 #include <mef/ef_iso2022jp_parser.h>
19 #include <mef/ef_iso2022kr_parser.h>
20 #include <mef/ef_iso2022cn_parser.h>
21 #include <mef/ef_sjis_parser.h>
22 #include <mef/ef_johab_parser.h>
23 #include <mef/ef_big5_parser.h>
24 #include <mef/ef_hz_parser.h>
25 #include <mef/ef_utf8_parser.h>
26
27 #include <mef/ef_iso8859_conv.h>
28 #include <mef/ef_8bit_conv.h>
29 #include <mef/ef_eucjp_conv.h>
30 #include <mef/ef_euckr_conv.h>
31 #include <mef/ef_euccn_conv.h>
32 #include <mef/ef_euctw_conv.h>
33 #include <mef/ef_iso2022jp_conv.h>
34 #include <mef/ef_iso2022kr_conv.h>
35 #include <mef/ef_iso2022cn_conv.h>
36 #include <mef/ef_sjis_conv.h>
37 #include <mef/ef_johab_conv.h>
38 #include <mef/ef_big5_conv.h>
39 #include <mef/ef_hz_conv.h>
40 #include <mef/ef_utf8_conv.h>
41
42 #include <mef/ef_iso2022_conv.h> /* ef_iso2022_illegal_char */
43
44 #include "vt_drcs.h"
45
46 typedef struct encoding_table {
47 vt_char_encoding_t encoding;
48 char *name;
49 ef_parser_t *(*parser_new)(void);
50 ef_conv_t *(*conv_new)(void);
51
52 } encoding_table_t;
53
54 /* --- static variables --- */
55
56 /*
57 * !!! Notice !!!
58 * The order should be the same as vt_char_encoding_t in vt_char_encoding.h
59 * If the order is changed, x_font_manager.c:usascii_font_cs_table should be
60 * also changed.
61 */
62 static encoding_table_t encoding_table[] = {
63 { VT_ISO8859_1, "ISO88591", ef_iso8859_1_parser_new, ef_iso8859_1_conv_new, },
64 { VT_ISO8859_2, "ISO88592", ef_iso8859_2_parser_new, ef_iso8859_2_conv_new, },
65 { VT_ISO8859_3, "ISO88593", ef_iso8859_3_parser_new, ef_iso8859_3_conv_new, },
66 { VT_ISO8859_4, "ISO88594", ef_iso8859_4_parser_new, ef_iso8859_4_conv_new, },
67 { VT_ISO8859_5, "ISO88595", ef_iso8859_5_parser_new, ef_iso8859_5_conv_new, },
68 { VT_ISO8859_6, "ISO88596", ef_iso8859_6_parser_new, ef_iso8859_6_conv_new, },
69 { VT_ISO8859_7, "ISO88597", ef_iso8859_7_parser_new, ef_iso8859_7_conv_new, },
70 { VT_ISO8859_8, "ISO88598", ef_iso8859_8_parser_new, ef_iso8859_8_conv_new, },
71 { VT_ISO8859_9, "ISO88599", ef_iso8859_9_parser_new, ef_iso8859_9_conv_new, },
72 { VT_ISO8859_10, "ISO885910", ef_iso8859_10_parser_new, ef_iso8859_10_conv_new, },
73 { VT_TIS620, "ISO885911", ef_tis620_2533_parser_new, ef_tis620_2533_conv_new, },
74 { VT_ISO8859_13, "ISO885913", ef_iso8859_13_parser_new, ef_iso8859_13_conv_new, },
75 { VT_ISO8859_14, "ISO885914", ef_iso8859_14_parser_new, ef_iso8859_14_conv_new, },
76 { VT_ISO8859_15, "ISO885915", ef_iso8859_15_parser_new, ef_iso8859_15_conv_new, },
77 { VT_ISO8859_16, "ISO885916", ef_iso8859_16_parser_new, ef_iso8859_16_conv_new, },
78 { VT_TCVN5712, "TCVN5712", ef_tcvn5712_3_1993_parser_new, ef_tcvn5712_3_1993_conv_new, },
79
80 { VT_ISCII_ASSAMESE, "ISCIIASSAMESE", ef_iscii_assamese_parser_new, ef_iscii_assamese_conv_new, },
81 { VT_ISCII_BENGALI, "ISCIIBENGALI", ef_iscii_bengali_parser_new, ef_iscii_bengali_conv_new, },
82 { VT_ISCII_GUJARATI, "ISCIIGUJARATI", ef_iscii_gujarati_parser_new, ef_iscii_gujarati_conv_new, },
83 { VT_ISCII_HINDI, "ISCIIHINDI", ef_iscii_hindi_parser_new, ef_iscii_hindi_conv_new, },
84 { VT_ISCII_KANNADA, "ISCIIKANNADA", ef_iscii_kannada_parser_new, ef_iscii_kannada_conv_new, },
85 { VT_ISCII_MALAYALAM, "ISCIIMALAYALAM", ef_iscii_malayalam_parser_new,
86 ef_iscii_malayalam_conv_new, },
87 { VT_ISCII_ORIYA, "ISCIIORIYA", ef_iscii_oriya_parser_new, ef_iscii_oriya_conv_new, },
88 { VT_ISCII_PUNJABI, "ISCIIPUNJABI", ef_iscii_punjabi_parser_new, ef_iscii_punjabi_conv_new, },
89 { VT_ISCII_TELUGU, "ISCIITELUGU", ef_iscii_telugu_parser_new, ef_iscii_telugu_conv_new, },
90 { VT_VISCII, "VISCII", ef_viscii_parser_new, ef_viscii_conv_new, },
91 { VT_KOI8_R, "KOI8R", ef_koi8_r_parser_new, ef_koi8_r_conv_new, },
92 { VT_KOI8_U, "KOI8U", ef_koi8_u_parser_new, ef_koi8_u_conv_new, },
93 { VT_KOI8_T, "KOI8T", ef_koi8_t_parser_new, ef_koi8_t_conv_new, },
94 { VT_GEORGIAN_PS, "GEORGIANPS", ef_georgian_ps_parser_new, ef_georgian_ps_conv_new, },
95 { VT_CP1250, "CP1250", ef_cp1250_parser_new, ef_cp1250_conv_new, },
96 { VT_CP1251, "CP1251", ef_cp1251_parser_new, ef_cp1251_conv_new, },
97 { VT_CP1252, "CP1252", ef_cp1252_parser_new, ef_cp1252_conv_new, },
98 { VT_CP1253, "CP1253", ef_cp1253_parser_new, ef_cp1253_conv_new, },
99 { VT_CP1254, "CP1254", ef_cp1254_parser_new, ef_cp1254_conv_new, },
100 { VT_CP1255, "CP1255", ef_cp1255_parser_new, ef_cp1255_conv_new, },
101 { VT_CP1256, "CP1256", ef_cp1256_parser_new, ef_cp1256_conv_new, },
102 { VT_CP1257, "CP1257", ef_cp1257_parser_new, ef_cp1257_conv_new, },
103 { VT_CP1258, "CP1258", ef_cp1258_parser_new, ef_cp1258_conv_new, },
104 { VT_CP874, "CP874", ef_cp874_parser_new, ef_cp874_conv_new, },
105
106 { VT_UTF8, "UTF8", ef_utf8_parser_new, ef_utf8_conv_new, },
107
108 { VT_EUCJP, "EUCJP", ef_eucjp_parser_new, ef_eucjp_conv_new, },
109 { VT_EUCJISX0213, "EUCJISX0213", ef_eucjisx0213_parser_new, ef_eucjisx0213_conv_new, },
110 { VT_ISO2022JP, "ISO2022JP", ef_iso2022jp_7_parser_new, ef_iso2022jp_7_conv_new, },
111 { VT_ISO2022JP2, "ISO2022JP2", ef_iso2022jp2_parser_new, ef_iso2022jp2_conv_new, },
112 { VT_ISO2022JP3, "ISO2022JP3", ef_iso2022jp3_parser_new, ef_iso2022jp3_conv_new, },
113 { VT_SJIS, "SJIS", ef_sjis_parser_new, ef_sjis_conv_new, },
114 { VT_SJISX0213, "SJISX0213", ef_sjisx0213_parser_new, ef_sjisx0213_conv_new, },
115
116 { VT_EUCKR, "EUCKR", ef_euckr_parser_new, ef_euckr_conv_new, },
117 { VT_UHC, "UHC", ef_uhc_parser_new, ef_uhc_conv_new, },
118 { VT_JOHAB, "JOHAB", ef_johab_parser_new, ef_johab_conv_new, },
119 { VT_ISO2022KR, "ISO2022KR", ef_iso2022kr_parser_new, ef_iso2022kr_conv_new, },
120
121 { VT_BIG5, "BIG5", ef_big5_parser_new, ef_big5_conv_new, },
122 { VT_EUCTW, "EUCTW", ef_euctw_parser_new, ef_euctw_conv_new, },
123
124 { VT_BIG5HKSCS, "BIG5HKSCS", ef_big5hkscs_parser_new, ef_big5hkscs_conv_new, },
125
126 /* not listed in IANA. GB2312 is usually used instead. */
127 { VT_EUCCN, "EUCCN", ef_euccn_parser_new, ef_euccn_conv_new, },
128 { VT_GBK, "GBK", ef_gbk_parser_new, ef_gbk_conv_new, },
129 { VT_GB18030, "GB18030", ef_gb18030_2000_parser_new, ef_gb18030_2000_conv_new, },
130 { VT_HZ, "HZ", ef_hz_parser_new, ef_hz_conv_new, },
131
132 { VT_ISO2022CN, "ISO2022CN", ef_iso2022cn_parser_new, ef_iso2022cn_conv_new, },
133
134 /*
135 * alternative names.
136 * these are not used in vt_{parser|conv}_new , so parser_new/parser_conv
137 * members are not necessary.
138 */
139
140 { VT_TIS620, "TIS620", },
141
142 #if 0
143 /* XXX necessary ? */
144 { VT_EUCJP, "EXTENDEDUNIXCODEPACKEDFORMATFORJAPANESE", }, /* MIME */
145 { VT_EUCJP, "CSEUCPKDFMTJAPANESE", }, /* MIME */
146 #endif
147 { VT_EUCJP, "UJIS" },
148 { VT_SJIS, "SHIFTJIS", }, /* MIME */
149
150 { VT_EUCKR, "KSC56011987", }, /* for IIS error page(IIS bug?) */
151
152 { VT_EUCCN, "GB2312", },
153
154 { VT_HZ, "HZGB2312", },
155 };
156
157 /*
158 * MSB of these charsets are not set , but must be set manually for X font.
159 * These charsets are placed in an ascending order.
160 */
161 static u_int16_t /* ef_charset_t */ msb_set_cs_table[] = {
162 JISX0201_KATA, ISO8859_1_R, ISO8859_2_R, ISO8859_3_R, ISO8859_4_R, ISO8859_5_R,
163 ISO8859_6_R, ISO8859_7_R, ISO8859_8_R, ISO8859_9_R, ISO8859_10_R, TIS620_2533,
164 ISO8859_13_R, ISO8859_14_R, ISO8859_15_R, ISO8859_16_R, TCVN5712_3_1993,
165
166 };
167
168 static struct {
169 u_int16_t ucs;
170 u_char decsp;
171
172 } ucs_to_decsp_table[] = {
173 {0xa0, '_'},
174 {0xa3, '}'},
175 {0xb0, 'f'},
176 {0xb1, 'g'},
177 {0xb7, '~'},
178 {0x3c0, '{'},
179 {0x2260, '|'},
180 {0x2264, 'y'},
181 {0x2265, 'z'},
182 {0x23ba, 'o'},
183 {0x23bb, 'p'},
184 {0x23bc, 'r'},
185 {0x23bd, 's'},
186 {0x2409, 'b'},
187 {0x240a, 'e'},
188 {0x240b, 'i'},
189 {0x240c, 'c'},
190 {0x240d, 'd'},
191 {0x2424, 'h'},
192 {0x2500, 'q'},
193 {0x2502, 'x'},
194 {0x250c, 'l'},
195 {0x2510, 'k'},
196 {0x2514, 'm'},
197 {0x2518, 'j'},
198 {0x251c, 't'},
199 {0x2524, 'u'},
200 {0x252c, 'w'},
201 {0x2534, 'v'},
202 {0x253c, 'n'},
203 {0x2592, 'a'},
204 {0x25c6, '`'},
205 };
206
/*
 * Unicode code points for the DEC technical characters 0x21 - 0x7e.
 * The element for character c is at index (c - 0x21); 0x0 marks a character
 * which has no mapping.
 *
 * The table is read-only, hence const.
 */
static const u_int16_t dectech_to_ucs_table[] = {
  0x23B7, 0x250C, 0x2500, 0x2320, 0x2321, 0x2502, 0x23A1,
  0x23A3, 0x23A4, 0x23A6, 0x239B, 0x239D, 0x239E, 0x23A0, 0x23A8,
  0x23AC, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
  0x0, 0x0, 0x0, 0x0, 0x2264, 0x2260, 0x2265, 0x222B,
  0x2234, 0x221D, 0x221E, 0x00F7, 0x0394, 0x2207, 0x03A6, 0x0393,
  0x223C, 0x2243, 0x0398, 0x00D7, 0x039B, 0x21D4, 0x21D2, 0x2261,
  0x03A0, 0x03A8, 0x0, 0x03A3, 0x0, 0x0, 0x221A, 0x03A9,
  0x039E, 0x03A5, 0x2282, 0x2283, 0x2229, 0x222A, 0x2227, 0x2228,
  0x00AC, 0x03B1, 0x03B2, 0x03C7, 0x03B4, 0x03B5, 0x03C6, 0x03B3,
  0x03B7, 0x03B9, 0x03B8, 0x03BA, 0x03BB, 0x0, 0x03BD, 0x2202,
  0x03C0, 0x03C8, 0x03C1, 0x03C3, 0x03C4, 0x0, 0x0192, 0x03C9,
  0x03BE, 0x03C5, 0x03B6, 0x2190, 0x2191, 0x2192, 0x2193 };
221
/*
 * Original init methods of the ISO-2022-KR converter and parser.
 * vt_char_encoding_conv_new() and vt_char_encoding_parser_new() store them
 * here before installing ovrd_iso2022kr_{conv|parser}_init, which call them
 * first. Being file-scope variables, they are shared by every instance.
 */
static void (*iso2022kr_conv_init)(ef_conv_t *);
static void (*iso2022kr_parser_init)(ef_parser_t *);
224
225 /* --- static functions --- */
226
/*
 * Replacement for the init method of an ISO-2022-KR converter.
 *
 * Calls the saved original init method, then converts the 5 bytes
 * "ESC $ ) C a" read by a temporary parser so that the converter emits the
 * sequence designating KSC5601 to G1. The converted bytes are thrown away.
 * If the temporary parser cannot be created, only the original init is done.
 */
static void ovrd_iso2022kr_conv_init(ef_conv_t *conv) {
  ef_parser_t *tmp_parser;
  u_char discarded[5];

  (*iso2022kr_conv_init)(conv);

  tmp_parser = ef_iso2022kr_parser_new();

  if (tmp_parser != NULL) {
    /* designating KSC5601 to G1 */
    (*tmp_parser->set_str)(tmp_parser, "\x1b$)Ca", 5);

    /* this returns sequence of designating KSC5601 to G1 */
    (*conv->convert)(conv, discarded, sizeof(discarded), tmp_parser);

    (*tmp_parser->destroy)(tmp_parser);
  }
}
245
/*
 * Replacement for the init method of an ISO-2022-KR parser.
 *
 * Calls the saved original init method, then makes 'parser' itself read the
 * 5 bytes "ESC $ ) C a" (designating KSC5601 to G1) by running it through a
 * temporary converter. The converted bytes are thrown away.
 * If the temporary converter cannot be created, only the original init is
 * done.
 */
static void ovrd_iso2022kr_parser_init(ef_parser_t *parser) {
  ef_conv_t *tmp_conv;
  u_char discarded[5];

  (*iso2022kr_parser_init)(parser);

  tmp_conv = ef_iso2022kr_conv_new();

  if (tmp_conv != NULL) {
    /* designating KSC5601 to G1 */
    (*parser->set_str)(parser, "\x1b$)Ca", 5);

    /* this returns sequence of designating KSC5601 to G1 */
    (*tmp_conv->convert)(tmp_conv, discarded, sizeof(discarded), parser);

    (*tmp_conv->destroy)(tmp_conv);
  }
}
264
/*
 * illegal_char handler installed for ISO2022 based encodings when the loose
 * rule is enabled (see vt_char_encoding_conv_set_use_loose_rule()).
 *
 * A character whose charset is ISO10646_UCS4_1 is first passed to
 * vt_convert_unicode_pua_to_drcs(); the character is then handed to
 * ef_iso2022_illegal_char(), whose result is returned unchanged.
 */
static size_t iso2022_illegal_char(ef_conv_t *conv, u_char *dst, size_t dst_size, int *is_full,
                                   ef_char_t *ch) {
  const int is_ucs4 = (ch->cs == ISO10646_UCS4_1);

  if (is_ucs4) {
    vt_convert_unicode_pua_to_drcs(ch);
  }

  return ef_iso2022_illegal_char(conv, dst, dst_size, is_full, ch);
}
273
/*
 * illegal_char handler installed for encodings which are not based on ISO2022
 * when the loose rule is enabled.
 *
 * A DEC_SPECIAL character is written as the 7 bytes
 *   ESC ( 0  <the character>  ESC ( B
 * and 7 is returned. If dst has less than 7 bytes, nothing is written,
 * *is_full is set to 1 and 0 is returned.
 * A character of any other charset produces no output (0 is returned with
 * *is_full cleared).
 */
static size_t non_iso2022_illegal_char(ef_conv_t *conv, u_char *dst, size_t dst_size, int *is_full,
                                       ef_char_t *ch) {
  enum { DECSP_SEQ_LEN = 7 };
  u_char *out = dst;

  *is_full = 0;

  if (ch->cs != DEC_SPECIAL) {
    return 0;
  }

  if (dst_size < DECSP_SEQ_LEN) {
    *is_full = 1;

    return 0;
  }

  /* ESC ( 0 */
  *out++ = '\x1b';
  *out++ = '(';
  *out++ = '0';

  *out++ = ch->ch[0];

  /* ESC ( B */
  *out++ = '\x1b';
  *out++ = '(';
  *out++ = 'B';

  return DECSP_SEQ_LEN;
}
298
/*
 * illegal_char handler of UTF-8 converters.
 *
 * A DEC_SPECIAL character is converted to Unicode with
 * vt_convert_decsp_to_ucs() and written to dst in UTF-8.
 *
 * dst      : output buffer.
 * dst_size : size of dst. At least 3 bytes (the longest sequence this
 *            function can produce) are required; otherwise *is_full is set
 *            to 1 and nothing is written.
 * is_full  : set to 1 if dst is too small, 0 otherwise.
 * ch       : the character the converter could not handle.
 *
 * Returns the number of bytes written, or 0 if nothing was written (dst is
 * too small, ch is not DEC_SPECIAL or ch has no Unicode mapping).
 *
 * The shortest UTF-8 form is always used. (Emitting 3 bytes unconditionally
 * would generate overlong, i.e. invalid, sequences for code points below
 * U+0800 such as U+00B0 and U+00B1, which DEC special 'f' and 'g' map to.)
 */
static size_t utf8_illegal_char(ef_conv_t *conv, u_char *dst, size_t dst_size, int *is_full,
                                ef_char_t *ch) {
  u_int16_t ucs;

  *is_full = 0;

  if (ch->cs != DEC_SPECIAL) {
    return 0;
  }

  if (dst_size < 3) {
    *is_full = 1;

    return 0;
  }

  if ((ucs = vt_convert_decsp_to_ucs(ef_char_to_int(ch))) == 0) {
    /* no mapping */
    return 0;
  }

  if (ucs < 0x80) {
    dst[0] = (u_char)ucs;

    return 1;
  } else if (ucs < 0x800) {
    dst[0] = (u_char)(((ucs >> 6) & 0x1f) | 0xc0);
    dst[1] = (u_char)((ucs & 0x3f) | 0x80);

    return 2;
  } else {
    dst[0] = (u_char)(((ucs >> 12) & 0x0f) | 0xe0);
    dst[1] = (u_char)(((ucs >> 6) & 0x3f) | 0x80);
    dst[2] = (u_char)((ucs & 0x3f) | 0x80);

    return 3;
  }
}
319
320 /* --- global functions --- */
321
/*
 * Returns the name stored in encoding_table for 'encoding'.
 * "ISO88591" is returned if 'encoding' is negative or not less than
 * MAX_CHAR_ENCODINGS.
 */
char *vt_get_char_encoding_name(vt_char_encoding_t encoding) {
  if (0 <= encoding && encoding < MAX_CHAR_ENCODINGS) {
    return encoding_table[encoding].name;
  }

  return "ISO88591";
}
329
vt_get_char_encoding(const char * name)330 vt_char_encoding_t vt_get_char_encoding(const char *name /* '_' and '-' are ignored. */
331 ) {
332 int count;
333 char *_name;
334 char *encoding;
335 char *p;
336
337 /*
338 * duplicating name so as not to destroy its memory.
339 */
340 if ((_name = alloca(strlen(name) + 1)) == NULL ||
341 (encoding = alloca(strlen(name) + 1)) == NULL) {
342 return VT_UNKNOWN_ENCODING;
343 }
344 strcpy(_name, name);
345 encoding[0] = '\0';
346
347 /*
348 * removing '-' and '_' from name.
349 */
350 while ((p = bl_str_sep(&_name, "-_")) != NULL) {
351 strcat(encoding, p);
352 }
353
354 #ifdef __DEBUG
355 bl_debug_printf(BL_DEBUG_TAG " encoding -> %s.\n", encoding);
356 #endif
357
358 if (strcasecmp(encoding, "auto") == 0) {
359 /*
360 * XXX
361 * UTF-8 is used by default in cygwin, msys, win32, android and osx.
362 * (On osx, if mlterm.app is started from Finder,
363 * vt_get_char_encoding("auto") returns VT_ISO88591.)
364 * Note that vt_get_char_encoding("auto") is used to set character encoding
365 * of window/icon title string, not only to determine character encoding.
366 * (see vt_parser.c)
367 */
368 #if !defined(__CYGWIN__) && !defined(__MSYS__) && !defined(USE_WIN32API) && \
369 !defined(__ANDROID__) && !defined(__APPLE__)
370 vt_char_encoding_t e;
371
372 if ((e = vt_get_char_encoding(bl_get_codeset())) != VT_UNKNOWN_ENCODING) {
373 return e;
374 }
375 #endif
376
377 return VT_UTF8;
378 }
379
380 for (count = 0; count < sizeof(encoding_table) / sizeof(encoding_table_t); count++) {
381 if (strcasecmp(encoding, encoding_table[count].name) == 0) {
382 return encoding_table[count].encoding;
383 }
384 }
385
386 return VT_UNKNOWN_ENCODING;
387 }
388
vt_char_encoding_parser_new(vt_char_encoding_t encoding)389 ef_parser_t *vt_char_encoding_parser_new(vt_char_encoding_t encoding) {
390 ef_parser_t *parser;
391
392 if (encoding < 0 || MAX_CHAR_ENCODINGS <= encoding ||
393 encoding_table[encoding].encoding != encoding) {
394 #ifdef DEBUG
395 bl_warn_printf(BL_DEBUG_TAG " %d is illegal encoding.\n", encoding);
396 #endif
397
398 return NULL;
399 }
400
401 if ((parser = (*encoding_table[encoding].parser_new)()) == NULL) {
402 return NULL;
403 }
404
405 if (encoding == VT_ISO2022KR) {
406 /* overriding init method */
407
408 iso2022kr_parser_init = parser->init;
409 parser->init = ovrd_iso2022kr_parser_init;
410
411 (*parser->init)(parser);
412 }
413
414 return parser;
415 }
416
vt_char_encoding_conv_new(vt_char_encoding_t encoding)417 ef_conv_t *vt_char_encoding_conv_new(vt_char_encoding_t encoding) {
418 ef_conv_t *conv;
419
420 if (encoding < 0 || MAX_CHAR_ENCODINGS <= encoding ||
421 encoding_table[encoding].encoding != encoding) {
422 #ifdef DEBUG
423 bl_warn_printf(BL_DEBUG_TAG " %d is illegal encoding.\n", encoding);
424 #endif
425
426 return NULL;
427 }
428
429 if ((conv = (*encoding_table[encoding].conv_new)()) == NULL) {
430 return NULL;
431 }
432
433 if (encoding == VT_UTF8) {
434 conv->illegal_char = utf8_illegal_char;
435 } else if (IS_ENCODING_BASED_ON_ISO2022(encoding)) {
436 if (encoding == VT_ISO2022KR) {
437 /* overriding init method */
438
439 iso2022kr_conv_init = conv->init;
440 conv->init = ovrd_iso2022kr_conv_init;
441
442 (*conv->init)(conv);
443 }
444 }
445
446 return conv;
447 }
448
/*
 * Returns 1 if 'cs' is listed in msb_set_cs_table, otherwise 0.
 *
 * The first and the last element of the table are used as bounds to reject
 * most charsets without scanning the table.
 */
int vt_is_msb_set(ef_charset_t cs) {
  const size_t num = sizeof(msb_set_cs_table) / sizeof(msb_set_cs_table[0]);
  size_t idx;

  if (cs < msb_set_cs_table[0] || msb_set_cs_table[num - 1] < cs) {
    return 0;
  }

  for (idx = 0; idx < num; idx++) {
    if (msb_set_cs_table[idx] == cs) {
      return 1;
    }
  }

  return 0;
}
463
/*
 * Converts src_len bytes of 'src' (in src_encoding) to dst_encoding and
 * stores the result in 'dst', which has room for dst_len bytes.
 *
 * A parser for src_encoding is created for the call and destroyed before
 * returning. Returns the value reported by
 * vt_char_encoding_convert_with_parser(), or 0 if the parser cannot be
 * created.
 */
size_t vt_char_encoding_convert(u_char *dst, size_t dst_len, vt_char_encoding_t dst_encoding,
                                u_char *src, size_t src_len, vt_char_encoding_t src_encoding) {
  size_t filled_len = 0;
  ef_parser_t *src_parser = vt_char_encoding_parser_new(src_encoding);

  if (src_parser != NULL) {
    (*src_parser->init)(src_parser);
    (*src_parser->set_str)(src_parser, src, src_len);

    filled_len = vt_char_encoding_convert_with_parser(dst, dst_len, dst_encoding, src_parser);

    (*src_parser->destroy)(src_parser);
  }

  return filled_len;
}
480
/*
 * Converts the characters read from 'parser' to dst_encoding and stores the
 * result in 'dst', which has room for dst_len bytes.
 *
 * A converter for dst_encoding is created for the call and destroyed before
 * returning. Returns the value reported by the converter's convert method,
 * or 0 if the converter cannot be created.
 */
size_t vt_char_encoding_convert_with_parser(u_char *dst, size_t dst_len,
                                            vt_char_encoding_t dst_encoding, ef_parser_t *parser) {
  size_t filled_len = 0;
  ef_conv_t *dst_conv = vt_char_encoding_conv_new(dst_encoding);

  if (dst_conv != NULL) {
    (*dst_conv->init)(dst_conv);

    filled_len = (*dst_conv->convert)(dst_conv, dst, dst_len, parser);

    (*dst_conv->destroy)(dst_conv);
  }

  return filled_len;
}
496
/*
 * Parses a unicode area written as "U+XXXX-YYYY" or "U+XXXX" (hexadecimal).
 *
 * On success 1 is returned and the area is stored in *min and *max (both get
 * the same value for the single code point form).
 * If 'str' has neither form, or min is greater than max, a message is output
 * with bl_msg_printf() and 0 is returned.
 */
int vt_parse_unicode_area(const char *str, u_int *min, u_int *max) {
  switch (sscanf(str, "U+%x-%x", min, max)) {
    case 2:
      /* "U+XXXX-YYYY" */
      if (*min <= *max) {
        return 1;
      }
      break;

    case 1:
      /* "U+XXXX" */
      *max = *min;

      return 1;

    default:
      break;
  }

  bl_msg_printf("Illegal unicode area format: %s\n", str);

  return 0;
}
515
516 /* XXX This function should be moved to mef */
vt_convert_ucs_to_decsp(u_int16_t ucs)517 u_char vt_convert_ucs_to_decsp(u_int16_t ucs) {
518 int l_idx;
519 int h_idx;
520 int idx;
521
522 l_idx = 0;
523 h_idx = sizeof(ucs_to_decsp_table) / sizeof(ucs_to_decsp_table[0]) - 1;
524
525 if (ucs < ucs_to_decsp_table[l_idx].ucs || ucs_to_decsp_table[h_idx].ucs < ucs) {
526 return 0;
527 }
528
529 while (1) {
530 idx = (l_idx + h_idx) / 2;
531
532 if (ucs == ucs_to_decsp_table[idx].ucs) {
533 return ucs_to_decsp_table[idx].decsp;
534 } else if (ucs < ucs_to_decsp_table[idx].ucs) {
535 h_idx = idx;
536 } else {
537 l_idx = idx + 1;
538 }
539
540 if (l_idx >= h_idx) {
541 return 0;
542 }
543 }
544 }
545
546 /* XXX This function should be moved to mef */
vt_convert_decsp_to_ucs(u_char decsp)547 u_int16_t vt_convert_decsp_to_ucs(u_char decsp) {
548 if ('`' <= decsp && decsp <= 'x') {
549 int count;
550
551 for (count = 0; count < sizeof(ucs_to_decsp_table) / sizeof(ucs_to_decsp_table[0]); count++) {
552 if (ucs_to_decsp_table[count].decsp == decsp) {
553 return ucs_to_decsp_table[count].ucs;
554 }
555 }
556 }
557
558 return 0;
559 }
560
561 /* XXX This function should be moved to mef */
vt_convert_dectech_to_ucs(u_char dectech)562 u_int16_t vt_convert_dectech_to_ucs(u_char dectech) {
563 if (0x21 <= dectech && dectech <= 0x7e) {
564 return dectech_to_ucs_table[dectech - 0x21];
565 }
566
567 return 0;
568 }
569
/*
 * Selects the illegal_char handler of 'conv' (a converter of 'encoding').
 *
 * flag != 0: iso2022_illegal_char for ISO2022 based encodings,
 *            non_iso2022_illegal_char for the others.
 * flag == 0: utf8_illegal_char for VT_UTF8, no handler (NULL) for the others.
 */
void vt_char_encoding_conv_set_use_loose_rule(ef_conv_t *conv, vt_char_encoding_t encoding,
                                              int flag) {
  if (!flag) {
    conv->illegal_char = (encoding == VT_UTF8) ? utf8_illegal_char : NULL;

    return;
  }

  conv->illegal_char =
      IS_ENCODING_BASED_ON_ISO2022(encoding) ? iso2022_illegal_char : non_iso2022_illegal_char;
}
586