1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2 
3 #include "vt_char_encoding.h"
4 
5 #include <stdio.h> /* sscanf */
6 
7 #include <pobl/bl_str.h> /* bl_str_sep */
8 #include <pobl/bl_debug.h>
9 #include <pobl/bl_mem.h>    /* alloca */
10 #include <pobl/bl_locale.h> /* bl_get_codeset */
11 
12 #include <mef/ef_iso8859_parser.h>
13 #include <mef/ef_8bit_parser.h>
14 #include <mef/ef_eucjp_parser.h>
15 #include <mef/ef_euckr_parser.h>
16 #include <mef/ef_euccn_parser.h>
17 #include <mef/ef_euctw_parser.h>
18 #include <mef/ef_iso2022jp_parser.h>
19 #include <mef/ef_iso2022kr_parser.h>
20 #include <mef/ef_iso2022cn_parser.h>
21 #include <mef/ef_sjis_parser.h>
22 #include <mef/ef_johab_parser.h>
23 #include <mef/ef_big5_parser.h>
24 #include <mef/ef_hz_parser.h>
25 #include <mef/ef_utf8_parser.h>
26 
27 #include <mef/ef_iso8859_conv.h>
28 #include <mef/ef_8bit_conv.h>
29 #include <mef/ef_eucjp_conv.h>
30 #include <mef/ef_euckr_conv.h>
31 #include <mef/ef_euccn_conv.h>
32 #include <mef/ef_euctw_conv.h>
33 #include <mef/ef_iso2022jp_conv.h>
34 #include <mef/ef_iso2022kr_conv.h>
35 #include <mef/ef_iso2022cn_conv.h>
36 #include <mef/ef_sjis_conv.h>
37 #include <mef/ef_johab_conv.h>
38 #include <mef/ef_big5_conv.h>
39 #include <mef/ef_hz_conv.h>
40 #include <mef/ef_utf8_conv.h>
41 
42 #include <mef/ef_iso2022_conv.h> /* ef_iso2022_illegal_char */
43 
44 #include "vt_drcs.h"
45 
/*
 * One row of encoding_table: associates an encoding id with the name matched
 * by vt_get_char_encoding() and with the functions that create a parser and a
 * converter for it.
 */
typedef struct encoding_table {
  vt_char_encoding_t encoding; /* encoding id returned by vt_get_char_encoding() */
  char *name;                  /* name, compared case-insensitively with '-' and '_' removed */
  ef_parser_t *(*parser_new)(void); /* creates a parser (omitted in alias rows) */
  ef_conv_t *(*conv_new)(void);     /* creates a converter (omitted in alias rows) */

} encoding_table_t;
53 
54 /* --- static variables --- */
55 
/*
 * !!! Notice !!!
 * The order should be the same as vt_char_encoding_t in vt_char_encoding.h,
 * because vt_get_char_encoding_name(), vt_char_encoding_parser_new() and
 * vt_char_encoding_conv_new() use the encoding value itself as an index into
 * this table.
 * If the order is changed, x_font_manager.c:usascii_font_cs_table should be
 * also changed.
 */
static encoding_table_t encoding_table[] = {
  { VT_ISO8859_1, "ISO88591", ef_iso8859_1_parser_new, ef_iso8859_1_conv_new, },
  { VT_ISO8859_2, "ISO88592", ef_iso8859_2_parser_new, ef_iso8859_2_conv_new, },
  { VT_ISO8859_3, "ISO88593", ef_iso8859_3_parser_new, ef_iso8859_3_conv_new, },
  { VT_ISO8859_4, "ISO88594", ef_iso8859_4_parser_new, ef_iso8859_4_conv_new, },
  { VT_ISO8859_5, "ISO88595", ef_iso8859_5_parser_new, ef_iso8859_5_conv_new, },
  { VT_ISO8859_6, "ISO88596", ef_iso8859_6_parser_new, ef_iso8859_6_conv_new, },
  { VT_ISO8859_7, "ISO88597", ef_iso8859_7_parser_new, ef_iso8859_7_conv_new, },
  { VT_ISO8859_8, "ISO88598", ef_iso8859_8_parser_new, ef_iso8859_8_conv_new, },
  { VT_ISO8859_9, "ISO88599", ef_iso8859_9_parser_new, ef_iso8859_9_conv_new, },
  { VT_ISO8859_10, "ISO885910", ef_iso8859_10_parser_new, ef_iso8859_10_conv_new, },
  { VT_TIS620, "ISO885911", ef_tis620_2533_parser_new, ef_tis620_2533_conv_new, },
  { VT_ISO8859_13, "ISO885913", ef_iso8859_13_parser_new, ef_iso8859_13_conv_new, },
  { VT_ISO8859_14, "ISO885914", ef_iso8859_14_parser_new, ef_iso8859_14_conv_new, },
  { VT_ISO8859_15, "ISO885915", ef_iso8859_15_parser_new, ef_iso8859_15_conv_new, },
  { VT_ISO8859_16, "ISO885916", ef_iso8859_16_parser_new, ef_iso8859_16_conv_new, },
  { VT_TCVN5712, "TCVN5712", ef_tcvn5712_3_1993_parser_new, ef_tcvn5712_3_1993_conv_new, },

  { VT_ISCII_ASSAMESE, "ISCIIASSAMESE", ef_iscii_assamese_parser_new, ef_iscii_assamese_conv_new, },
  { VT_ISCII_BENGALI, "ISCIIBENGALI", ef_iscii_bengali_parser_new, ef_iscii_bengali_conv_new, },
  { VT_ISCII_GUJARATI, "ISCIIGUJARATI", ef_iscii_gujarati_parser_new, ef_iscii_gujarati_conv_new, },
  { VT_ISCII_HINDI, "ISCIIHINDI", ef_iscii_hindi_parser_new, ef_iscii_hindi_conv_new, },
  { VT_ISCII_KANNADA, "ISCIIKANNADA", ef_iscii_kannada_parser_new, ef_iscii_kannada_conv_new, },
  { VT_ISCII_MALAYALAM, "ISCIIMALAYALAM", ef_iscii_malayalam_parser_new,
    ef_iscii_malayalam_conv_new, },
  { VT_ISCII_ORIYA, "ISCIIORIYA", ef_iscii_oriya_parser_new, ef_iscii_oriya_conv_new, },
  { VT_ISCII_PUNJABI, "ISCIIPUNJABI", ef_iscii_punjabi_parser_new, ef_iscii_punjabi_conv_new, },
  { VT_ISCII_TELUGU, "ISCIITELUGU", ef_iscii_telugu_parser_new, ef_iscii_telugu_conv_new, },
  { VT_VISCII, "VISCII", ef_viscii_parser_new, ef_viscii_conv_new, },
  { VT_KOI8_R, "KOI8R", ef_koi8_r_parser_new, ef_koi8_r_conv_new, },
  { VT_KOI8_U, "KOI8U", ef_koi8_u_parser_new, ef_koi8_u_conv_new, },
  { VT_KOI8_T, "KOI8T", ef_koi8_t_parser_new, ef_koi8_t_conv_new, },
  { VT_GEORGIAN_PS, "GEORGIANPS", ef_georgian_ps_parser_new, ef_georgian_ps_conv_new, },
  { VT_CP1250, "CP1250", ef_cp1250_parser_new, ef_cp1250_conv_new, },
  { VT_CP1251, "CP1251", ef_cp1251_parser_new, ef_cp1251_conv_new, },
  { VT_CP1252, "CP1252", ef_cp1252_parser_new, ef_cp1252_conv_new, },
  { VT_CP1253, "CP1253", ef_cp1253_parser_new, ef_cp1253_conv_new, },
  { VT_CP1254, "CP1254", ef_cp1254_parser_new, ef_cp1254_conv_new, },
  { VT_CP1255, "CP1255", ef_cp1255_parser_new, ef_cp1255_conv_new, },
  { VT_CP1256, "CP1256", ef_cp1256_parser_new, ef_cp1256_conv_new, },
  { VT_CP1257, "CP1257", ef_cp1257_parser_new, ef_cp1257_conv_new, },
  { VT_CP1258, "CP1258", ef_cp1258_parser_new, ef_cp1258_conv_new, },
  { VT_CP874, "CP874", ef_cp874_parser_new, ef_cp874_conv_new, },

  { VT_UTF8, "UTF8", ef_utf8_parser_new, ef_utf8_conv_new, },

  { VT_EUCJP, "EUCJP", ef_eucjp_parser_new, ef_eucjp_conv_new, },
  { VT_EUCJISX0213, "EUCJISX0213", ef_eucjisx0213_parser_new, ef_eucjisx0213_conv_new, },
  { VT_ISO2022JP, "ISO2022JP", ef_iso2022jp_7_parser_new, ef_iso2022jp_7_conv_new, },
  { VT_ISO2022JP2, "ISO2022JP2", ef_iso2022jp2_parser_new, ef_iso2022jp2_conv_new, },
  { VT_ISO2022JP3, "ISO2022JP3", ef_iso2022jp3_parser_new, ef_iso2022jp3_conv_new, },
  { VT_SJIS, "SJIS", ef_sjis_parser_new, ef_sjis_conv_new, },
  { VT_SJISX0213, "SJISX0213", ef_sjisx0213_parser_new, ef_sjisx0213_conv_new, },

  { VT_EUCKR, "EUCKR", ef_euckr_parser_new, ef_euckr_conv_new, },
  { VT_UHC, "UHC", ef_uhc_parser_new, ef_uhc_conv_new, },
  { VT_JOHAB, "JOHAB", ef_johab_parser_new, ef_johab_conv_new, },
  { VT_ISO2022KR, "ISO2022KR", ef_iso2022kr_parser_new, ef_iso2022kr_conv_new, },

  { VT_BIG5, "BIG5", ef_big5_parser_new, ef_big5_conv_new, },
  { VT_EUCTW, "EUCTW", ef_euctw_parser_new, ef_euctw_conv_new, },

  { VT_BIG5HKSCS, "BIG5HKSCS", ef_big5hkscs_parser_new, ef_big5hkscs_conv_new, },

  /* not listed in IANA. GB2312 is usually used instead. */
  { VT_EUCCN, "EUCCN", ef_euccn_parser_new, ef_euccn_conv_new, },
  { VT_GBK, "GBK", ef_gbk_parser_new, ef_gbk_conv_new, },
  { VT_GB18030, "GB18030", ef_gb18030_2000_parser_new, ef_gb18030_2000_conv_new, },
  { VT_HZ, "HZ", ef_hz_parser_new, ef_hz_conv_new, },

  { VT_ISO2022CN, "ISO2022CN", ef_iso2022cn_parser_new, ef_iso2022cn_conv_new, },

  /*
   * Alternative names.
   * These rows are only matched by name in vt_get_char_encoding(). They are
   * not used in vt_char_encoding_{parser|conv}_new, so the parser_new/conv_new
   * members are not necessary.
   */

  { VT_TIS620, "TIS620", },

#if 0
  /* XXX necessary ? */
  { VT_EUCJP, "EXTENDEDUNIXCODEPACKEDFORMATFORJAPANESE", }, /* MIME */
  { VT_EUCJP, "CSEUCPKDFMTJAPANESE", }, /* MIME */
#endif
  { VT_EUCJP, "UJIS" },
  { VT_SJIS, "SHIFTJIS", }, /* MIME */

  { VT_EUCKR, "KSC56011987", }, /* for IIS error page(IIS bug?) */

  { VT_EUCCN, "GB2312", },

  { VT_HZ, "HZGB2312", },
};
156 
/*
 * MSB of these charsets are not set , but must be set manually for X font.
 * These charsets are placed in an ascending order: vt_is_msb_set() uses the
 * first and the last element as the lower and upper bound of a quick range
 * check before scanning the table.
 */
static u_int16_t /* ef_charset_t */ msb_set_cs_table[] = {
    JISX0201_KATA, ISO8859_1_R,  ISO8859_2_R,  ISO8859_3_R,  ISO8859_4_R,     ISO8859_5_R,
    ISO8859_6_R,   ISO8859_7_R,  ISO8859_8_R,  ISO8859_9_R,  ISO8859_10_R,    TIS620_2533,
    ISO8859_13_R,  ISO8859_14_R, ISO8859_15_R, ISO8859_16_R, TCVN5712_3_1993,

};
167 
/*
 * Pairs of a UCS code point and the corresponding DEC special graphics
 * character.
 * The rows must stay sorted by ucs in ascending order, because
 * vt_convert_ucs_to_decsp() performs a binary search on this table.
 * vt_convert_decsp_to_ucs() scans it linearly for the reverse direction.
 */
static struct {
  u_int16_t ucs; /* search key of vt_convert_ucs_to_decsp() */
  u_char decsp;  /* search key of vt_convert_decsp_to_ucs() */

} ucs_to_decsp_table[] = {
  {0xa0, '_'},
  {0xa3, '}'},
  {0xb0, 'f'},
  {0xb1, 'g'},
  {0xb7, '~'},
  {0x3c0, '{'},
  {0x2260, '|'},
  {0x2264, 'y'},
  {0x2265, 'z'},
  {0x23ba, 'o'},
  {0x23bb, 'p'},
  {0x23bc, 'r'},
  {0x23bd, 's'},
  {0x2409, 'b'},
  {0x240a, 'e'},
  {0x240b, 'i'},
  {0x240c, 'c'},
  {0x240d, 'd'},
  {0x2424, 'h'},
  {0x2500, 'q'},
  {0x2502, 'x'},
  {0x250c, 'l'},
  {0x2510, 'k'},
  {0x2514, 'm'},
  {0x2518, 'j'},
  {0x251c, 't'},
  {0x2524, 'u'},
  {0x252c, 'w'},
  {0x2534, 'v'},
  {0x253c, 'n'},
  {0x2592, 'a'},
  {0x25c6, '`'},
};
206 
/*
 * UCS code points for the characters 0x21 - 0x7e (94 entries).
 * vt_convert_dectech_to_ucs() indexes this table with (dectech - 0x21), which
 * is why the first row holds 7 entries (0x21 - 0x27) and is indented by one
 * column.
 * NOTE(review): 0x0 entries presumably mark characters without a UCS
 * equivalent; callers cannot tell them apart from an out-of-range argument.
 */
static u_int16_t dectech_to_ucs_table[] = {
          0x23B7, 0x250C, 0x2500, 0x2320, 0x2321, 0x2502, 0x23A1,
  0x23A3, 0x23A4, 0x23A6, 0x239B, 0x239D, 0x239E, 0x23A0, 0x23A8,
  0x23AC, 0x0,    0x0,    0x0,    0x0,    0x0,    0x0,    0x0,
  0x0,    0x0,    0x0,    0x0,    0x2264, 0x2260, 0x2265, 0x222B,
  0x2234, 0x221D, 0x221E, 0x00F7, 0x0394, 0x2207, 0x03A6, 0x0393,
  0x223C, 0x2243, 0x0398, 0x00D7, 0x039B, 0x21D4, 0x21D2, 0x2261,
  0x03A0, 0x03A8, 0x0,    0x03A3, 0x0,    0x0,    0x221A, 0x03A9,
  0x039E, 0x03A5, 0x2282, 0x2283, 0x2229, 0x222A, 0x2227, 0x2228,
  0x00AC, 0x03B1, 0x03B2, 0x03C7, 0x03B4, 0x03B5, 0x03C6, 0x03B3,
  0x03B7, 0x03B9, 0x03B8, 0x03BA, 0x03BB, 0x0,    0x03BD, 0x2202,
  0x03C0, 0x03C8, 0x03C1, 0x03C3, 0x03C4, 0x0,    0x0192, 0x03C9,
  0x03BE, 0x03C5, 0x03B6, 0x2190, 0x2191, 0x2192, 0x2193 };
221 
/*
 * Original init methods of the ISO-2022-KR converter and parser.
 * vt_char_encoding_conv_new() / vt_char_encoding_parser_new() save them here
 * before installing ovrd_iso2022kr_conv_init() / ovrd_iso2022kr_parser_init(),
 * which call them first. Being file-scope statics, they are shared by every
 * ISO-2022-KR converter / parser created through this file.
 */
static void (*iso2022kr_conv_init)(ef_conv_t *);
static void (*iso2022kr_parser_init)(ef_parser_t *);
224 
225 /* --- static functions --- */
226 
/*
 * Replacement for the init method of an ISO-2022-KR converter (installed by
 * vt_char_encoding_conv_new()).
 *
 * It runs the saved original init method and then pushes a short sequence
 * that designates KSC5601 to G1 through the converter, discarding the output.
 * If a temporary parser cannot be created, only the original init is done.
 */
static void ovrd_iso2022kr_conv_init(ef_conv_t *conv) {
  ef_parser_t *kr_parser;
  u_char discarded[5];

  (*iso2022kr_conv_init)(conv);

  kr_parser = ef_iso2022kr_parser_new();

  if (kr_parser != NULL) {
    /* designating KSC5601 to G1 */
    (*kr_parser->set_str)(kr_parser, "\x1b$)Ca", 5);

    /* this returns sequence of designating KSC5601 to G1 */
    (*conv->convert)(conv, discarded, sizeof(discarded), kr_parser);

    (*kr_parser->destroy)(kr_parser);
  }
}
245 
/*
 * Replacement for the init method of an ISO-2022-KR parser (installed by
 * vt_char_encoding_parser_new()).
 *
 * It runs the saved original init method, feeds the parser itself a short
 * sequence that designates KSC5601 to G1, and lets a temporary converter
 * consume it; the converted bytes are discarded.
 * If the temporary converter cannot be created, only the original init is
 * done.
 */
static void ovrd_iso2022kr_parser_init(ef_parser_t *parser) {
  ef_conv_t *kr_conv;
  u_char discarded[5];

  (*iso2022kr_parser_init)(parser);

  kr_conv = ef_iso2022kr_conv_new();

  if (kr_conv != NULL) {
    /* designating KSC5601 to G1 */
    (*parser->set_str)(parser, "\x1b$)Ca", 5);

    /* this returns sequence of designating KSC5601 to G1 */
    (*kr_conv->convert)(kr_conv, discarded, sizeof(discarded), parser);

    (*kr_conv->destroy)(kr_conv);
  }
}
264 
/*
 * illegal_char handler installed for ISO-2022 based encodings when the loose
 * rule is enabled (see vt_char_encoding_conv_set_use_loose_rule()).
 *
 * A ISO10646_UCS4_1 character is first passed to
 * vt_convert_unicode_pua_to_drcs(); whatever ch holds afterwards is handed to
 * ef_iso2022_illegal_char(), whose result is returned unchanged.
 */
static size_t iso2022_illegal_char(ef_conv_t *conv, u_char *dst, size_t dst_size, int *is_full,
                                   ef_char_t *ch) {
  const int is_ucs = (ch->cs == ISO10646_UCS4_1);

  if (is_ucs) {
    vt_convert_unicode_pua_to_drcs(ch);
  }

  return ef_iso2022_illegal_char(conv, dst, dst_size, is_full, ch);
}
273 
/*
 * illegal_char handler installed for encodings that are not based on ISO-2022
 * when the loose rule is enabled (see
 * vt_char_encoding_conv_set_use_loose_rule()).
 *
 * A DEC_SPECIAL character is written as the 7 byte sequence
 *   ESC ( 0  <char>  ESC ( B
 * Any other character produces no output.
 *
 * Returns the number of bytes written to dst. *is_full is set to 1 only when
 * a DEC_SPECIAL character does not fit in dst_size, otherwise to 0.
 */
static size_t non_iso2022_illegal_char(ef_conv_t *conv, u_char *dst, size_t dst_size, int *is_full,
                                       ef_char_t *ch) {
  u_char *out = dst;

  *is_full = 0;

  if (ch->cs != DEC_SPECIAL) {
    return 0;
  }

  if (dst_size < 7) {
    *is_full = 1;

    return 0;
  }

  *(out++) = '\x1b';
  *(out++) = '(';
  *(out++) = '0';
  *(out++) = ch->ch[0];
  *(out++) = '\x1b';
  *(out++) = '(';
  *(out++) = 'B';

  return 7;
}
298 
/*
 * illegal_char handler installed for VT_UTF8 converters (see
 * vt_char_encoding_conv_new() and vt_char_encoding_conv_set_use_loose_rule()).
 *
 * Only DEC_SPECIAL characters are handled: they are mapped to UCS with
 * vt_convert_decsp_to_ucs() and written to dst as UTF-8. Characters of other
 * charsets, and DEC_SPECIAL characters without a UCS mapping, produce no
 * output.
 *
 * Returns the number of bytes written to dst. *is_full is set to 1 only when
 * dst_size is too small for the sequence to be written, otherwise to 0.
 */
static size_t utf8_illegal_char(ef_conv_t *conv, u_char *dst, size_t dst_size, int *is_full,
                                ef_char_t *ch) {
  u_int16_t ucs;
  size_t len;

  *is_full = 0;

  if (ch->cs != DEC_SPECIAL) {
    return 0;
  }

  if ((ucs = vt_convert_decsp_to_ucs(ef_char_to_int(ch))) == 0) {
    /* No UCS equivalent is known. */
    return 0;
  }

  /*
   * Always use the shortest form. Code points below U+0800 (e.g. U+00B0 for
   * 'f' and U+00B1 for 'g') must not be written with 3 bytes, because overlong
   * sequences are not valid UTF-8.
   */
  if (ucs < 0x80) {
    len = 1;
  } else if (ucs < 0x800) {
    len = 2;
  } else {
    len = 3;
  }

  if (dst_size < len) {
    *is_full = 1;

    return 0;
  }

  if (len == 1) {
    dst[0] = ucs;
  } else if (len == 2) {
    dst[0] = ((ucs >> 6) & 0x1f) | 0xc0;
    dst[1] = (ucs & 0x3f) | 0x80;
  } else {
    dst[0] = ((ucs >> 12) & 0x0f) | 0xe0;
    dst[1] = ((ucs >> 6) & 0x3f) | 0x80;
    dst[2] = (ucs & 0x3f) | 0x80;
  }

  return len;
}
319 
320 /* --- global functions --- */
321 
/*
 * Returns the primary name of encoding as stored in encoding_table.
 * If encoding is outside of [0, MAX_CHAR_ENCODINGS), "ISO88591" is returned.
 */
char *vt_get_char_encoding_name(vt_char_encoding_t encoding) {
  if (0 <= encoding && encoding < MAX_CHAR_ENCODINGS) {
    return encoding_table[encoding].name;
  }

  /* fallback for an out-of-range value */
  return "ISO88591";
}
329 
vt_get_char_encoding(const char * name)330 vt_char_encoding_t vt_get_char_encoding(const char *name /* '_' and '-' are ignored. */
331                                         ) {
332   int count;
333   char *_name;
334   char *encoding;
335   char *p;
336 
337   /*
338    * duplicating name so as not to destroy its memory.
339    */
340   if ((_name = alloca(strlen(name) + 1)) == NULL ||
341       (encoding = alloca(strlen(name) + 1)) == NULL) {
342     return VT_UNKNOWN_ENCODING;
343   }
344   strcpy(_name, name);
345   encoding[0] = '\0';
346 
347   /*
348    * removing '-' and '_' from name.
349    */
350   while ((p = bl_str_sep(&_name, "-_")) != NULL) {
351     strcat(encoding, p);
352   }
353 
354 #ifdef __DEBUG
355   bl_debug_printf(BL_DEBUG_TAG " encoding -> %s.\n", encoding);
356 #endif
357 
358   if (strcasecmp(encoding, "auto") == 0) {
359     /*
360      * XXX
361      * UTF-8 is used by default in cygwin, msys, win32, android and osx.
362      * (On osx, if mlterm.app is started from Finder,
363      * vt_get_char_encoding("auto") returns VT_ISO88591.)
364      * Note that vt_get_char_encoding("auto") is used to set character encoding
365      * of window/icon title string, not only to determine character encoding.
366      * (see vt_parser.c)
367      */
368 #if !defined(__CYGWIN__) && !defined(__MSYS__) && !defined(USE_WIN32API) && \
369   !defined(__ANDROID__) && !defined(__APPLE__)
370     vt_char_encoding_t e;
371 
372     if ((e = vt_get_char_encoding(bl_get_codeset())) != VT_UNKNOWN_ENCODING) {
373       return e;
374     }
375 #endif
376 
377     return VT_UTF8;
378   }
379 
380   for (count = 0; count < sizeof(encoding_table) / sizeof(encoding_table_t); count++) {
381     if (strcasecmp(encoding, encoding_table[count].name) == 0) {
382       return encoding_table[count].encoding;
383     }
384   }
385 
386   return VT_UNKNOWN_ENCODING;
387 }
388 
vt_char_encoding_parser_new(vt_char_encoding_t encoding)389 ef_parser_t *vt_char_encoding_parser_new(vt_char_encoding_t encoding) {
390   ef_parser_t *parser;
391 
392   if (encoding < 0 || MAX_CHAR_ENCODINGS <= encoding ||
393       encoding_table[encoding].encoding != encoding) {
394 #ifdef DEBUG
395     bl_warn_printf(BL_DEBUG_TAG " %d is illegal encoding.\n", encoding);
396 #endif
397 
398     return NULL;
399   }
400 
401   if ((parser = (*encoding_table[encoding].parser_new)()) == NULL) {
402     return NULL;
403   }
404 
405   if (encoding == VT_ISO2022KR) {
406     /* overriding init method */
407 
408     iso2022kr_parser_init = parser->init;
409     parser->init = ovrd_iso2022kr_parser_init;
410 
411     (*parser->init)(parser);
412   }
413 
414   return parser;
415 }
416 
vt_char_encoding_conv_new(vt_char_encoding_t encoding)417 ef_conv_t *vt_char_encoding_conv_new(vt_char_encoding_t encoding) {
418   ef_conv_t *conv;
419 
420   if (encoding < 0 || MAX_CHAR_ENCODINGS <= encoding ||
421       encoding_table[encoding].encoding != encoding) {
422 #ifdef DEBUG
423     bl_warn_printf(BL_DEBUG_TAG " %d is illegal encoding.\n", encoding);
424 #endif
425 
426     return NULL;
427   }
428 
429   if ((conv = (*encoding_table[encoding].conv_new)()) == NULL) {
430     return NULL;
431   }
432 
433   if (encoding == VT_UTF8) {
434     conv->illegal_char = utf8_illegal_char;
435   } else if (IS_ENCODING_BASED_ON_ISO2022(encoding)) {
436     if (encoding == VT_ISO2022KR) {
437       /* overriding init method */
438 
439       iso2022kr_conv_init = conv->init;
440       conv->init = ovrd_iso2022kr_conv_init;
441 
442       (*conv->init)(conv);
443     }
444   }
445 
446   return conv;
447 }
448 
/*
 * Returns 1 if cs is listed in msb_set_cs_table, otherwise 0.
 *
 * The first and the last element of the table are used as a quick range check
 * before the table is scanned.
 */
int vt_is_msb_set(ef_charset_t cs) {
  const size_t num = sizeof(msb_set_cs_table) / sizeof(msb_set_cs_table[0]);
  size_t idx;

  if (cs < msb_set_cs_table[0] || msb_set_cs_table[num - 1] < cs) {
    return 0;
  }

  for (idx = 0; idx < num; idx++) {
    if (msb_set_cs_table[idx] == cs) {
      return 1;
    }
  }

  return 0;
}
463 
/*
 * Converts src_len bytes of src (in src_encoding) to dst_encoding and stores
 * the result in dst, whose capacity is dst_len.
 *
 * A temporary parser for src_encoding is created, initialized, fed with src
 * and destroyed again; the conversion itself is done by
 * vt_char_encoding_convert_with_parser().
 *
 * Returns the value of vt_char_encoding_convert_with_parser(), or 0 if the
 * parser cannot be created.
 */
size_t vt_char_encoding_convert(u_char *dst, size_t dst_len, vt_char_encoding_t dst_encoding,
                                u_char *src, size_t src_len, vt_char_encoding_t src_encoding) {
  size_t written = 0;
  ef_parser_t *src_parser = vt_char_encoding_parser_new(src_encoding);

  if (src_parser != NULL) {
    (*src_parser->init)(src_parser);
    (*src_parser->set_str)(src_parser, src, src_len);

    written = vt_char_encoding_convert_with_parser(dst, dst_len, dst_encoding, src_parser);

    (*src_parser->destroy)(src_parser);
  }

  return written;
}
480 
/*
 * Converts the characters supplied by parser to dst_encoding and stores the
 * result in dst, whose capacity is dst_len.
 *
 * A temporary converter for dst_encoding is created, initialized, used once
 * and destroyed again. parser is neither initialized nor destroyed here.
 *
 * Returns the value of the converter's convert method, or 0 if the converter
 * cannot be created.
 */
size_t vt_char_encoding_convert_with_parser(u_char *dst, size_t dst_len,
                                            vt_char_encoding_t dst_encoding, ef_parser_t *parser) {
  size_t written = 0;
  ef_conv_t *dst_conv = vt_char_encoding_conv_new(dst_encoding);

  if (dst_conv != NULL) {
    (*dst_conv->init)(dst_conv);

    written = (*dst_conv->convert)(dst_conv, dst, dst_len, parser);

    (*dst_conv->destroy)(dst_conv);
  }

  return written;
}
496 
/*
 * Parses a unicode area written as "U+<hex>-<hex>" or "U+<hex>".
 *
 * On success 1 is returned and the bounds are stored in *min and *max (both
 * are set to the same value for the single code point form).
 * If str does not match either form, or if the lower bound is greater than
 * the upper bound, a message is printed with bl_msg_printf() and 0 is
 * returned. *min and *max may already have been written in that case.
 */
int vt_parse_unicode_area(const char *str, u_int *min, u_int *max) {
  int is_valid;

  /*
   * Both accepted forms share the "U+%x" prefix, so the number of converted
   * items of one sscanf() call tells them apart.
   */
  switch (sscanf(str, "U+%x-%x", min, max)) {
    case 2:
      is_valid = (*min <= *max);
      break;

    case 1:
      *max = *min;
      is_valid = 1;
      break;

    default:
      is_valid = 0;
      break;
  }

  if (!is_valid) {
    bl_msg_printf("Illegal unicode area format: %s\n", str);
  }

  return is_valid;
}
515 
516 /* XXX This function should be moved to mef */
vt_convert_ucs_to_decsp(u_int16_t ucs)517 u_char vt_convert_ucs_to_decsp(u_int16_t ucs) {
518   int l_idx;
519   int h_idx;
520   int idx;
521 
522   l_idx = 0;
523   h_idx = sizeof(ucs_to_decsp_table) / sizeof(ucs_to_decsp_table[0]) - 1;
524 
525   if (ucs < ucs_to_decsp_table[l_idx].ucs || ucs_to_decsp_table[h_idx].ucs < ucs) {
526     return 0;
527   }
528 
529   while (1) {
530     idx = (l_idx + h_idx) / 2;
531 
532     if (ucs == ucs_to_decsp_table[idx].ucs) {
533       return ucs_to_decsp_table[idx].decsp;
534     } else if (ucs < ucs_to_decsp_table[idx].ucs) {
535       h_idx = idx;
536     } else {
537       l_idx = idx + 1;
538     }
539 
540     if (l_idx >= h_idx) {
541       return 0;
542     }
543   }
544 }
545 
546 /* XXX This function should be moved to mef */
vt_convert_decsp_to_ucs(u_char decsp)547 u_int16_t vt_convert_decsp_to_ucs(u_char decsp) {
548   if ('`' <= decsp && decsp <= 'x') {
549     int count;
550 
551     for (count = 0; count < sizeof(ucs_to_decsp_table) / sizeof(ucs_to_decsp_table[0]); count++) {
552       if (ucs_to_decsp_table[count].decsp == decsp) {
553         return ucs_to_decsp_table[count].ucs;
554       }
555     }
556   }
557 
558   return 0;
559 }
560 
561 /* XXX This function should be moved to mef */
vt_convert_dectech_to_ucs(u_char dectech)562 u_int16_t vt_convert_dectech_to_ucs(u_char dectech) {
563   if (0x21 <= dectech && dectech <= 0x7e) {
564     return dectech_to_ucs_table[dectech - 0x21];
565   }
566 
567   return 0;
568 }
569 
/*
 * Selects the illegal_char handler of conv.
 *
 * flag != 0 (loose rule):
 *   iso2022_illegal_char() for ISO-2022 based encodings,
 *   non_iso2022_illegal_char() for all the others.
 * flag == 0:
 *   utf8_illegal_char() for VT_UTF8, no handler (NULL) for all the others.
 */
void vt_char_encoding_conv_set_use_loose_rule(ef_conv_t *conv, vt_char_encoding_t encoding,
                                              int flag) {
  if (!flag) {
    if (encoding == VT_UTF8) {
      conv->illegal_char = utf8_illegal_char;
    } else {
      conv->illegal_char = NULL;
    }

    return;
  }

  if (IS_ENCODING_BASED_ON_ISO2022(encoding)) {
    conv->illegal_char = iso2022_illegal_char;
  } else {
    conv->illegal_char = non_iso2022_illegal_char;
  }
}
586