1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2 
3 #include "ef_ucs4_map.h"
4 
5 #include <string.h>
6 #include <pobl/bl_debug.h>
7 
8 #include "ef_ucs4_iso8859.h"
9 #include "ef_ucs4_viscii.h"
10 #include "ef_ucs4_koi8.h"
11 #include "ef_ucs4_iscii.h"
12 #include "ef_ucs4_georgian_ps.h"
13 #include "ef_ucs4_cp125x.h"
14 #include "ef_ucs4_jisx0201.h"
15 #include "ef_ucs4_jisx0208.h"
16 #include "ef_ucs4_jisx0212.h"
17 #include "ef_ucs4_jisx0213.h"
18 #include "ef_ucs4_ksc5601.h"
19 #include "ef_ucs4_uhc.h"
20 #include "ef_ucs4_johab.h"
21 #include "ef_ucs4_gb2312.h"
22 #include "ef_ucs4_gbk.h"
23 #include "ef_ucs4_big5.h"
24 #include "ef_ucs4_cns11643.h"
25 
26 typedef struct map {
27   ef_charset_t cs;
28   int (*map_ucs4_to)(ef_char_t*, u_int32_t);
29   int (*map_to_ucs4)(ef_char_t*, u_int16_t);
30 
31 } map_t;
32 
33 /* --- static variables --- */
34 
35 static map_t map_table[] = {
36     {ISO8859_1_R, ef_map_ucs4_to_iso8859_1_r, ef_map_iso8859_1_r_to_ucs4},
37     {ISO8859_2_R, ef_map_ucs4_to_iso8859_2_r, ef_map_iso8859_2_r_to_ucs4},
38     {ISO8859_3_R, ef_map_ucs4_to_iso8859_3_r, ef_map_iso8859_3_r_to_ucs4},
39     {ISO8859_4_R, ef_map_ucs4_to_iso8859_4_r, ef_map_iso8859_4_r_to_ucs4},
40     {ISO8859_5_R, ef_map_ucs4_to_iso8859_5_r, ef_map_iso8859_5_r_to_ucs4},
41     {ISO8859_6_R, ef_map_ucs4_to_iso8859_6_r, ef_map_iso8859_6_r_to_ucs4},
42     {ISO8859_7_R, ef_map_ucs4_to_iso8859_7_r, ef_map_iso8859_7_r_to_ucs4},
43     {ISO8859_8_R, ef_map_ucs4_to_iso8859_8_r, ef_map_iso8859_8_r_to_ucs4},
44     {ISO8859_9_R, ef_map_ucs4_to_iso8859_9_r, ef_map_iso8859_9_r_to_ucs4},
45     {ISO8859_10_R, ef_map_ucs4_to_iso8859_10_r, ef_map_iso8859_10_r_to_ucs4},
46     {TIS620_2533, ef_map_ucs4_to_tis620_2533, ef_map_tis620_2533_to_ucs4},
47     {ISO8859_13_R, ef_map_ucs4_to_iso8859_13_r, ef_map_iso8859_13_r_to_ucs4},
48     {ISO8859_14_R, ef_map_ucs4_to_iso8859_14_r, ef_map_iso8859_14_r_to_ucs4},
49     {ISO8859_15_R, ef_map_ucs4_to_iso8859_15_r, ef_map_iso8859_15_r_to_ucs4},
50     {ISO8859_16_R, ef_map_ucs4_to_iso8859_16_r, ef_map_iso8859_16_r_to_ucs4},
51     {TCVN5712_3_1993, ef_map_ucs4_to_tcvn5712_3_1993, ef_map_tcvn5712_3_1993_to_ucs4},
52 
53     {VISCII, ef_map_ucs4_to_viscii, ef_map_viscii_to_ucs4},
54     {KOI8_R, ef_map_ucs4_to_koi8_r, ef_map_koi8_r_to_ucs4},
55     {KOI8_U, ef_map_ucs4_to_koi8_u, ef_map_koi8_u_to_ucs4},
56     {ISCII_ASSAMESE, ef_map_ucs4_to_iscii, ef_map_iscii_assamese_to_ucs4},
57     {ISCII_BENGALI, ef_map_ucs4_to_iscii, ef_map_iscii_bengali_to_ucs4},
58     {ISCII_GUJARATI, ef_map_ucs4_to_iscii, ef_map_iscii_gujarati_to_ucs4},
59     {ISCII_HINDI, ef_map_ucs4_to_iscii, ef_map_iscii_hindi_to_ucs4},
60     {ISCII_KANNADA, ef_map_ucs4_to_iscii, ef_map_iscii_kannada_to_ucs4},
61     {ISCII_MALAYALAM, ef_map_ucs4_to_iscii, ef_map_iscii_malayalam_to_ucs4},
62     {ISCII_ORIYA, ef_map_ucs4_to_iscii, ef_map_iscii_oriya_to_ucs4},
63     {ISCII_PUNJABI, ef_map_ucs4_to_iscii, ef_map_iscii_punjabi_to_ucs4},
64     {ISCII_TAMIL, ef_map_ucs4_to_iscii, ef_map_iscii_tamil_to_ucs4},
65     {ISCII_TELUGU, ef_map_ucs4_to_iscii, ef_map_iscii_telugu_to_ucs4},
66     {KOI8_T, ef_map_ucs4_to_koi8_t, ef_map_koi8_t_to_ucs4},
67     {GEORGIAN_PS, ef_map_ucs4_to_georgian_ps, ef_map_georgian_ps_to_ucs4},
68     {CP1250, ef_map_ucs4_to_cp1250, ef_map_cp1250_to_ucs4},
69     {CP1251, ef_map_ucs4_to_cp1251, ef_map_cp1251_to_ucs4},
70     {CP1252, ef_map_ucs4_to_cp1252, ef_map_cp1252_to_ucs4},
71     {CP1253, ef_map_ucs4_to_cp1253, ef_map_cp1253_to_ucs4},
72     {CP1254, ef_map_ucs4_to_cp1254, ef_map_cp1254_to_ucs4},
73     {CP1255, ef_map_ucs4_to_cp1255, ef_map_cp1255_to_ucs4},
74     {CP1256, ef_map_ucs4_to_cp1256, ef_map_cp1256_to_ucs4},
75     {CP1257, ef_map_ucs4_to_cp1257, ef_map_cp1257_to_ucs4},
76     {CP1258, ef_map_ucs4_to_cp1258, ef_map_cp1258_to_ucs4},
77     {CP874, ef_map_ucs4_to_cp874, ef_map_cp874_to_ucs4},
78 
79     {JISX0201_ROMAN, ef_map_ucs4_to_jisx0201_roman, ef_map_jisx0201_roman_to_ucs4},
80     {JISX0201_KATA, ef_map_ucs4_to_jisx0201_kata, ef_map_jisx0201_kata_to_ucs4},
81     {JISX0208_1983, ef_map_ucs4_to_jisx0208_1983, ef_map_jisx0208_1983_to_ucs4},
82     {JISX0212_1990, ef_map_ucs4_to_jisx0212_1990, ef_map_jisx0212_1990_to_ucs4},
83     {JISX0213_2000_1, ef_map_ucs4_to_jisx0213_2000_1, ef_map_jisx0213_2000_1_to_ucs4},
84     {JISX0213_2000_2, ef_map_ucs4_to_jisx0213_2000_2, ef_map_jisx0213_2000_2_to_ucs4},
85     {JISC6226_1978_NEC_EXT, ef_map_ucs4_to_jisx0208_nec_ext, ef_map_jisx0208_nec_ext_to_ucs4},
86     {JISC6226_1978_NECIBM_EXT, ef_map_ucs4_to_jisx0208_necibm_ext,
87      ef_map_jisx0208_necibm_ext_to_ucs4},
88     {SJIS_IBM_EXT, ef_map_ucs4_to_sjis_ibm_ext, ef_map_sjis_ibm_ext_to_ucs4},
89 
90     {GB2312_80, ef_map_ucs4_to_gb2312_80, ef_map_gb2312_80_to_ucs4},
91     {GBK, ef_map_ucs4_to_gbk, ef_map_gbk_to_ucs4},
92 
93     {CNS11643_1992_1, ef_map_ucs4_to_cns11643_1992_1, ef_map_cns11643_1992_1_to_ucs4},
94     {CNS11643_1992_2, ef_map_ucs4_to_cns11643_1992_2, ef_map_cns11643_1992_2_to_ucs4},
95     {CNS11643_1992_3, ef_map_ucs4_to_cns11643_1992_3, ef_map_cns11643_1992_3_to_ucs4},
96     {BIG5, ef_map_ucs4_to_big5, ef_map_big5_to_ucs4},
97     {HKSCS, ef_map_ucs4_to_hkscs, ef_map_hkscs_to_ucs4},
98 
99     {KSC5601_1987, ef_map_ucs4_to_ksc5601_1987, ef_map_ksc5601_1987_to_ucs4},
100     {UHC, ef_map_ucs4_to_uhc, ef_map_uhc_to_ucs4},
101     {JOHAB, ef_map_ucs4_to_johab, ef_map_johab_to_ucs4},
102 
103 };
104 
105 /* --- global functions --- */
106 
/*
 * Converts the UCS4 character 'ucs4' to the charset 'cs' and stores the
 * result in 'non_ucs'.
 *
 * The table row found for 'cs' is remembered in a function-local static, so
 * consecutive calls with the same charset skip the table scan.
 *
 * Returns 1 on success. Returns 0 if 'cs' has no row in map_table or if the
 * row's mapping function cannot convert the character. In DEBUG builds it
 * also returns 0 when 'ucs4' is not tagged ISO10646_UCS4_1.
 */
int ef_map_ucs4_to_cs(ef_char_t *non_ucs, ef_char_t *ucs4, ef_charset_t cs) {
  static map_t *cached_map;
  map_t *map;
  u_int32_t ucs4_code;

#ifdef DEBUG
  if (ucs4->cs != ISO10646_UCS4_1) {
    bl_debug_printf(BL_DEBUG_TAG " ucs4 is not ucs4.\n");

    return 0;
  }
#endif

  ucs4_code = ef_char_to_int(ucs4);

  map = cached_map;

  if (!map || map->cs != cs) {
    size_t idx;

    /* Cache miss: look the charset up in the table. */
    map = NULL;

    for (idx = 0; idx < sizeof(map_table) / sizeof(map_table[0]); idx++) {
      if (map_table[idx].cs == cs) {
        map = &map_table[idx];

        break;
      }
    }

    if (!map) {
      /* The cache is left untouched when the charset is unknown. */
#ifdef DEBUG
      bl_warn_printf(BL_DEBUG_TAG " %x cs is not supported to map from ucs4.\n", cs);
#endif

      return 0;
    }

    cached_map = map;
  }

  if ((*map->map_ucs4_to)(non_ucs, ucs4_code)) {
    return 1;
  }

#ifdef DEBUG
  bl_warn_printf(BL_DEBUG_TAG " UCS4 char(0x%.2x%.2x%.2x%.2x) is not supported to %x cs.\n",
                 ucs4->ch[0], ucs4->ch[1], ucs4->ch[2], ucs4->ch[3], cs);
#endif

  return 0;
}
152 
/*
 * Converts the UCS4 character 'ucs4' using a caller-supplied list of mapping
 * functions instead of the built-in table.
 *
 * The 'list_size' functions in 'map_ucs4_to_funcs' are tried in order and the
 * first one that reports success (non-zero) wins; its output is left in
 * 'non_ucs'.
 *
 * Returns 1 if some function converted the character, 0 otherwise. In DEBUG
 * builds it also returns 0 when 'ucs4' is not tagged ISO10646_UCS4_1.
 */
int ef_map_ucs4_to_with_funcs(ef_char_t *non_ucs, ef_char_t *ucs4,
                              ef_map_ucs4_to_func_t *map_ucs4_to_funcs, size_t list_size) {
  u_int32_t code;
  size_t idx;

#ifdef DEBUG
  if (ucs4->cs != ISO10646_UCS4_1) {
    bl_debug_printf(BL_DEBUG_TAG " ucs4 is not ucs4.\n");

    return 0;
  }
#endif

  code = ef_char_to_int(ucs4);

  idx = 0;
  while (idx < list_size) {
    ef_map_ucs4_to_func_t try_map = map_ucs4_to_funcs[idx];

    if ((*try_map)(non_ucs, code)) {
      return 1;
    }

    idx++;
  }

#ifdef DEBUG
  bl_warn_printf(BL_DEBUG_TAG " UCS4 char(0x%.2x%.2x%.2x%.2x) is not supported.\n", ucs4->ch[0],
                 ucs4->ch[1], ucs4->ch[2], ucs4->ch[3]);
#endif

  return 0;
}
181 
182 /*
183  * using the default order of the mapping table.
184  */
ef_map_ucs4_to(ef_char_t * non_ucs,ef_char_t * ucs4)185 int ef_map_ucs4_to(ef_char_t *non_ucs, ef_char_t *ucs4) {
186   size_t count;
187   u_int32_t ucs4_code;
188   map_t *map;
189   static map_t *cached_map;
190 
191 #ifdef DEBUG
192   if (ucs4->cs != ISO10646_UCS4_1) {
193     bl_debug_printf(BL_DEBUG_TAG " ucs4 is not ucs4.\n");
194 
195     return 0;
196   }
197 #endif
198 
199   ucs4_code = ef_char_to_int(ucs4);
200 
201   if ((map = cached_map) && (*map->map_ucs4_to)(non_ucs, ucs4_code)) {
202     return 1;
203   }
204 
205   for (count = 0; count < sizeof(map_table) / sizeof(map_table[0]); count++) {
206     if ((*map_table[count].map_ucs4_to)(non_ucs, ucs4_code)) {
207       ef_charset_t cs;
208 
209       cs = map_table[count].cs;
210 
211       /*
212        * Don't cache the map functions of JISX0213_2000_1 and
213        * non ISO2022 cs (GBK etc), in order not to map the
214        * following chars automatically to JISX0213_2000_1,
215        * GBK etc if a ucs4 character is mapped to the one of
216        * JISX0213_2000_1, GBK etc which doesn't exist in
217        * JISX0208, GB2312 etc.
218        */
219       if (!IS_NON_ISO2022(cs) && cs != JISX0213_2000_1) {
220         cached_map = &map_table[count];
221       }
222 
223       return 1;
224     }
225   }
226 
227 #ifdef DEBUG
228   bl_warn_printf(BL_DEBUG_TAG " UCS4 char(0x%.2x%.2x%.2x%.2x) is not supported.\n", ucs4->ch[0],
229                  ucs4->ch[1], ucs4->ch[2], ucs4->ch[3]);
230 #endif
231 
232   return 0;
233 }
234 
235 /*
236  * using the default order of the mapping table.
237  */
ef_map_ucs4_to_iso2022cs(ef_char_t * non_ucs,ef_char_t * ucs4)238 int ef_map_ucs4_to_iso2022cs(ef_char_t *non_ucs, ef_char_t *ucs4) {
239   size_t count;
240   u_int32_t ucs4_code;
241   map_t *map;
242   static map_t *cached_map;
243 
244 #ifdef DEBUG
245   if (ucs4->cs != ISO10646_UCS4_1) {
246     bl_debug_printf(BL_DEBUG_TAG " ucs4 is not ucs4.\n");
247 
248     return 0;
249   }
250 #endif
251 
252   ucs4_code = ef_char_to_int(ucs4);
253 
254   if ((map = cached_map) && (*map->map_ucs4_to)(non_ucs, ucs4_code)) {
255     return 1;
256   }
257 
258   for (count = 0; count < sizeof(map_table) / sizeof(map_table[0]); count++) {
259     if (IS_CS_BASED_ON_ISO2022(map_table[count].cs)) {
260       if ((*map_table[count].map_ucs4_to)(non_ucs, ucs4_code)) {
261         cached_map = &map_table[count];
262 
263         return 1;
264       }
265     }
266   }
267 
268 #ifdef DEBUG
269   bl_warn_printf(BL_DEBUG_TAG " UCS4 char(0x%.2x%.2x%.2x%.2x) is not supported.\n", ucs4->ch[0],
270                  ucs4->ch[1], ucs4->ch[2], ucs4->ch[3]);
271 #endif
272 
273   return 0;
274 }
275 
/*
 * Converts the character 'non_ucs' to UCS4 and stores the result in 'ucs4'.
 *
 * A character that is already tagged ISO10646_UCS4_1 is simply copied.
 * Otherwise the row of map_table matching non_ucs->cs is used; that row is
 * remembered in a function-local static so consecutive calls with the same
 * charset skip the table scan.
 *
 * Returns 1 on success, 0 if the charset has no row in map_table or the
 * row's mapping function cannot convert the code.
 */
int ef_map_to_ucs4(ef_char_t *ucs4, ef_char_t *non_ucs) {
  static map_t *cached_map;
  map_t *map;
  u_int32_t code;

  if (non_ucs->cs == ISO10646_UCS4_1) {
    /* Nothing to convert. */
    *ucs4 = *non_ucs;

    return 1;
  }

  code = ef_char_to_int(non_ucs);

  map = cached_map;

  if (!map || map->cs != non_ucs->cs) {
    size_t idx;

    /* Cache miss: look the charset up in the table. */
    map = NULL;

    for (idx = 0; idx < sizeof(map_table) / sizeof(map_table[0]); idx++) {
      if (map_table[idx].cs == non_ucs->cs) {
        map = &map_table[idx];

        break;
      }
    }

    if (!map) {
      /* The cache is left untouched when the charset is unknown. */
#ifdef DEBUG
      bl_warn_printf(BL_DEBUG_TAG " %x cs is not supported to map to ucs4.\n", non_ucs->cs);
#endif

      return 0;
    }

    cached_map = map;
  }

  /*
   * map_to_ucs4 is declared to take a u_int16_t, so 'code' is narrowed to
   * its low 16 bits by this call.
   */
  if ((*map->map_to_ucs4)(ucs4, code)) {
    return 1;
  }

#ifdef DEBUG
  bl_warn_printf(BL_DEBUG_TAG " this cs(%x) (code %x) cannot be mapped to UCS4.\n", non_ucs->cs,
                 code);
#endif

  return 0;
}
319 
/*
 * Converts 'src' to the charset 'cs' by going through UCS4: 'src' is first
 * mapped to a temporary UCS4 character, which is then mapped to 'cs' and
 * stored in 'dst'.
 *
 * Returns 1 if both steps succeeded, 0 otherwise. The second step is not
 * attempted when the first one fails.
 */
int ef_map_via_ucs(ef_char_t *dst, ef_char_t *src, ef_charset_t cs) {
  ef_char_t ucs4;

  /* && short-circuits, so 'ucs4' is only read after it has been filled in. */
  return ef_map_to_ucs4(&ucs4, src) && ef_map_ucs4_to_cs(dst, &ucs4, cs);
}
329