1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2
3 #include "ef_ucs4_map.h"
4
5 #include <string.h>
6 #include <pobl/bl_debug.h>
7
8 #include "ef_ucs4_iso8859.h"
9 #include "ef_ucs4_viscii.h"
10 #include "ef_ucs4_koi8.h"
11 #include "ef_ucs4_iscii.h"
12 #include "ef_ucs4_georgian_ps.h"
13 #include "ef_ucs4_cp125x.h"
14 #include "ef_ucs4_jisx0201.h"
15 #include "ef_ucs4_jisx0208.h"
16 #include "ef_ucs4_jisx0212.h"
17 #include "ef_ucs4_jisx0213.h"
18 #include "ef_ucs4_ksc5601.h"
19 #include "ef_ucs4_uhc.h"
20 #include "ef_ucs4_johab.h"
21 #include "ef_ucs4_gb2312.h"
22 #include "ef_ucs4_gbk.h"
23 #include "ef_ucs4_big5.h"
24 #include "ef_ucs4_cns11643.h"
25
26 typedef struct map {
27 ef_charset_t cs;
28 int (*map_ucs4_to)(ef_char_t*, u_int32_t);
29 int (*map_to_ucs4)(ef_char_t*, u_int16_t);
30
31 } map_t;
32
33 /* --- static variables --- */
34
35 static map_t map_table[] = {
36 {ISO8859_1_R, ef_map_ucs4_to_iso8859_1_r, ef_map_iso8859_1_r_to_ucs4},
37 {ISO8859_2_R, ef_map_ucs4_to_iso8859_2_r, ef_map_iso8859_2_r_to_ucs4},
38 {ISO8859_3_R, ef_map_ucs4_to_iso8859_3_r, ef_map_iso8859_3_r_to_ucs4},
39 {ISO8859_4_R, ef_map_ucs4_to_iso8859_4_r, ef_map_iso8859_4_r_to_ucs4},
40 {ISO8859_5_R, ef_map_ucs4_to_iso8859_5_r, ef_map_iso8859_5_r_to_ucs4},
41 {ISO8859_6_R, ef_map_ucs4_to_iso8859_6_r, ef_map_iso8859_6_r_to_ucs4},
42 {ISO8859_7_R, ef_map_ucs4_to_iso8859_7_r, ef_map_iso8859_7_r_to_ucs4},
43 {ISO8859_8_R, ef_map_ucs4_to_iso8859_8_r, ef_map_iso8859_8_r_to_ucs4},
44 {ISO8859_9_R, ef_map_ucs4_to_iso8859_9_r, ef_map_iso8859_9_r_to_ucs4},
45 {ISO8859_10_R, ef_map_ucs4_to_iso8859_10_r, ef_map_iso8859_10_r_to_ucs4},
46 {TIS620_2533, ef_map_ucs4_to_tis620_2533, ef_map_tis620_2533_to_ucs4},
47 {ISO8859_13_R, ef_map_ucs4_to_iso8859_13_r, ef_map_iso8859_13_r_to_ucs4},
48 {ISO8859_14_R, ef_map_ucs4_to_iso8859_14_r, ef_map_iso8859_14_r_to_ucs4},
49 {ISO8859_15_R, ef_map_ucs4_to_iso8859_15_r, ef_map_iso8859_15_r_to_ucs4},
50 {ISO8859_16_R, ef_map_ucs4_to_iso8859_16_r, ef_map_iso8859_16_r_to_ucs4},
51 {TCVN5712_3_1993, ef_map_ucs4_to_tcvn5712_3_1993, ef_map_tcvn5712_3_1993_to_ucs4},
52
53 {VISCII, ef_map_ucs4_to_viscii, ef_map_viscii_to_ucs4},
54 {KOI8_R, ef_map_ucs4_to_koi8_r, ef_map_koi8_r_to_ucs4},
55 {KOI8_U, ef_map_ucs4_to_koi8_u, ef_map_koi8_u_to_ucs4},
56 {ISCII_ASSAMESE, ef_map_ucs4_to_iscii, ef_map_iscii_assamese_to_ucs4},
57 {ISCII_BENGALI, ef_map_ucs4_to_iscii, ef_map_iscii_bengali_to_ucs4},
58 {ISCII_GUJARATI, ef_map_ucs4_to_iscii, ef_map_iscii_gujarati_to_ucs4},
59 {ISCII_HINDI, ef_map_ucs4_to_iscii, ef_map_iscii_hindi_to_ucs4},
60 {ISCII_KANNADA, ef_map_ucs4_to_iscii, ef_map_iscii_kannada_to_ucs4},
61 {ISCII_MALAYALAM, ef_map_ucs4_to_iscii, ef_map_iscii_malayalam_to_ucs4},
62 {ISCII_ORIYA, ef_map_ucs4_to_iscii, ef_map_iscii_oriya_to_ucs4},
63 {ISCII_PUNJABI, ef_map_ucs4_to_iscii, ef_map_iscii_punjabi_to_ucs4},
64 {ISCII_TAMIL, ef_map_ucs4_to_iscii, ef_map_iscii_tamil_to_ucs4},
65 {ISCII_TELUGU, ef_map_ucs4_to_iscii, ef_map_iscii_telugu_to_ucs4},
66 {KOI8_T, ef_map_ucs4_to_koi8_t, ef_map_koi8_t_to_ucs4},
67 {GEORGIAN_PS, ef_map_ucs4_to_georgian_ps, ef_map_georgian_ps_to_ucs4},
68 {CP1250, ef_map_ucs4_to_cp1250, ef_map_cp1250_to_ucs4},
69 {CP1251, ef_map_ucs4_to_cp1251, ef_map_cp1251_to_ucs4},
70 {CP1252, ef_map_ucs4_to_cp1252, ef_map_cp1252_to_ucs4},
71 {CP1253, ef_map_ucs4_to_cp1253, ef_map_cp1253_to_ucs4},
72 {CP1254, ef_map_ucs4_to_cp1254, ef_map_cp1254_to_ucs4},
73 {CP1255, ef_map_ucs4_to_cp1255, ef_map_cp1255_to_ucs4},
74 {CP1256, ef_map_ucs4_to_cp1256, ef_map_cp1256_to_ucs4},
75 {CP1257, ef_map_ucs4_to_cp1257, ef_map_cp1257_to_ucs4},
76 {CP1258, ef_map_ucs4_to_cp1258, ef_map_cp1258_to_ucs4},
77 {CP874, ef_map_ucs4_to_cp874, ef_map_cp874_to_ucs4},
78
79 {JISX0201_ROMAN, ef_map_ucs4_to_jisx0201_roman, ef_map_jisx0201_roman_to_ucs4},
80 {JISX0201_KATA, ef_map_ucs4_to_jisx0201_kata, ef_map_jisx0201_kata_to_ucs4},
81 {JISX0208_1983, ef_map_ucs4_to_jisx0208_1983, ef_map_jisx0208_1983_to_ucs4},
82 {JISX0212_1990, ef_map_ucs4_to_jisx0212_1990, ef_map_jisx0212_1990_to_ucs4},
83 {JISX0213_2000_1, ef_map_ucs4_to_jisx0213_2000_1, ef_map_jisx0213_2000_1_to_ucs4},
84 {JISX0213_2000_2, ef_map_ucs4_to_jisx0213_2000_2, ef_map_jisx0213_2000_2_to_ucs4},
85 {JISC6226_1978_NEC_EXT, ef_map_ucs4_to_jisx0208_nec_ext, ef_map_jisx0208_nec_ext_to_ucs4},
86 {JISC6226_1978_NECIBM_EXT, ef_map_ucs4_to_jisx0208_necibm_ext,
87 ef_map_jisx0208_necibm_ext_to_ucs4},
88 {SJIS_IBM_EXT, ef_map_ucs4_to_sjis_ibm_ext, ef_map_sjis_ibm_ext_to_ucs4},
89
90 {GB2312_80, ef_map_ucs4_to_gb2312_80, ef_map_gb2312_80_to_ucs4},
91 {GBK, ef_map_ucs4_to_gbk, ef_map_gbk_to_ucs4},
92
93 {CNS11643_1992_1, ef_map_ucs4_to_cns11643_1992_1, ef_map_cns11643_1992_1_to_ucs4},
94 {CNS11643_1992_2, ef_map_ucs4_to_cns11643_1992_2, ef_map_cns11643_1992_2_to_ucs4},
95 {CNS11643_1992_3, ef_map_ucs4_to_cns11643_1992_3, ef_map_cns11643_1992_3_to_ucs4},
96 {BIG5, ef_map_ucs4_to_big5, ef_map_big5_to_ucs4},
97 {HKSCS, ef_map_ucs4_to_hkscs, ef_map_hkscs_to_ucs4},
98
99 {KSC5601_1987, ef_map_ucs4_to_ksc5601_1987, ef_map_ksc5601_1987_to_ucs4},
100 {UHC, ef_map_ucs4_to_uhc, ef_map_uhc_to_ucs4},
101 {JOHAB, ef_map_ucs4_to_johab, ef_map_johab_to_ucs4},
102
103 };
104
105 /* --- global functions --- */
106
/*
 * Converts the UCS4 character 'ucs4' into the character set 'cs' and stores
 * the result in 'non_ucs'.
 *
 * The table row used by the previous call is remembered in a static pointer,
 * so consecutive conversions to the same character set skip the search of
 * map_table.
 *
 * Returns 1 on success. Returns 0 if 'cs' has no row in map_table or if the
 * row's converter reports failure. In DEBUG builds it also returns 0 when
 * 'ucs4' is not tagged ISO10646_UCS4_1.
 */
int ef_map_ucs4_to_cs(ef_char_t *non_ucs, ef_char_t *ucs4, ef_charset_t cs) {
  static map_t *cached_map;
  map_t *map;
  u_int32_t ucs4_code;

#ifdef DEBUG
  if (ucs4->cs != ISO10646_UCS4_1) {
    bl_debug_printf(BL_DEBUG_TAG " ucs4 is not ucs4.\n");

    return 0;
  }
#endif

  ucs4_code = ef_char_to_int(ucs4);

  map = cached_map;

  if (map == NULL || map->cs != cs) {
    size_t count;

    /* The cached row (if any) is for another charset: search the table. */
    map = NULL;

    for (count = 0; count < sizeof(map_table) / sizeof(map_table[0]); count++) {
      if (map_table[count].cs == cs) {
        cached_map = map = &map_table[count];

        break;
      }
    }

    if (map == NULL) {
#ifdef DEBUG
      /* This function converts *from* UCS4, so say so in the warning. */
      bl_warn_printf(BL_DEBUG_TAG " %x cs is not supported to map from ucs4.\n", cs);
#endif

      return 0;
    }
  }

  if ((*map->map_ucs4_to)(non_ucs, ucs4_code)) {
    return 1;
  }

#ifdef DEBUG
  bl_warn_printf(BL_DEBUG_TAG " UCS4 char(0x%.2x%.2x%.2x%.2x) is not supported to %x cs.\n",
                 ucs4->ch[0], ucs4->ch[1], ucs4->ch[2], ucs4->ch[3], cs);
#endif

  return 0;
}
152
/*
 * Converts the UCS4 character 'ucs4' by calling the 'list_size' functions of
 * 'map_ucs4_to_funcs' one after another, in array order, until one of them
 * succeeds. The successful function is the one that fills in 'non_ucs'.
 *
 * Returns 1 if some function converted the character, otherwise 0.
 * In DEBUG builds it also returns 0 when 'ucs4' is not tagged
 * ISO10646_UCS4_1.
 */
int ef_map_ucs4_to_with_funcs(ef_char_t *non_ucs, ef_char_t *ucs4,
                              ef_map_ucs4_to_func_t *map_ucs4_to_funcs, size_t list_size) {
  u_int32_t code;
  size_t idx;

#ifdef DEBUG
  if (ucs4->cs != ISO10646_UCS4_1) {
    bl_debug_printf(BL_DEBUG_TAG " ucs4 is not ucs4.\n");

    return 0;
  }
#endif

  code = ef_char_to_int(ucs4);

  idx = 0;
  while (idx < list_size) {
    ef_map_ucs4_to_func_t convert = map_ucs4_to_funcs[idx];

    if ((*convert)(non_ucs, code)) {
      return 1;
    }

    idx++;
  }

#ifdef DEBUG
  bl_warn_printf(BL_DEBUG_TAG " UCS4 char(0x%.2x%.2x%.2x%.2x) is not supported.\n", ucs4->ch[0],
                 ucs4->ch[1], ucs4->ch[2], ucs4->ch[3]);
#endif

  return 0;
}
181
182 /*
183 * using the default order of the mapping table.
184 */
ef_map_ucs4_to(ef_char_t * non_ucs,ef_char_t * ucs4)185 int ef_map_ucs4_to(ef_char_t *non_ucs, ef_char_t *ucs4) {
186 size_t count;
187 u_int32_t ucs4_code;
188 map_t *map;
189 static map_t *cached_map;
190
191 #ifdef DEBUG
192 if (ucs4->cs != ISO10646_UCS4_1) {
193 bl_debug_printf(BL_DEBUG_TAG " ucs4 is not ucs4.\n");
194
195 return 0;
196 }
197 #endif
198
199 ucs4_code = ef_char_to_int(ucs4);
200
201 if ((map = cached_map) && (*map->map_ucs4_to)(non_ucs, ucs4_code)) {
202 return 1;
203 }
204
205 for (count = 0; count < sizeof(map_table) / sizeof(map_table[0]); count++) {
206 if ((*map_table[count].map_ucs4_to)(non_ucs, ucs4_code)) {
207 ef_charset_t cs;
208
209 cs = map_table[count].cs;
210
211 /*
212 * Don't cache the map functions of JISX0213_2000_1 and
213 * non ISO2022 cs (GBK etc), in order not to map the
214 * following chars automatically to JISX0213_2000_1,
215 * GBK etc if a ucs4 character is mapped to the one of
216 * JISX0213_2000_1, GBK etc which doesn't exist in
217 * JISX0208, GB2312 etc.
218 */
219 if (!IS_NON_ISO2022(cs) && cs != JISX0213_2000_1) {
220 cached_map = &map_table[count];
221 }
222
223 return 1;
224 }
225 }
226
227 #ifdef DEBUG
228 bl_warn_printf(BL_DEBUG_TAG " UCS4 char(0x%.2x%.2x%.2x%.2x) is not supported.\n", ucs4->ch[0],
229 ucs4->ch[1], ucs4->ch[2], ucs4->ch[3]);
230 #endif
231
232 return 0;
233 }
234
235 /*
236 * using the default order of the mapping table.
237 */
ef_map_ucs4_to_iso2022cs(ef_char_t * non_ucs,ef_char_t * ucs4)238 int ef_map_ucs4_to_iso2022cs(ef_char_t *non_ucs, ef_char_t *ucs4) {
239 size_t count;
240 u_int32_t ucs4_code;
241 map_t *map;
242 static map_t *cached_map;
243
244 #ifdef DEBUG
245 if (ucs4->cs != ISO10646_UCS4_1) {
246 bl_debug_printf(BL_DEBUG_TAG " ucs4 is not ucs4.\n");
247
248 return 0;
249 }
250 #endif
251
252 ucs4_code = ef_char_to_int(ucs4);
253
254 if ((map = cached_map) && (*map->map_ucs4_to)(non_ucs, ucs4_code)) {
255 return 1;
256 }
257
258 for (count = 0; count < sizeof(map_table) / sizeof(map_table[0]); count++) {
259 if (IS_CS_BASED_ON_ISO2022(map_table[count].cs)) {
260 if ((*map_table[count].map_ucs4_to)(non_ucs, ucs4_code)) {
261 cached_map = &map_table[count];
262
263 return 1;
264 }
265 }
266 }
267
268 #ifdef DEBUG
269 bl_warn_printf(BL_DEBUG_TAG " UCS4 char(0x%.2x%.2x%.2x%.2x) is not supported.\n", ucs4->ch[0],
270 ucs4->ch[1], ucs4->ch[2], ucs4->ch[3]);
271 #endif
272
273 return 0;
274 }
275
/*
 * Converts the character 'non_ucs' into UCS4 and stores the result in
 * 'ucs4'. A character that is already tagged ISO10646_UCS4_1 is copied
 * as is.
 *
 * The table row used by the previous call is remembered in a static pointer,
 * so consecutive conversions from the same character set skip the search of
 * map_table.
 *
 * Returns 1 on success. Returns 0 if the character set of 'non_ucs' has no
 * row in map_table or if the row's converter reports failure.
 */
int ef_map_to_ucs4(ef_char_t *ucs4, ef_char_t *non_ucs) {
  static map_t *cached_map;
  map_t *entry;
  u_int32_t code;

  if (non_ucs->cs == ISO10646_UCS4_1) {
    *ucs4 = *non_ucs;

    return 1;
  }

  code = ef_char_to_int(non_ucs);

  entry = cached_map;

  if (!entry || entry->cs != non_ucs->cs) {
    const size_t num_maps = sizeof(map_table) / sizeof(map_table[0]);
    size_t idx = 0;

    /* Advance to the row of this character set, or to the end of the table. */
    while (idx < num_maps && map_table[idx].cs != non_ucs->cs) {
      idx++;
    }

    if (idx == num_maps) {
#ifdef DEBUG
      bl_warn_printf(BL_DEBUG_TAG " %x cs is not supported to map to ucs4.\n", non_ucs->cs);
#endif

      return 0;
    }

    entry = &map_table[idx];
    cached_map = entry;
  }

  if (!(*entry->map_to_ucs4)(ucs4, code)) {
#ifdef DEBUG
    bl_warn_printf(BL_DEBUG_TAG " this cs(%x) (code %x) cannot be mapped to UCS4.\n", non_ucs->cs,
                   code);
#endif

    return 0;
  }

  return 1;
}
319
/*
 * Converts the character 'src' into the character set 'cs' by going through
 * UCS4: 'src' is first converted with ef_map_to_ucs4() and the intermediate
 * UCS4 character is then converted with ef_map_ucs4_to_cs() into 'dst'.
 *
 * Returns 1 if both steps succeed, otherwise 0. The second step is not
 * attempted when the first one fails.
 */
int ef_map_via_ucs(ef_char_t *dst, ef_char_t *src, ef_charset_t cs) {
  ef_char_t via;

  return ef_map_to_ucs4(&via, src) && ef_map_ucs4_to_cs(dst, &via, cs);
}
329