1 /*
2
3 Copyright (c) 2003-2013 uim Project https://github.com/uim/uim
4
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16 3. Neither the name of authors nor the names of its contributors
17 may be used to endorse or promote products derived from this software
18 without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
24 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 SUCH DAMAGE.
31
32 */
33
34 #include <config.h>
35
36 #include <stdlib.h>
37 #include <string.h>
38 #include <iconv.h>
39 #include <assert.h>
40 #include <errno.h>
41
42 #ifdef HAVE_ALLOCA_H
43 # include <alloca.h>
44 #endif
45
46 #include "uim.h"
47 #include "uim-internal.h"
48 #include "uim-util.h"
49 #include "uim-scm.h"
50 #include "uim-scm-abbrev.h"
51 #include "uim-iconv.h"
52
53 #define MBCHAR_LEN_MAX 6 /* assumes CESU-8 */
54
55 static void *uim_iconv_open(const char *tocode, const char *fromcode);
56 static int uim_iconv_is_convertible(const char *tocode, const char *fromcode);
57 static void *uim_iconv_create(const char *tocode, const char *fromcode);
58 static char *uim_iconv_code_conv(void *obj, const char *str);
59 static void uim_iconv_release(void *obj);
60
61 static int check_encoding_equivalence(const char *tocode,
62 const char *fromcode);
63 static const char **uim_get_encoding_alias(const char *encoding);
64
65
/*
 * Operations table for the iconv-backed code converter.  The initializer is
 * positional; in order it supplies the convertibility check, the converter
 * constructor, the conversion routine and the destructor defined in this
 * file (see struct uim_code_converter for the slot names).  uim_iconv is the
 * externally visible pointer to this table.
 */
66 static struct uim_code_converter uim_iconv_tbl = {
67 uim_iconv_is_convertible,
68 uim_iconv_create,
69 uim_iconv_code_conv,
70 uim_iconv_release
71 };
72 struct uim_code_converter *uim_iconv = &uim_iconv_tbl;
73
74 #include "encoding-table.c"
75
76
/*
 * Report whether two encoding names denote the same encoding.
 *
 * Each name is expanded to its NULL-terminated alias list with
 * uim_get_encoding_alias().  A name that has no alias list is treated as a
 * one-element list containing only that name.  The two names are considered
 * equivalent when any entry of one list is strcmp()-equal to any entry of
 * the other.
 *
 * tocode, fromcode: encoding names; must not be NULL (asserted).
 *
 * Returns 1 when a common name exists, 0 otherwise.
 */
static int
check_encoding_equivalence(const char *tocode, const char *fromcode)
{
  /*
   * Fallback single-entry lists are kept on the stack: there is no heap
   * allocation here, so no exit path needs cleanup.
   */
  const char *tocode_only[2];
  const char *fromcode_only[2];
  const char **alias_tocode;
  const char **alias_fromcode;
  int i, j;

  assert(tocode);
  assert(fromcode);

  alias_tocode = uim_get_encoding_alias(tocode);
  if (!alias_tocode) {
    tocode_only[0] = tocode;
    tocode_only[1] = NULL;
    alias_tocode = tocode_only;
  }

  alias_fromcode = uim_get_encoding_alias(fromcode);
  if (!alias_fromcode) {
    fromcode_only[0] = fromcode;
    fromcode_only[1] = NULL;
    alias_fromcode = fromcode_only;
  }

  for (i = 0; alias_tocode[i]; i++) {
    for (j = 0; alias_fromcode[j]; j++) {
      if (strcmp(alias_tocode[i], alias_fromcode[j]) == 0)
        return 1;
    }
  }

  return 0;
}
123
124 static int
uim_iconv_is_convertible(const char * tocode,const char * fromcode)125 uim_iconv_is_convertible(const char *tocode, const char *fromcode)
126 {
127 iconv_t ic;
128 uim_bool result;
129
130 if (UIM_CATCH_ERROR_BEGIN())
131 return UIM_FALSE;
132
133 assert(tocode);
134 assert(fromcode);
135
136 do {
137 if (check_encoding_equivalence(tocode, fromcode)) {
138 result = UIM_TRUE;
139 break;
140 }
141
142 /* TODO cache the result */
143 ic = (iconv_t)uim_iconv_open(tocode, fromcode);
144 if (ic == (iconv_t)-1) {
145 result = UIM_FALSE;
146 break;
147 }
148 iconv_close(ic);
149 result = UIM_TRUE;
150 } while (/* CONSTCOND */ 0);
151
152 UIM_CATCH_ERROR_END();
153
154 return result;
155 }
156
157 static const char **
uim_get_encoding_alias(const char * encoding)158 uim_get_encoding_alias(const char *encoding)
159 {
160 int i, j;
161 const char **alias;
162
163 assert(encoding);
164
165 for (i = 0; (alias = uim_encoding_list[i]); i++) {
166 for (j = 0; alias[j]; j++) {
167 if (!strcmp(alias[j], encoding))
168 return alias;
169 }
170 }
171 return NULL;
172 }
173
/*
 * Open an iconv conversion descriptor, trying every known alias of both
 * encoding names.
 *
 * Each name is expanded with uim_get_encoding_alias(); a name without an
 * alias list is tried as-is.  iconv_open() is attempted for every
 * (to-alias, from-alias) pair, to-aliases in the outer loop, and the first
 * descriptor that opens successfully is returned.
 *
 * tocode, fromcode: encoding names; must not be NULL (asserted).
 *
 * Returns the descriptor cast to void *, or (iconv_t)-1 cast to void * when
 * no pair could be opened.  The caller closes it with iconv_close().
 */
static void *
uim_iconv_open(const char *tocode, const char *fromcode)
{
  /*
   * Fallback single-entry lists are kept on the stack: there is no heap
   * allocation here, so no exit path needs cleanup.
   */
  const char *tocode_only[2];
  const char *fromcode_only[2];
  const char **alias_tocode, **alias_fromcode;
  iconv_t cd;
  int i, j;

  assert(tocode);
  assert(fromcode);

  alias_tocode = uim_get_encoding_alias(tocode);
  if (!alias_tocode) {
    tocode_only[0] = tocode;
    tocode_only[1] = NULL;
    alias_tocode = tocode_only;
  }

  alias_fromcode = uim_get_encoding_alias(fromcode);
  if (!alias_fromcode) {
    fromcode_only[0] = fromcode;
    fromcode_only[1] = NULL;
    alias_fromcode = fromcode_only;
  }

  for (i = 0; alias_tocode[i]; i++) {
    for (j = 0; alias_fromcode[j]; j++) {
      cd = iconv_open(alias_tocode[i], alias_fromcode[j]);
      if (cd != (iconv_t)-1)
        return (void *)cd;
    }
  }

  return (void *)(iconv_t)-1;
}
221
/*
 * Create a converter object for fromcode -> tocode.
 *
 * The result is 0/NULL in three cases: the two encodings are equivalent
 * (no conversion needed), no iconv descriptor could be opened, or
 * UIM_CATCH_ERROR_BEGIN() reported non-zero.  Otherwise the opened iconv
 * descriptor is returned as an opaque pointer.
 */
static void *
uim_iconv_create(const char *tocode, const char *fromcode)
{
  /* since iconv_t is not explicit pointer, use 0 instead of NULL */
  iconv_t cd = (iconv_t)0;

  if (UIM_CATCH_ERROR_BEGIN())
    return NULL;

  assert(tocode);
  assert(fromcode);

  if (!check_encoding_equivalence(tocode, fromcode)) {
    cd = (iconv_t)uim_iconv_open(tocode, fromcode);
    if (cd == (iconv_t)-1)
      cd = (iconv_t)0;
  }

  UIM_CATCH_ERROR_END();

  return (void *)cd;
}
250
251 static char *
uim_iconv_code_conv(void * obj,const char * instr)252 uim_iconv_code_conv(void *obj, const char *instr)
253 {
254 iconv_t cd = (iconv_t)obj;
255 size_t ins;
256 const char *in;
257 size_t outbufsiz, outs;
258 char *outbuf = NULL, *out;
259 size_t ret = 0;
260 size_t nconv = 0;
261 size_t idx = 0;
262 char *str = NULL;
263
264 if (UIM_CATCH_ERROR_BEGIN())
265 return NULL;
266
267 if (!instr)
268 goto err;
269
270 if (!obj) {
271 UIM_CATCH_ERROR_END();
272 return uim_strdup(instr);
273 }
274
275 ins = strlen(instr);
276 in = instr;
277
278 outbufsiz = (ins + sizeof("")) * MBCHAR_LEN_MAX;
279 out = outbuf = uim_malloc(outbufsiz);
280
281 while (ins > 0) {
282 out = outbuf;
283 outs = outbufsiz;
284
285 ret = iconv(cd, (ICONV_CONST char **)&in, &ins, &out, &outs);
286 nconv = outbufsiz - outs;
287 if (ret == (size_t)-1) {
288 switch (errno) {
289 case EINVAL:
290 goto err;
291 case E2BIG:
292 outbufsiz *= 2;
293 out = uim_realloc(outbuf, outbufsiz);
294 outbuf = out;
295 break;
296 default:
297 goto err;
298 }
299 } else {
300 /* XXX: irreversible characters */
301 }
302 if (nconv > 0) {
303 if (str == NULL)
304 str = uim_malloc(nconv + 1);
305 else
306 str = uim_realloc(str, idx + nconv + 1);
307 memcpy(&str[idx], outbuf, nconv);
308 idx += nconv;
309 }
310 }
311 do {
312 out = outbuf;
313 outs = outbufsiz;
314
315 ret = iconv(cd, NULL, NULL, &out, &outs);
316 nconv = outbufsiz - outs;
317
318 if (ret == (size_t)-1) {
319 outbufsiz *= 2;
320 out = uim_realloc(outbuf, outbufsiz);
321 outbuf = out;
322 } else {
323 /* XXX: irreversible characters */
324 }
325 if (nconv > 0) {
326 if (str == NULL)
327 str = uim_malloc(nconv + 1);
328 else
329 str = uim_realloc(str, idx + nconv + 1);
330 memcpy(&str[idx], outbuf, nconv);
331 idx += nconv;
332 }
333 } while (ret == (size_t)-1);
334
335 if (str == NULL)
336 str = uim_strdup("");
337 else
338 str[idx] = '\0';
339 free(outbuf);
340
341 UIM_CATCH_ERROR_END();
342
343 return str;
344
345 err:
346
347 free(str);
348 free(outbuf);
349
350 UIM_CATCH_ERROR_END();
351
352 return uim_strdup("");
353 }
354
/*
 * Destroy a converter object created by uim_iconv_create().
 *
 * A 0/NULL object (the "no conversion" converter) owns no iconv descriptor
 * and is ignored; anything else is closed with iconv_close().
 */
static void
uim_iconv_release(void *obj)
{
  if (UIM_CATCH_ERROR_BEGIN())
    return;

  if (obj != NULL) {
    iconv_t cd = (iconv_t)obj;

    iconv_close(cd);
  }

  UIM_CATCH_ERROR_END();
}
366
367 static uim_lisp
uim_ext_iconv_open(uim_lisp tocode_,uim_lisp fromcode_)368 uim_ext_iconv_open(uim_lisp tocode_, uim_lisp fromcode_)
369 {
370 const char *tocode = REFER_C_STR(tocode_);
371 const char *fromcode = REFER_C_STR(fromcode_);
372 iconv_t ic;
373
374 ic = uim_iconv_create(tocode, fromcode);
375 if (!ic)
376 return uim_scm_f();
377
378 return MAKE_PTR(ic);
379 }
380
381 static uim_lisp
uim_ext_iconv_code_conv(uim_lisp ic_,uim_lisp inbuf_)382 uim_ext_iconv_code_conv(uim_lisp ic_, uim_lisp inbuf_)
383 {
384 char *outbuf;
385
386 outbuf = uim_iconv_code_conv(C_PTR(ic_), REFER_C_STR(inbuf_));
387 if (!outbuf)
388 return uim_scm_f();
389
390 return MAKE_STR_DIRECTLY(outbuf);
391 }
392
393 static uim_lisp
uim_ext_iconv_release(uim_lisp ic_)394 uim_ext_iconv_release(uim_lisp ic_)
395 {
396 uim_iconv_release(C_PTR(ic_));
397 return uim_scm_t();
398 }
399
400 void
uim_init_iconv_subrs(void)401 uim_init_iconv_subrs(void)
402 {
403 uim_scm_init_proc2("iconv-open", uim_ext_iconv_open);
404 uim_scm_init_proc2("iconv-code-conv", uim_ext_iconv_code_conv);
405 uim_scm_init_proc1("iconv-release", uim_ext_iconv_release);
406 }
407