1 /*
2 
3   Copyright (c) 2003-2013 uim Project https://github.com/uim/uim
4 
5   All rights reserved.
6 
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions
9   are met:
10 
11   1. Redistributions of source code must retain the above copyright
12      notice, this list of conditions and the following disclaimer.
13   2. Redistributions in binary form must reproduce the above copyright
14      notice, this list of conditions and the following disclaimer in the
15      documentation and/or other materials provided with the distribution.
16   3. Neither the name of authors nor the names of its contributors
17      may be used to endorse or promote products derived from this software
18      without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
21   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
24   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26   OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30   SUCH DAMAGE.
31 
32 */
33 
34 #include <config.h>
35 
36 #include <stdlib.h>
37 #include <string.h>
38 #include <iconv.h>
39 #include <assert.h>
40 #include <errno.h>
41 
42 #ifdef HAVE_ALLOCA_H
43 # include <alloca.h>
44 #endif
45 
46 #include "uim.h"
47 #include "uim-internal.h"
48 #include "uim-util.h"
49 #include "uim-scm.h"
50 #include "uim-scm-abbrev.h"
51 #include "uim-iconv.h"
52 
53 #define MBCHAR_LEN_MAX 6  /* assumes CESU-8 */
54 
55 static void *uim_iconv_open(const char *tocode, const char *fromcode);
56 static int uim_iconv_is_convertible(const char *tocode, const char *fromcode);
57 static void *uim_iconv_create(const char *tocode, const char *fromcode);
58 static char *uim_iconv_code_conv(void *obj, const char *str);
59 static void uim_iconv_release(void *obj);
60 
61 static int check_encoding_equivalence(const char *tocode,
62                                       const char *fromcode);
63 static const char **uim_get_encoding_alias(const char *encoding);
64 
65 
/*
 * iconv-backed implementation of the uim_code_converter interface.  The
 * initializer is positional: the entries are the convertibility check, the
 * constructor, the conversion routine and the destructor defined in this
 * file, in that order.
 */
static struct uim_code_converter uim_iconv_tbl = {
  uim_iconv_is_convertible,
  uim_iconv_create,
  uim_iconv_code_conv,
  uim_iconv_release
};
/* Externally visible handle pointing at the iconv converter table above. */
struct uim_code_converter *uim_iconv = &uim_iconv_tbl;
73 
74 #include "encoding-table.c"
75 
76 
/*
 * Report whether two encoding names denote the same encoding.
 *
 * Each name is expanded to its alias group with uim_get_encoding_alias().
 * A name that belongs to no group is treated as a group containing only
 * itself.  The names are equivalent when any member of one group is an
 * exact (strcmp) match for any member of the other.
 *
 * tocode, fromcode: encoding names; both must be non-NULL.
 *
 * Returns 1 when the encodings are equivalent, 0 otherwise.
 */
static int
check_encoding_equivalence(const char *tocode, const char *fromcode)
{
  /*
   * One-entry, NULL-terminated fallback lists.  They live on the stack so
   * that no allocation (and therefore no cleanup on any exit path) is
   * needed when a name has no registered alias group.
   */
  const char *tocode_only[2];
  const char *fromcode_only[2];
  const char **alias_tocode;
  const char **alias_fromcode;
  int i, j;

  assert(tocode);
  assert(fromcode);

  alias_tocode = uim_get_encoding_alias(tocode);
  if (!alias_tocode) {
    tocode_only[0] = tocode;
    tocode_only[1] = NULL;
    alias_tocode = tocode_only;
  }

  alias_fromcode = uim_get_encoding_alias(fromcode);
  if (!alias_fromcode) {
    fromcode_only[0] = fromcode;
    fromcode_only[1] = NULL;
    alias_fromcode = fromcode_only;
  }

  for (i = 0; alias_tocode[i]; i++) {
    for (j = 0; alias_fromcode[j]; j++) {
      if (strcmp(alias_tocode[i], alias_fromcode[j]) == 0)
        return 1;
    }
  }

  return 0;
}
123 
124 static int
uim_iconv_is_convertible(const char * tocode,const char * fromcode)125 uim_iconv_is_convertible(const char *tocode, const char *fromcode)
126 {
127   iconv_t ic;
128   uim_bool result;
129 
130   if (UIM_CATCH_ERROR_BEGIN())
131     return UIM_FALSE;
132 
133   assert(tocode);
134   assert(fromcode);
135 
136   do {
137     if (check_encoding_equivalence(tocode, fromcode)) {
138       result = UIM_TRUE;
139       break;
140     }
141 
142     /* TODO cache the result */
143     ic = (iconv_t)uim_iconv_open(tocode, fromcode);
144     if (ic == (iconv_t)-1) {
145       result = UIM_FALSE;
146       break;
147     }
148     iconv_close(ic);
149     result = UIM_TRUE;
150   } while (/* CONSTCOND */ 0);
151 
152   UIM_CATCH_ERROR_END();
153 
154   return result;
155 }
156 
157 static const char **
uim_get_encoding_alias(const char * encoding)158 uim_get_encoding_alias(const char *encoding)
159 {
160   int i, j;
161   const char **alias;
162 
163   assert(encoding);
164 
165   for (i = 0; (alias = uim_encoding_list[i]); i++) {
166     for (j = 0; alias[j]; j++) {
167       if (!strcmp(alias[j], encoding))
168         return alias;
169     }
170   }
171   return NULL;
172 }
173 
/*
 * Open an iconv conversion descriptor from `fromcode` to `tocode`, trying
 * every known alias of both names.
 *
 * Each name is expanded to its alias group with uim_get_encoding_alias();
 * a name without a group is tried as itself only.  iconv_open() is called
 * for each (to-alias, from-alias) pair, to-aliases in the outer loop, and
 * the first descriptor that opens is returned.
 *
 * tocode, fromcode: encoding names; both must be non-NULL.
 *
 * Returns the descriptor cast to void *, or (iconv_t)-1 cast to void *
 * when no combination can be opened.
 */
static void *
uim_iconv_open(const char *tocode, const char *fromcode)
{
  /*
   * One-entry, NULL-terminated fallback lists kept on the stack, so that
   * nothing has to be allocated or released on any exit path.
   */
  const char *tocode_only[2];
  const char *fromcode_only[2];
  const char **alias_tocode, **alias_fromcode;
  iconv_t cd;
  int i, j;

  assert(tocode);
  assert(fromcode);

  alias_tocode = uim_get_encoding_alias(tocode);
  if (!alias_tocode) {
    tocode_only[0] = tocode;
    tocode_only[1] = NULL;
    alias_tocode = tocode_only;
  }

  alias_fromcode = uim_get_encoding_alias(fromcode);
  if (!alias_fromcode) {
    fromcode_only[0] = fromcode;
    fromcode_only[1] = NULL;
    alias_fromcode = fromcode_only;
  }

  for (i = 0; alias_tocode[i]; i++) {
    for (j = 0; alias_fromcode[j]; j++) {
      cd = iconv_open(alias_tocode[i], alias_fromcode[j]);
      if (cd != (iconv_t)-1)
        return (void *)cd;
    }
  }

  return (void *)(iconv_t)-1;
}
221 
222 static void *
uim_iconv_create(const char * tocode,const char * fromcode)223 uim_iconv_create(const char *tocode, const char *fromcode)
224 {
225   iconv_t ic;
226 
227   if (UIM_CATCH_ERROR_BEGIN())
228     return NULL;
229 
230   assert(tocode);
231   assert(fromcode);
232 
233   do {
234     if (check_encoding_equivalence(tocode, fromcode)) {
235       ic = (iconv_t)0;
236       break;
237     }
238 
239     ic = (iconv_t)uim_iconv_open(tocode, fromcode);
240     if (ic == (iconv_t)-1) {
241       /* since iconv_t is not explicit pointer, use 0 instead of NULL */
242       ic = (iconv_t)0;
243     }
244   } while (/* CONSTCOND */ 0);
245 
246   UIM_CATCH_ERROR_END();
247 
248   return (void *)ic;
249 }
250 
251 static char *
uim_iconv_code_conv(void * obj,const char * instr)252 uim_iconv_code_conv(void *obj, const char *instr)
253 {
254   iconv_t cd = (iconv_t)obj;
255   size_t ins;
256   const char *in;
257   size_t outbufsiz, outs;
258   char   *outbuf = NULL, *out;
259   size_t ret = 0;
260   size_t nconv = 0;
261   size_t idx = 0;
262   char *str = NULL;
263 
264   if (UIM_CATCH_ERROR_BEGIN())
265     return NULL;
266 
267   if (!instr)
268     goto err;
269 
270   if (!obj) {
271     UIM_CATCH_ERROR_END();
272     return uim_strdup(instr);
273   }
274 
275   ins = strlen(instr);
276   in = instr;
277 
278   outbufsiz = (ins + sizeof("")) * MBCHAR_LEN_MAX;
279   out = outbuf = uim_malloc(outbufsiz);
280 
281   while (ins > 0) {
282     out = outbuf;
283     outs = outbufsiz;
284 
285     ret = iconv(cd, (ICONV_CONST char **)&in, &ins, &out, &outs);
286     nconv = outbufsiz - outs;
287     if (ret == (size_t)-1) {
288       switch (errno) {
289       case EINVAL:
290 	goto err;
291       case E2BIG:
292 	outbufsiz *= 2;
293 	out = uim_realloc(outbuf, outbufsiz);
294 	outbuf = out;
295 	break;
296       default:
297 	goto err;
298       }
299     } else {
300       /* XXX: irreversible characters */
301     }
302     if (nconv > 0) {
303       if (str == NULL)
304 	str = uim_malloc(nconv + 1);
305       else
306 	str = uim_realloc(str, idx + nconv + 1);
307       memcpy(&str[idx], outbuf, nconv);
308       idx += nconv;
309     }
310   }
311   do {
312     out = outbuf;
313     outs = outbufsiz;
314 
315     ret = iconv(cd, NULL, NULL, &out, &outs);
316     nconv = outbufsiz - outs;
317 
318     if (ret == (size_t)-1) {
319       outbufsiz *= 2;
320       out = uim_realloc(outbuf, outbufsiz);
321       outbuf = out;
322     } else {
323       /* XXX: irreversible characters */
324     }
325     if (nconv > 0) {
326       if (str == NULL)
327 	str = uim_malloc(nconv + 1);
328       else
329 	str = uim_realloc(str, idx + nconv + 1);
330       memcpy(&str[idx], outbuf, nconv);
331       idx += nconv;
332     }
333   } while (ret == (size_t)-1);
334 
335   if (str == NULL)
336     str = uim_strdup("");
337   else
338     str[idx] = '\0';
339   free(outbuf);
340 
341   UIM_CATCH_ERROR_END();
342 
343   return str;
344 
345  err:
346 
347   free(str);
348   free(outbuf);
349 
350   UIM_CATCH_ERROR_END();
351 
352   return uim_strdup("");
353 }
354 
/*
 * Dispose of a converter object obtained from uim_iconv_create().
 *
 * A zero handle is accepted and ignored; any other value is closed with
 * iconv_close().  Nothing is done when UIM_CATCH_ERROR_BEGIN() reports an
 * error.
 */
static void
uim_iconv_release(void *obj)
{
  if (UIM_CATCH_ERROR_BEGIN())
    return;

  if (obj != NULL) {
    iconv_t cd = (iconv_t)obj;

    iconv_close(cd);
  }

  UIM_CATCH_ERROR_END();
}
366 
367 static uim_lisp
uim_ext_iconv_open(uim_lisp tocode_,uim_lisp fromcode_)368 uim_ext_iconv_open(uim_lisp tocode_, uim_lisp fromcode_)
369 {
370   const char *tocode = REFER_C_STR(tocode_);
371   const char *fromcode = REFER_C_STR(fromcode_);
372   iconv_t ic;
373 
374   ic = uim_iconv_create(tocode, fromcode);
375   if (!ic)
376     return uim_scm_f();
377 
378   return MAKE_PTR(ic);
379 }
380 
381 static uim_lisp
uim_ext_iconv_code_conv(uim_lisp ic_,uim_lisp inbuf_)382 uim_ext_iconv_code_conv(uim_lisp ic_, uim_lisp inbuf_)
383 {
384   char *outbuf;
385 
386   outbuf = uim_iconv_code_conv(C_PTR(ic_), REFER_C_STR(inbuf_));
387   if (!outbuf)
388     return uim_scm_f();
389 
390   return MAKE_STR_DIRECTLY(outbuf);
391 }
392 
393 static uim_lisp
uim_ext_iconv_release(uim_lisp ic_)394 uim_ext_iconv_release(uim_lisp ic_)
395 {
396   uim_iconv_release(C_PTR(ic_));
397   return uim_scm_t();
398 }
399 
400 void
uim_init_iconv_subrs(void)401 uim_init_iconv_subrs(void)
402 {
403   uim_scm_init_proc2("iconv-open", uim_ext_iconv_open);
404   uim_scm_init_proc2("iconv-code-conv", uim_ext_iconv_code_conv);
405   uim_scm_init_proc1("iconv-release", uim_ext_iconv_release);
406 }
407