1 /* Charset conversion.
2 Copyright (C) 2001-2006 Free Software Foundation, Inc.
3 Written by Bruno Haible and Simon Josefsson.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #include <config.h>
20
21 /* Specification. */
22 #include "striconv.h"
23
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #if HAVE_ICONV
29 # include <iconv.h>
30 /* Get MB_LEN_MAX, CHAR_BIT. */
31 # include <limits.h>
32 #endif
33
34 #include "strdup.h"
35 #include "c-strcase.h"
36
37 #ifndef SIZE_MAX
38 # define SIZE_MAX ((size_t) -1)
39 #endif
40
41
42 #if HAVE_ICONV
43
/* Convert the SRCLEN bytes starting at SRC through the conversion
   descriptor CD, storing the converted bytes in a heap buffer.

   The conversion is done in two passes: the first pass converts into a
   scratch buffer only to learn the exact output length, the second pass
   converts into a buffer of exactly that size.

   On entry, *RESULTP is either NULL or a pointer previously obtained from
   malloc; in the latter case it is handed to realloc.
   On success, 0 is returned, *LENGTHP holds the number of converted bytes
   and *RESULTP points to them.  If the converted length is 0, only *LENGTHP
   is written and *RESULTP is left untouched.
   On failure, -1 is returned with errno set (by iconv, or to ENOMEM if the
   allocation failed).  If the failure happens during the second pass,
   *RESULTP already refers to the (re)allocated buffer.

   An incomplete multibyte sequence at the end of the input (EINVAL from
   iconv) is not treated as an error: the conversion stops there and the
   bytes converted up to that point are returned.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
  size_t length;
  char *result;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Determine the length we need.  */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    const char *inptr = src;
    size_t insize = srclen;

    while (insize > 0)
      {
        char *outptr = tmpbuf;
        size_t outsize = tmpbufsize;
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              /* The scratch buffer is full; count it and go on.  */
              ;
            else if (errno == EINVAL)
              {
                /* Incomplete sequence at the end of the input.  iconv may
                   have converted some characters before reaching it; they
                   will be produced again by the second pass, so they must
                   be included in the length.  */
                count += outptr - tmpbuf;
                break;
              }
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        count += outptr - tmpbuf;
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      /* Flush the conversion state; this may emit a final shift sequence,
         which also has to be counted.  */
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
      count += outptr - tmpbuf;
    }
# endif
    length = count;
  }

  if (length == 0)
    {
      /* Nothing to store; *resultp is deliberately left as it was.  */
      *lengthp = 0;
      return 0;
    }
  result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
  if (result == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  /* From here on the caller owns the buffer, even if we fail below.  */
  *resultp = result;
  *lengthp = length;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion for real.  */
  {
    const char *inptr = src;
    size_t insize = srclen;
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0)
      {
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
    }
# endif
    /* Both passes must have produced exactly the same number of bytes.  */
    if (outsize != 0)
      abort ();
  }

  return 0;
# undef tmpbufsize
}
182
# if !PROBABLY_SLOWER
/* Helper for str_cd_iconv: double the size of the output buffer.
   *BUFP is the buffer, *SIZEP its allocated size, *OUTP the current write
   position inside it and *ROOMP the number of bytes still available for
   iconv (one byte is always kept in reserve for the terminating NUL).
   Returns 0 after updating all four, or -1 with errno = ENOMEM, in which
   case nothing has been modified and *BUFP is still valid.  */
static int
str_cd_iconv_grow (char **bufp, size_t *sizep, char **outp, size_t *roomp)
{
  size_t filled = *outp - *bufp;
  size_t doubled = *sizep * 2;
  char *bigger;

  /* Doubling wrapped around: the request cannot be satisfied.  */
  if (doubled <= *sizep)
    {
      errno = ENOMEM;
      return -1;
    }
  bigger = (char *) realloc (*bufp, doubled);
  if (bigger == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  *bufp = bigger;
  *sizep = doubled;
  *outp = bigger + filled;
  *roomp = doubled - 1 - filled;
  return 0;
}
# endif

/* Convert the NUL-terminated string SRC through the conversion descriptor
   CD and return the result as a freshly allocated NUL-terminated string.
   On failure, NULL is returned and errno is set.

   The terminating NUL of SRC is not fed to iconv; it is appended by hand
   afterwards.  For most encodings a trailing NUL would simply be converted
   to a trailing NUL, but that does not hold for UTF-7, and this function is
   meant to be usable for UTF-7 as well.  */
char *
str_cd_iconv (const char *src, iconv_t cd)
{
# if PROBABLY_SLOWER

  /* Variant built on top of mem_cd_iconv.  */
  char *converted = NULL;
  size_t converted_len;
  char *terminated;

  if (mem_cd_iconv (src, strlen (src), cd, &converted, &converted_len) < 0)
    {
      if (converted != NULL)
        {
          /* Release the buffer without disturbing the reported errno.  */
          int saved_errno = errno;
          free (converted);
          errno = saved_errno;
        }
      return NULL;
    }

  /* Make room for, and store, the terminating NUL byte.  */
  if (converted != NULL)
    terminated = realloc (converted, converted_len + 1);
  else
    terminated = malloc (converted_len + 1);
  if (terminated == NULL)
    {
      free (converted);
      errno = ENOMEM;
      return NULL;
    }
  terminated[converted_len] = '\0';

  return terminated;

# else

  /* Single-pass variant: convert into a buffer that is grown on demand.  */
  char *buf;
  size_t bufsize;
  char *out;
  size_t room;
  size_t total;
  const char *in = src;
  size_t inleft = strlen (src);
  size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);

  /* Guess the worst-case output size so that a realloc is usually not
     needed.  A wrong guess is harmless as long as it is nonzero and the
     multiplication cannot overflow.  */
  bufsize = inleft;
  if (bufsize <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
    bufsize *= MB_LEN_MAX;
  bufsize += 1;  /* for the terminating NUL */

  buf = (char *) malloc (bufsize);
  if (buf == NULL)
    {
      errno = ENOMEM;
      return NULL;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Invariant for both loops below:
     out + room = buf + bufsize - 1, i.e. the last byte stays reserved.  */
  out = buf;
  room = bufsize - 1;

  /* Convert the input.  */
  for (;;)
    {
      size_t res = iconv (cd,
                          (ICONV_CONST char **) &in, &inleft,
                          &out, &room);

      if (res != (size_t)(-1))
        {
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
          /* Irix iconv() inserts a NUL byte if it cannot convert.
             NetBSD iconv() inserts a question mark if it cannot convert.
             Only GNU libiconv and GNU libc are known to prefer to fail
             rather than doing a lossy conversion.  */
          if (res > 0)
            {
              errno = EILSEQ;
              goto failed;
            }
# endif
          break;
        }
      /* Incomplete sequence at the end of the input: stop here and keep
         what has been converted so far.  */
      if (errno == EINVAL)
        break;
      /* Anything but "output buffer full" is a hard error.  */
      if (errno != E2BIG)
        goto failed;
      if (str_cd_iconv_grow (&buf, &bufsize, &out, &room) < 0)
        goto failed;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Flush the conversion state, growing the buffer if necessary.  */
  for (;;)
    {
      size_t res = iconv (cd, NULL, NULL, &out, &room);

      if (res != (size_t)(-1))
        break;
      if (errno != E2BIG)
        goto failed;
      if (str_cd_iconv_grow (&buf, &bufsize, &out, &room) < 0)
        goto failed;
    }
# endif

  /* Store the terminating NUL in the byte that was kept in reserve.  */
  *out++ = '\0';
  total = out - buf;

  /* Hand unused memory back; if shrinking fails, keep the larger block.  */
  if (total < bufsize)
    {
      char *trimmed = (char *) realloc (buf, total);

      if (trimmed != NULL)
        buf = trimmed;
    }

  return buf;

 failed:
  {
    /* Release the buffer without disturbing the reported errno.  */
    int saved_errno = errno;
    free (buf);
    errno = saved_errno;
    return NULL;
  }

# endif
}
378
379 #endif
380
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET and return the result as a freshly allocated
   NUL-terminated string.  On failure, NULL is returned and errno is set.

   If c_strcasecmp reports the two codeset names as equal, no conversion is
   attempted and a plain copy of SRC is returned.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  /* Same encoding on both sides: a copy is all that is needed.  */
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    return strdup (src);

#if HAVE_ICONV
  {
    iconv_t cd;
    char *converted;

    /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
    if (c_strcasecmp (from_codeset, "EUC-KR") == 0
        || c_strcasecmp (to_codeset, "EUC-KR") == 0)
      {
        errno = EINVAL;
        return NULL;
      }
# endif

    cd = iconv_open (to_codeset, from_codeset);
    if (cd == (iconv_t) -1)
      return NULL;

    converted = str_cd_iconv (src, cd);
    if (converted == NULL)
      {
        /* The conversion failed.  Close the descriptor, but report the
           errno that str_cd_iconv set, not one from iconv_close.  */
        int saved_errno = errno;
        iconv_close (cd);
        errno = saved_errno;
        return NULL;
      }

    if (iconv_close (cd) < 0)
      {
        /* Closing failed.  Discard the converted string and report the
           errno that iconv_close set, not one from free.  */
        int saved_errno = errno;
        free (converted);
        errno = saved_errno;
        return NULL;
      }

    return converted;
  }
#else
  /* This is a different error code than if iconv_open existed but didn't
     support from_codeset and to_codeset, so that the caller can emit
     an error message such as
       "iconv() is not supported. Installing GNU libiconv and
        then reinstalling this package would fix this."  */
  errno = ENOSYS;
  return NULL;
#endif
}
438