1 /* Charset conversion.
2    Copyright (C) 2001-2006 Free Software Foundation, Inc.
3    Written by Bruno Haible and Simon Josefsson.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #include <config.h>
20 
21 /* Specification.  */
22 #include "striconv.h"
23 
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #if HAVE_ICONV
29 # include <iconv.h>
30 /* Get MB_LEN_MAX, CHAR_BIT.  */
31 # include <limits.h>
32 #endif
33 
34 #include "strdup.h"
35 #include "c-strcase.h"
36 
37 #ifndef SIZE_MAX
38 # define SIZE_MAX ((size_t) -1)
39 #endif
40 
41 
42 #if HAVE_ICONV
43 
44 int
mem_cd_iconv(const char * src,size_t srclen,iconv_t cd,char ** resultp,size_t * lengthp)45 mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
46 	      char **resultp, size_t *lengthp)
47 {
48 # define tmpbufsize 4096
49   size_t length;
50   char *result;
51 
52   /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
53 # if defined _LIBICONV_VERSION \
54     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
55   /* Set to the initial state.  */
56   iconv (cd, NULL, NULL, NULL, NULL);
57 # endif
58 
59   /* Determine the length we need.  */
60   {
61     size_t count = 0;
62     char tmpbuf[tmpbufsize];
63     const char *inptr = src;
64     size_t insize = srclen;
65 
66     while (insize > 0)
67       {
68 	char *outptr = tmpbuf;
69 	size_t outsize = tmpbufsize;
70 	size_t res = iconv (cd,
71 			    (ICONV_CONST char **) &inptr, &insize,
72 			    &outptr, &outsize);
73 
74 	if (res == (size_t)(-1))
75 	  {
76 	    if (errno == E2BIG)
77 	      ;
78 	    else if (errno == EINVAL)
79 	      break;
80 	    else
81 	      return -1;
82 	  }
83 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
84 	/* Irix iconv() inserts a NUL byte if it cannot convert.
85 	   NetBSD iconv() inserts a question mark if it cannot convert.
86 	   Only GNU libiconv and GNU libc are known to prefer to fail rather
87 	   than doing a lossy conversion.  */
88 	else if (res > 0)
89 	  {
90 	    errno = EILSEQ;
91 	    return -1;
92 	  }
93 # endif
94 	count += outptr - tmpbuf;
95       }
96     /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
97 # if defined _LIBICONV_VERSION \
98     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
99     {
100       char *outptr = tmpbuf;
101       size_t outsize = tmpbufsize;
102       size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
103 
104       if (res == (size_t)(-1))
105 	return -1;
106       count += outptr - tmpbuf;
107     }
108 # endif
109     length = count;
110   }
111 
112   if (length == 0)
113     {
114       *lengthp = 0;
115       return 0;
116     }
117   result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
118   if (result == NULL)
119     {
120       errno = ENOMEM;
121       return -1;
122     }
123   *resultp = result;
124   *lengthp = length;
125 
126   /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
127 # if defined _LIBICONV_VERSION \
128     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
129   /* Return to the initial state.  */
130   iconv (cd, NULL, NULL, NULL, NULL);
131 # endif
132 
133   /* Do the conversion for real.  */
134   {
135     const char *inptr = src;
136     size_t insize = srclen;
137     char *outptr = result;
138     size_t outsize = length;
139 
140     while (insize > 0)
141       {
142 	size_t res = iconv (cd,
143 			    (ICONV_CONST char **) &inptr, &insize,
144 			    &outptr, &outsize);
145 
146 	if (res == (size_t)(-1))
147 	  {
148 	    if (errno == EINVAL)
149 	      break;
150 	    else
151 	      return -1;
152 	  }
153 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
154 	/* Irix iconv() inserts a NUL byte if it cannot convert.
155 	   NetBSD iconv() inserts a question mark if it cannot convert.
156 	   Only GNU libiconv and GNU libc are known to prefer to fail rather
157 	   than doing a lossy conversion.  */
158 	else if (res > 0)
159 	  {
160 	    errno = EILSEQ;
161 	    return -1;
162 	  }
163 # endif
164       }
165     /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
166 # if defined _LIBICONV_VERSION \
167     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
168     {
169       size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
170 
171       if (res == (size_t)(-1))
172 	return -1;
173     }
174 # endif
175     if (outsize != 0)
176       abort ();
177   }
178 
179   return 0;
180 # undef tmpbufsize
181 }
182 
183 char *
str_cd_iconv(const char * src,iconv_t cd)184 str_cd_iconv (const char *src, iconv_t cd)
185 {
186   /* For most encodings, a trailing NUL byte in the input will be converted
187      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
188      function is usable for UTF-7, we have to exclude the NUL byte from the
189      conversion and add it by hand afterwards.  */
190 # if PROBABLY_SLOWER
191 
192   char *result = NULL;
193   size_t length;
194   int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
195   char *final_result;
196 
197   if (retval < 0)
198     {
199       if (result != NULL)
200 	{
201 	  int saved_errno = errno;
202 	  free (result);
203 	  errno = saved_errno;
204 	}
205       return NULL;
206     }
207 
208   /* Add the terminating NUL byte.  */
209   final_result =
210     (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
211   if (final_result == NULL)
212     {
213       if (result != NULL)
214 	free (result);
215       errno = ENOMEM;
216       return NULL;
217     }
218   final_result[length] = '\0';
219 
220   return final_result;
221 
222 # else
223 
224   char *result;
225   size_t result_size;
226   size_t length;
227   const char *inptr = src;
228   size_t inbytes_remaining = strlen (src);
229 
230   /* Make a guess for the worst-case output size, in order to avoid a
231      realloc.  It's OK if the guess is wrong as long as it is not zero and
232      doesn't lead to an integer overflow.  */
233   result_size = inbytes_remaining;
234   {
235     size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
236     if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
237       result_size *= MB_LEN_MAX;
238   }
239   result_size += 1; /* for the terminating NUL */
240 
241   result = (char *) malloc (result_size);
242   if (result == NULL)
243     {
244       errno = ENOMEM;
245       return NULL;
246     }
247 
248   /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
249 # if defined _LIBICONV_VERSION \
250     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
251   /* Set to the initial state.  */
252   iconv (cd, NULL, NULL, NULL, NULL);
253 # endif
254 
255   /* Do the conversion.  */
256   {
257     char *outptr = result;
258     size_t outbytes_remaining = result_size - 1;
259 
260     for (;;)
261       {
262 	/* Here inptr + inbytes_remaining = src + strlen (src),
263 		outptr + outbytes_remaining = result + result_size - 1.  */
264 	size_t res = iconv (cd,
265 			    (ICONV_CONST char **) &inptr, &inbytes_remaining,
266 			    &outptr, &outbytes_remaining);
267 
268 	if (res == (size_t)(-1))
269 	  {
270 	    if (errno == EINVAL)
271 	      break;
272 	    else if (errno == E2BIG)
273 	      {
274 		size_t used = outptr - result;
275 		size_t newsize = result_size * 2;
276 		char *newresult;
277 
278 		if (!(newsize > result_size))
279 		  {
280 		    errno = ENOMEM;
281 		    goto failed;
282 		  }
283 		newresult = (char *) realloc (result, newsize);
284 		if (newresult == NULL)
285 		  {
286 		    errno = ENOMEM;
287 		    goto failed;
288 		  }
289 		result = newresult;
290 		result_size = newsize;
291 		outptr = result + used;
292 		outbytes_remaining = result_size - 1 - used;
293 	      }
294 	    else
295 	      goto failed;
296 	  }
297 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
298 	/* Irix iconv() inserts a NUL byte if it cannot convert.
299 	   NetBSD iconv() inserts a question mark if it cannot convert.
300 	   Only GNU libiconv and GNU libc are known to prefer to fail rather
301 	   than doing a lossy conversion.  */
302 	else if (res > 0)
303 	  {
304 	    errno = EILSEQ;
305 	    goto failed;
306 	  }
307 # endif
308 	else
309 	  break;
310       }
311     /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
312 # if defined _LIBICONV_VERSION \
313     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
314     for (;;)
315       {
316 	/* Here outptr + outbytes_remaining = result + result_size - 1.  */
317 	size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
318 
319 	if (res == (size_t)(-1))
320 	  {
321 	    if (errno == E2BIG)
322 	      {
323 		size_t used = outptr - result;
324 		size_t newsize = result_size * 2;
325 		char *newresult;
326 
327 		if (!(newsize > result_size))
328 		  {
329 		    errno = ENOMEM;
330 		    goto failed;
331 		  }
332 		newresult = (char *) realloc (result, newsize);
333 		if (newresult == NULL)
334 		  {
335 		    errno = ENOMEM;
336 		    goto failed;
337 		  }
338 		result = newresult;
339 		result_size = newsize;
340 		outptr = result + used;
341 		outbytes_remaining = result_size - 1 - used;
342 	      }
343 	    else
344 	      goto failed;
345 	  }
346 	else
347 	  break;
348       }
349 # endif
350 
351     /* Add the terminating NUL byte.  */
352     *outptr++ = '\0';
353 
354     length = outptr - result;
355   }
356 
357   /* Give away unused memory.  */
358   if (length < result_size)
359     {
360       char *smaller_result = (char *) realloc (result, length);
361 
362       if (smaller_result != NULL)
363 	result = smaller_result;
364     }
365 
366   return result;
367 
368  failed:
369   {
370     int saved_errno = errno;
371     free (result);
372     errno = saved_errno;
373     return NULL;
374   }
375 
376 # endif
377 }
378 
379 #endif
380 
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.
   Return value: a freshly allocated NUL-terminated string if successful,
   otherwise NULL with errno set.  In particular:
     - ENOMEM if the two codeset names compare equal under c_strcasecmp and
       copying SRC fails,
     - ENOSYS if this file was built without iconv support,
     - otherwise the errno left by iconv_open, str_cd_iconv or
       iconv_close.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      /* No conversion needed: hand back a plain copy.  strdup is not
         guaranteed to set errno on every platform, so set it here to keep
         the "NULL means errno is set" contract of this function.  */
      char *copy = strdup (src);

      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}
438