1 /* -*- buffer-read-only: t -*- vi: set ro: */ 2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 3 /* Charset conversion. 4 Copyright (C) 2001-2007, 2009-2010 Free Software Foundation, Inc. 5 Written by Bruno Haible and Simon Josefsson. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software Foundation, 19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 20 21 #include <config.h> 22 23 /* Specification. */ 24 #include "striconv.h" 25 26 #include <errno.h> 27 #include <stdlib.h> 28 #include <string.h> 29 30 #if HAVE_ICONV 31 # include <iconv.h> 32 /* Get MB_LEN_MAX, CHAR_BIT. */ 33 # include <limits.h> 34 #endif 35 36 #include "c-strcase.h" 37 38 #ifndef SIZE_MAX 39 # define SIZE_MAX ((size_t) -1) 40 #endif 41 42 43 #if HAVE_ICONV 44 45 int 46 mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, 47 char **resultp, size_t *lengthp) 48 { 49 # define tmpbufsize 4096 50 size_t length; 51 char *result; 52 53 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 54 # if defined _LIBICONV_VERSION \ 55 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 56 /* Set to the initial state. */ 57 iconv (cd, NULL, NULL, NULL, NULL); 58 # endif 59 60 /* Determine the length we need. */ 61 { 62 size_t count = 0; 63 /* The alignment is needed when converting e.g. to glibc's WCHAR_T or 64 libiconv's UCS-4-INTERNAL encoding. */ 65 union { unsigned int align; char buf[tmpbufsize]; } tmp; 66 # define tmpbuf tmp.buf 67 const char *inptr = src; 68 size_t insize = srclen; 69 70 while (insize > 0) 71 { 72 char *outptr = tmpbuf; 73 size_t outsize = tmpbufsize; 74 size_t res = iconv (cd, 75 (ICONV_CONST char **) &inptr, &insize, 76 &outptr, &outsize); 77 78 if (res == (size_t)(-1)) 79 { 80 if (errno == E2BIG) 81 ; 82 else if (errno == EINVAL) 83 break; 84 else 85 return -1; 86 } 87 # if !defined _LIBICONV_VERSION && !defined __GLIBC__ 88 /* Irix iconv() inserts a NUL byte if it cannot convert. 89 NetBSD iconv() inserts a question mark if it cannot convert. 90 Only GNU libiconv and GNU libc are known to prefer to fail rather 91 than doing a lossy conversion. */ 92 else if (res > 0) 93 { 94 errno = EILSEQ; 95 return -1; 96 } 97 # endif 98 count += outptr - tmpbuf; 99 } 100 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 101 # if defined _LIBICONV_VERSION \ 102 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 103 { 104 char *outptr = tmpbuf; 105 size_t outsize = tmpbufsize; 106 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); 107 108 if (res == (size_t)(-1)) 109 return -1; 110 count += outptr - tmpbuf; 111 } 112 # endif 113 length = count; 114 # undef tmpbuf 115 } 116 117 if (length == 0) 118 { 119 *lengthp = 0; 120 return 0; 121 } 122 if (*resultp != NULL && *lengthp >= length) 123 result = *resultp; 124 else 125 { 126 result = (char *) malloc (length); 127 if (result == NULL) 128 { 129 errno = ENOMEM; 130 return -1; 131 } 132 } 133 134 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 135 # if defined _LIBICONV_VERSION \ 136 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 137 /* Return to the initial state. */ 138 iconv (cd, NULL, NULL, NULL, NULL); 139 # endif 140 141 /* Do the conversion for real. */ 142 { 143 const char *inptr = src; 144 size_t insize = srclen; 145 char *outptr = result; 146 size_t outsize = length; 147 148 while (insize > 0) 149 { 150 size_t res = iconv (cd, 151 (ICONV_CONST char **) &inptr, &insize, 152 &outptr, &outsize); 153 154 if (res == (size_t)(-1)) 155 { 156 if (errno == EINVAL) 157 break; 158 else 159 goto fail; 160 } 161 # if !defined _LIBICONV_VERSION && !defined __GLIBC__ 162 /* Irix iconv() inserts a NUL byte if it cannot convert. 163 NetBSD iconv() inserts a question mark if it cannot convert. 164 Only GNU libiconv and GNU libc are known to prefer to fail rather 165 than doing a lossy conversion. */ 166 else if (res > 0) 167 { 168 errno = EILSEQ; 169 goto fail; 170 } 171 # endif 172 } 173 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 174 # if defined _LIBICONV_VERSION \ 175 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 176 { 177 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); 178 179 if (res == (size_t)(-1)) 180 goto fail; 181 } 182 # endif 183 if (outsize != 0) 184 abort (); 185 } 186 187 *resultp = result; 188 *lengthp = length; 189 190 return 0; 191 192 fail: 193 { 194 if (result != *resultp) 195 { 196 int saved_errno = errno; 197 free (result); 198 errno = saved_errno; 199 } 200 return -1; 201 } 202 # undef tmpbufsize 203 } 204 205 char * 206 str_cd_iconv (const char *src, iconv_t cd) 207 { 208 /* For most encodings, a trailing NUL byte in the input will be converted 209 to a trailing NUL byte in the output. But not for UTF-7. So that this 210 function is usable for UTF-7, we have to exclude the NUL byte from the 211 conversion and add it by hand afterwards. */ 212 # if !defined _LIBICONV_VERSION && !defined __GLIBC__ 213 /* Irix iconv() inserts a NUL byte if it cannot convert. 214 NetBSD iconv() inserts a question mark if it cannot convert. 215 Only GNU libiconv and GNU libc are known to prefer to fail rather 216 than doing a lossy conversion. For other iconv() implementations, 217 we have to look at the number of irreversible conversions returned; 218 but this information is lost when iconv() returns for an E2BIG reason. 219 Therefore we cannot use the second, faster algorithm. */ 220 221 char *result = NULL; 222 size_t length = 0; 223 int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); 224 char *final_result; 225 226 if (retval < 0) 227 { 228 if (result != NULL) 229 abort (); 230 return NULL; 231 } 232 233 /* Add the terminating NUL byte. */ 234 final_result = 235 (result != NULL ? realloc (result, length + 1) : malloc (length + 1)); 236 if (final_result == NULL) 237 { 238 free (result); 239 errno = ENOMEM; 240 return NULL; 241 } 242 final_result[length] = '\0'; 243 244 return final_result; 245 246 # else 247 /* This algorithm is likely faster than the one above. But it may produce 248 iconv() returns for an E2BIG reason, when the output size guess is too 249 small. Therefore it can only be used when we don't need the number of 250 irreversible conversions performed. */ 251 char *result; 252 size_t result_size; 253 size_t length; 254 const char *inptr = src; 255 size_t inbytes_remaining = strlen (src); 256 257 /* Make a guess for the worst-case output size, in order to avoid a 258 realloc. It's OK if the guess is wrong as long as it is not zero and 259 doesn't lead to an integer overflow. */ 260 result_size = inbytes_remaining; 261 { 262 size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2); 263 if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX) 264 result_size *= MB_LEN_MAX; 265 } 266 result_size += 1; /* for the terminating NUL */ 267 268 result = (char *) malloc (result_size); 269 if (result == NULL) 270 { 271 errno = ENOMEM; 272 return NULL; 273 } 274 275 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 276 # if defined _LIBICONV_VERSION \ 277 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 278 /* Set to the initial state. */ 279 iconv (cd, NULL, NULL, NULL, NULL); 280 # endif 281 282 /* Do the conversion. */ 283 { 284 char *outptr = result; 285 size_t outbytes_remaining = result_size - 1; 286 287 for (;;) 288 { 289 /* Here inptr + inbytes_remaining = src + strlen (src), 290 outptr + outbytes_remaining = result + result_size - 1. */ 291 size_t res = iconv (cd, 292 (ICONV_CONST char **) &inptr, &inbytes_remaining, 293 &outptr, &outbytes_remaining); 294 295 if (res == (size_t)(-1)) 296 { 297 if (errno == EINVAL) 298 break; 299 else if (errno == E2BIG) 300 { 301 size_t used = outptr - result; 302 size_t newsize = result_size * 2; 303 char *newresult; 304 305 if (!(newsize > result_size)) 306 { 307 errno = ENOMEM; 308 goto failed; 309 } 310 newresult = (char *) realloc (result, newsize); 311 if (newresult == NULL) 312 { 313 errno = ENOMEM; 314 goto failed; 315 } 316 result = newresult; 317 result_size = newsize; 318 outptr = result + used; 319 outbytes_remaining = result_size - 1 - used; 320 } 321 else 322 goto failed; 323 } 324 else 325 break; 326 } 327 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 328 # if defined _LIBICONV_VERSION \ 329 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 330 for (;;) 331 { 332 /* Here outptr + outbytes_remaining = result + result_size - 1. */ 333 size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining); 334 335 if (res == (size_t)(-1)) 336 { 337 if (errno == E2BIG) 338 { 339 size_t used = outptr - result; 340 size_t newsize = result_size * 2; 341 char *newresult; 342 343 if (!(newsize > result_size)) 344 { 345 errno = ENOMEM; 346 goto failed; 347 } 348 newresult = (char *) realloc (result, newsize); 349 if (newresult == NULL) 350 { 351 errno = ENOMEM; 352 goto failed; 353 } 354 result = newresult; 355 result_size = newsize; 356 outptr = result + used; 357 outbytes_remaining = result_size - 1 - used; 358 } 359 else 360 goto failed; 361 } 362 else 363 break; 364 } 365 # endif 366 367 /* Add the terminating NUL byte. */ 368 *outptr++ = '\0'; 369 370 length = outptr - result; 371 } 372 373 /* Give away unused memory. */ 374 if (length < result_size) 375 { 376 char *smaller_result = (char *) realloc (result, length); 377 378 if (smaller_result != NULL) 379 result = smaller_result; 380 } 381 382 return result; 383 384 failed: 385 { 386 int saved_errno = errno; 387 free (result); 388 errno = saved_errno; 389 return NULL; 390 } 391 392 # endif 393 } 394 395 #endif 396 397 char * 398 str_iconv (const char *src, const char *from_codeset, const char *to_codeset) 399 { 400 if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) 401 { 402 char *result = strdup (src); 403 404 if (result == NULL) 405 errno = ENOMEM; 406 return result; 407 } 408 else 409 { 410 #if HAVE_ICONV 411 iconv_t cd; 412 char *result; 413 414 /* Avoid glibc-2.1 bug with EUC-KR. */ 415 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION 416 if (c_strcasecmp (from_codeset, "EUC-KR") == 0 417 || c_strcasecmp (to_codeset, "EUC-KR") == 0) 418 { 419 errno = EINVAL; 420 return NULL; 421 } 422 # endif 423 cd = iconv_open (to_codeset, from_codeset); 424 if (cd == (iconv_t) -1) 425 return NULL; 426 427 result = str_cd_iconv (src, cd); 428 429 if (result == NULL) 430 { 431 /* Close cd, but preserve the errno from str_cd_iconv. */ 432 int saved_errno = errno; 433 iconv_close (cd); 434 errno = saved_errno; 435 } 436 else 437 { 438 if (iconv_close (cd) < 0) 439 { 440 /* Return NULL, but free the allocated memory, and while doing 441 that, preserve the errno from iconv_close. */ 442 int saved_errno = errno; 443 free (result); 444 errno = saved_errno; 445 return NULL; 446 } 447 } 448 return result; 449 #else 450 /* This is a different error code than if iconv_open existed but didn't 451 support from_codeset and to_codeset, so that the caller can emit 452 an error message such as 453 "iconv() is not supported. Installing GNU libiconv and 454 then reinstalling this package would fix this." */ 455 errno = ENOSYS; 456 return NULL; 457 #endif 458 } 459 } 460