1 /* -*- buffer-read-only: t -*- vi: set ro: */ 2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 3 /* Charset conversion. 4 Copyright (C) 2001-2007, 2010-2011 Free Software Foundation, Inc. 5 Written by Bruno Haible and Simon Josefsson. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software Foundation, 19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 20 21 #include <config.h> 22 23 /* Specification. */ 24 #include "striconv.h" 25 26 #include <errno.h> 27 #include <stdlib.h> 28 #include <string.h> 29 30 #if HAVE_ICONV 31 # include <iconv.h> 32 /* Get MB_LEN_MAX, CHAR_BIT. */ 33 # include <limits.h> 34 #endif 35 36 #include "c-strcase.h" 37 38 #ifndef SIZE_MAX 39 # define SIZE_MAX ((size_t) -1) 40 #endif 41 42 43 #if HAVE_ICONV 44 45 int 46 mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, 47 char **resultp, size_t *lengthp) 48 { 49 # define tmpbufsize 4096 50 size_t length; 51 char *result; 52 53 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 54 # if defined _LIBICONV_VERSION \ 55 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ 56 || defined __sun) 57 /* Set to the initial state. */ 58 iconv (cd, NULL, NULL, NULL, NULL); 59 # endif 60 61 /* Determine the length we need. */ 62 { 63 size_t count = 0; 64 /* The alignment is needed when converting e.g. to glibc's WCHAR_T or 65 libiconv's UCS-4-INTERNAL encoding. */ 66 union { unsigned int align; char buf[tmpbufsize]; } tmp; 67 # define tmpbuf tmp.buf 68 const char *inptr = src; 69 size_t insize = srclen; 70 71 while (insize > 0) 72 { 73 char *outptr = tmpbuf; 74 size_t outsize = tmpbufsize; 75 size_t res = iconv (cd, 76 (ICONV_CONST char **) &inptr, &insize, 77 &outptr, &outsize); 78 79 if (res == (size_t)(-1)) 80 { 81 if (errno == E2BIG) 82 ; 83 else if (errno == EINVAL) 84 break; 85 else 86 return -1; 87 } 88 # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) 89 /* Irix iconv() inserts a NUL byte if it cannot convert. 90 NetBSD iconv() inserts a question mark if it cannot convert. 91 Only GNU libiconv and GNU libc are known to prefer to fail rather 92 than doing a lossy conversion. */ 93 else if (res > 0) 94 { 95 errno = EILSEQ; 96 return -1; 97 } 98 # endif 99 count += outptr - tmpbuf; 100 } 101 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 102 # if defined _LIBICONV_VERSION \ 103 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ 104 || defined __sun) 105 { 106 char *outptr = tmpbuf; 107 size_t outsize = tmpbufsize; 108 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); 109 110 if (res == (size_t)(-1)) 111 return -1; 112 count += outptr - tmpbuf; 113 } 114 # endif 115 length = count; 116 # undef tmpbuf 117 } 118 119 if (length == 0) 120 { 121 *lengthp = 0; 122 return 0; 123 } 124 if (*resultp != NULL && *lengthp >= length) 125 result = *resultp; 126 else 127 { 128 result = (char *) malloc (length); 129 if (result == NULL) 130 { 131 errno = ENOMEM; 132 return -1; 133 } 134 } 135 136 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 137 # if defined _LIBICONV_VERSION \ 138 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ 139 || defined __sun) 140 /* Return to the initial state. */ 141 iconv (cd, NULL, NULL, NULL, NULL); 142 # endif 143 144 /* Do the conversion for real. */ 145 { 146 const char *inptr = src; 147 size_t insize = srclen; 148 char *outptr = result; 149 size_t outsize = length; 150 151 while (insize > 0) 152 { 153 size_t res = iconv (cd, 154 (ICONV_CONST char **) &inptr, &insize, 155 &outptr, &outsize); 156 157 if (res == (size_t)(-1)) 158 { 159 if (errno == EINVAL) 160 break; 161 else 162 goto fail; 163 } 164 # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) 165 /* Irix iconv() inserts a NUL byte if it cannot convert. 166 NetBSD iconv() inserts a question mark if it cannot convert. 167 Only GNU libiconv and GNU libc are known to prefer to fail rather 168 than doing a lossy conversion. */ 169 else if (res > 0) 170 { 171 errno = EILSEQ; 172 goto fail; 173 } 174 # endif 175 } 176 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 177 # if defined _LIBICONV_VERSION \ 178 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ 179 || defined __sun) 180 { 181 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); 182 183 if (res == (size_t)(-1)) 184 goto fail; 185 } 186 # endif 187 if (outsize != 0) 188 abort (); 189 } 190 191 *resultp = result; 192 *lengthp = length; 193 194 return 0; 195 196 fail: 197 { 198 if (result != *resultp) 199 { 200 int saved_errno = errno; 201 free (result); 202 errno = saved_errno; 203 } 204 return -1; 205 } 206 # undef tmpbufsize 207 } 208 209 char * 210 str_cd_iconv (const char *src, iconv_t cd) 211 { 212 /* For most encodings, a trailing NUL byte in the input will be converted 213 to a trailing NUL byte in the output. But not for UTF-7. So that this 214 function is usable for UTF-7, we have to exclude the NUL byte from the 215 conversion and add it by hand afterwards. */ 216 # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) 217 /* Irix iconv() inserts a NUL byte if it cannot convert. 218 NetBSD iconv() inserts a question mark if it cannot convert. 219 Only GNU libiconv and GNU libc are known to prefer to fail rather 220 than doing a lossy conversion. For other iconv() implementations, 221 we have to look at the number of irreversible conversions returned; 222 but this information is lost when iconv() returns for an E2BIG reason. 223 Therefore we cannot use the second, faster algorithm. */ 224 225 char *result = NULL; 226 size_t length = 0; 227 int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); 228 char *final_result; 229 230 if (retval < 0) 231 { 232 if (result != NULL) 233 abort (); 234 return NULL; 235 } 236 237 /* Add the terminating NUL byte. */ 238 final_result = 239 (result != NULL ? realloc (result, length + 1) : malloc (length + 1)); 240 if (final_result == NULL) 241 { 242 free (result); 243 errno = ENOMEM; 244 return NULL; 245 } 246 final_result[length] = '\0'; 247 248 return final_result; 249 250 # else 251 /* This algorithm is likely faster than the one above. But it may produce 252 iconv() returns for an E2BIG reason, when the output size guess is too 253 small. Therefore it can only be used when we don't need the number of 254 irreversible conversions performed. */ 255 char *result; 256 size_t result_size; 257 size_t length; 258 const char *inptr = src; 259 size_t inbytes_remaining = strlen (src); 260 261 /* Make a guess for the worst-case output size, in order to avoid a 262 realloc. It's OK if the guess is wrong as long as it is not zero and 263 doesn't lead to an integer overflow. */ 264 result_size = inbytes_remaining; 265 { 266 size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2); 267 if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX) 268 result_size *= MB_LEN_MAX; 269 } 270 result_size += 1; /* for the terminating NUL */ 271 272 result = (char *) malloc (result_size); 273 if (result == NULL) 274 { 275 errno = ENOMEM; 276 return NULL; 277 } 278 279 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 280 # if defined _LIBICONV_VERSION \ 281 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ 282 || defined __sun) 283 /* Set to the initial state. */ 284 iconv (cd, NULL, NULL, NULL, NULL); 285 # endif 286 287 /* Do the conversion. */ 288 { 289 char *outptr = result; 290 size_t outbytes_remaining = result_size - 1; 291 292 for (;;) 293 { 294 /* Here inptr + inbytes_remaining = src + strlen (src), 295 outptr + outbytes_remaining = result + result_size - 1. */ 296 size_t res = iconv (cd, 297 (ICONV_CONST char **) &inptr, &inbytes_remaining, 298 &outptr, &outbytes_remaining); 299 300 if (res == (size_t)(-1)) 301 { 302 if (errno == EINVAL) 303 break; 304 else if (errno == E2BIG) 305 { 306 size_t used = outptr - result; 307 size_t newsize = result_size * 2; 308 char *newresult; 309 310 if (!(newsize > result_size)) 311 { 312 errno = ENOMEM; 313 goto failed; 314 } 315 newresult = (char *) realloc (result, newsize); 316 if (newresult == NULL) 317 { 318 errno = ENOMEM; 319 goto failed; 320 } 321 result = newresult; 322 result_size = newsize; 323 outptr = result + used; 324 outbytes_remaining = result_size - 1 - used; 325 } 326 else 327 goto failed; 328 } 329 else 330 break; 331 } 332 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 333 # if defined _LIBICONV_VERSION \ 334 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ 335 || defined __sun) 336 for (;;) 337 { 338 /* Here outptr + outbytes_remaining = result + result_size - 1. */ 339 size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining); 340 341 if (res == (size_t)(-1)) 342 { 343 if (errno == E2BIG) 344 { 345 size_t used = outptr - result; 346 size_t newsize = result_size * 2; 347 char *newresult; 348 349 if (!(newsize > result_size)) 350 { 351 errno = ENOMEM; 352 goto failed; 353 } 354 newresult = (char *) realloc (result, newsize); 355 if (newresult == NULL) 356 { 357 errno = ENOMEM; 358 goto failed; 359 } 360 result = newresult; 361 result_size = newsize; 362 outptr = result + used; 363 outbytes_remaining = result_size - 1 - used; 364 } 365 else 366 goto failed; 367 } 368 else 369 break; 370 } 371 # endif 372 373 /* Add the terminating NUL byte. */ 374 *outptr++ = '\0'; 375 376 length = outptr - result; 377 } 378 379 /* Give away unused memory. */ 380 if (length < result_size) 381 { 382 char *smaller_result = (char *) realloc (result, length); 383 384 if (smaller_result != NULL) 385 result = smaller_result; 386 } 387 388 return result; 389 390 failed: 391 { 392 int saved_errno = errno; 393 free (result); 394 errno = saved_errno; 395 return NULL; 396 } 397 398 # endif 399 } 400 401 #endif 402 403 char * 404 str_iconv (const char *src, const char *from_codeset, const char *to_codeset) 405 { 406 if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) 407 { 408 char *result = strdup (src); 409 410 if (result == NULL) 411 errno = ENOMEM; 412 return result; 413 } 414 else 415 { 416 #if HAVE_ICONV 417 iconv_t cd; 418 char *result; 419 420 /* Avoid glibc-2.1 bug with EUC-KR. */ 421 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ 422 && !defined _LIBICONV_VERSION 423 if (c_strcasecmp (from_codeset, "EUC-KR") == 0 424 || c_strcasecmp (to_codeset, "EUC-KR") == 0) 425 { 426 errno = EINVAL; 427 return NULL; 428 } 429 # endif 430 cd = iconv_open (to_codeset, from_codeset); 431 if (cd == (iconv_t) -1) 432 return NULL; 433 434 result = str_cd_iconv (src, cd); 435 436 if (result == NULL) 437 { 438 /* Close cd, but preserve the errno from str_cd_iconv. */ 439 int saved_errno = errno; 440 iconv_close (cd); 441 errno = saved_errno; 442 } 443 else 444 { 445 if (iconv_close (cd) < 0) 446 { 447 /* Return NULL, but free the allocated memory, and while doing 448 that, preserve the errno from iconv_close. */ 449 int saved_errno = errno; 450 free (result); 451 errno = saved_errno; 452 return NULL; 453 } 454 } 455 return result; 456 #else 457 /* This is a different error code than if iconv_open existed but didn't 458 support from_codeset and to_codeset, so that the caller can emit 459 an error message such as 460 "iconv() is not supported. Installing GNU libiconv and 461 then reinstalling this package would fix this." */ 462 errno = ENOSYS; 463 return NULL; 464 #endif 465 } 466 } 467