1 /* $NetBSD: converter.c,v 1.4 2014/12/10 04:37:55 christos Exp $ */ 2 3 #ifndef lint 4 static char *rcsid = "Id: converter.c,v 1.1 2003/06/04 00:25:51 marka Exp "; 5 #endif 6 7 /* 8 * Copyright (c) 2000,2002 Japan Network Information Center. 9 * All rights reserved. 10 * 11 * By using this file, you agree to the terms and conditions set forth bellow. 12 * 13 * LICENSE TERMS AND CONDITIONS 14 * 15 * The following License Terms and Conditions apply, unless a different 16 * license is obtained from Japan Network Information Center ("JPNIC"), 17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, 18 * Chiyoda-ku, Tokyo 101-0047, Japan. 19 * 20 * 1. Use, Modification and Redistribution (including distribution of any 21 * modified or derived work) in source and/or binary forms is permitted 22 * under this License Terms and Conditions. 23 * 24 * 2. Redistribution of source code must retain the copyright notices as they 25 * appear in each source code file, this License Terms and Conditions. 26 * 27 * 3. Redistribution in binary form must reproduce the Copyright Notice, 28 * this License Terms and Conditions, in the documentation and/or other 29 * materials provided with the distribution. For the purposes of binary 30 * distribution the "Copyright Notice" refers to the following language: 31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." 32 * 33 * 4. The name of JPNIC may not be used to endorse or promote products 34 * derived from this Software without specific prior written approval of 35 * JPNIC. 36 * 37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC 38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE 41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 48 */ 49 50 #include <config.h> 51 52 #include <stddef.h> 53 #include <stdlib.h> 54 #include <stdio.h> 55 #include <string.h> 56 #include <ctype.h> 57 #include <errno.h> 58 #ifndef WITHOUT_ICONV 59 #include <iconv.h> 60 #endif 61 62 #include <idn/result.h> 63 #include <idn/assert.h> 64 #include <idn/logmacro.h> 65 #include <idn/converter.h> 66 #include <idn/aliaslist.h> 67 #include <idn/strhash.h> 68 #include <idn/debug.h> 69 #include <idn/ucs4.h> 70 #include <idn/punycode.h> 71 #include <idn/race.h> 72 #include <idn/util.h> 73 74 #ifndef IDN_UTF8_ENCODING_NAME 75 #define IDN_UTF8_ENCODING_NAME "UTF-8" /* by IANA */ 76 #endif 77 #ifndef IDN_RACE_ENCODING_NAME 78 #define IDN_RACE_ENCODING_NAME "RACE" 79 #endif 80 #ifndef IDN_AMCACEZ_ENCODING_NAME 81 #define IDN_AMCACEZ_ENCODING_NAME "AMC-ACE-Z" 82 #endif 83 #ifndef IDN_PUNYCODE_ENCODING_NAME 84 #define IDN_PUNYCODE_ENCODING_NAME "Punycode" 85 #endif 86 87 #define MAX_RECURSE 20 88 89 #ifdef WIN32 90 91 #define IDNKEY_IDNKIT "Software\\JPNIC\\IDN" 92 #define IDNVAL_ALIASFILE "AliasFile" 93 94 #else /* WIN32 */ 95 96 #ifndef IDN_RESCONF_DIR 97 #define IDN_RESCONF_DIR "/etc" 98 #endif 99 #define IDN_ALIAS_FILE IDN_RESCONF_DIR "/idnalias.conf" 100 101 #endif /* WIN32 */ 102 103 typedef struct { 104 idn_converter_openproc_t openfromucs4; 105 idn_converter_openproc_t opentoucs4; 106 idn_converter_convfromucs4proc_t convfromucs4; 107 idn_converter_convtoucs4proc_t convtoucs4; 108 idn_converter_closeproc_t close; 109 int encoding_type; 110 } converter_ops_t; 111 112 struct idn_converter { 113 char *local_encoding_name; 114 converter_ops_t *ops; 115 int flags; 116 int opened_convfromucs4; 117 int opened_convtoucs4; 118 int reference_count; 119 void *private_data; 120 }; 121 122 static idn__strhash_t encoding_name_hash; 123 static idn__aliaslist_t encoding_alias_list; 124 125 static idn_result_t register_standard_encoding(void); 126 static idn_result_t roundtrip_check(idn_converter_t ctx, 127 const unsigned long *from, 128 const char *to); 129 130 static idn_result_t 131 converter_none_open(idn_converter_t ctx, void **privdata); 132 static idn_result_t 133 converter_none_close(idn_converter_t ctx, void *privdata); 134 static idn_result_t 135 converter_none_convfromucs4(idn_converter_t ctx, 136 void *privdata, 137 const unsigned long *from, 138 char *to, size_t tolen); 139 static idn_result_t 140 converter_none_convtoucs4(idn_converter_t ctx, 141 void *privdata, const char *from, 142 unsigned long *to, size_t tolen); 143 144 #ifndef WITHOUT_ICONV 145 static idn_result_t 146 converter_iconv_openfromucs4(idn_converter_t ctx, void **privdata); 147 static idn_result_t 148 converter_iconv_opentoucs4(idn_converter_t ctx, void **privdata); 149 static idn_result_t 150 converter_iconv_close(idn_converter_t ctx, void *privdata); 151 static idn_result_t 152 converter_iconv_convfromucs4(idn_converter_t ctx, 153 void *privdata, 154 const unsigned long *from, 155 char *to, size_t tolen); 156 static idn_result_t 157 converter_iconv_convtoucs4(idn_converter_t ctx, 158 void *privdata, 159 const char *from, 160 unsigned long *to, size_t tolen); 161 162 static idn_result_t 163 iconv_initialize_privdata(void **privdata); 164 static void 165 iconv_finalize_privdata(void *privdata); 166 167 static char * get_system_aliasfile(void); 168 static int file_exist(const char *filename); 169 170 #endif /* !WITHOUT_ICONV */ 171 172 #ifdef DEBUG 173 static idn_result_t 174 converter_uescape_convfromucs4(idn_converter_t ctx, 175 void *privdata, 176 const unsigned long *from, 177 char *to, size_t tolen); 178 static idn_result_t 179 converter_uescape_convtoucs4(idn_converter_t ctx, 180 void *privdata, 181 const char *from, 182 unsigned long *to, 183 size_t tolen); 184 #endif /* DEBUG */ 185 186 static converter_ops_t none_converter_ops = { 187 converter_none_open, 188 converter_none_open, 189 converter_none_convfromucs4, 190 converter_none_convtoucs4, 191 converter_none_close, 192 IDN_NONACE, 193 }; 194 195 #ifndef WITHOUT_ICONV 196 static converter_ops_t iconv_converter_ops = { 197 converter_iconv_openfromucs4, 198 converter_iconv_opentoucs4, 199 converter_iconv_convfromucs4, 200 converter_iconv_convtoucs4, 201 converter_iconv_close, 202 IDN_NONACE, 203 }; 204 #endif 205 206 /* 207 * Initialize. 208 */ 209 210 idn_result_t 211 idn_converter_initialize(void) { 212 idn_result_t r; 213 idn__strhash_t hash; 214 idn__aliaslist_t list; 215 #ifndef WITHOUT_ICONV 216 const char *fname; 217 #endif 218 219 TRACE(("idn_converter_initialize()\n")); 220 221 if (encoding_name_hash == NULL) { 222 if ((r = idn__strhash_create(&hash)) != idn_success) 223 goto ret; 224 encoding_name_hash = hash; 225 r = register_standard_encoding(); 226 } 227 if (encoding_alias_list == NULL) { 228 if ((r = idn__aliaslist_create(&list)) != idn_success) 229 goto ret; 230 encoding_alias_list = list; 231 #ifndef WITHOUT_ICONV 232 fname = get_system_aliasfile(); 233 if (fname != NULL && file_exist(fname)) 234 idn_converter_aliasfile(fname); 235 #endif 236 } 237 238 r = idn_success; 239 ret: 240 TRACE(("idn_converter_initialize(): %s\n", idn_result_tostring(r))); 241 return (r); 242 } 243 244 #ifndef WITHOUT_ICONV 245 static char * 246 get_system_aliasfile() { 247 #ifdef WIN32 248 static char alias_path[500]; /* a good longer than MAX_PATH */ 249 250 if (idn__util_getregistrystring(idn__util_hkey_localmachine, 251 IDNVAL_ALIASFILE, 252 alias_path, sizeof(alias_path))) { 253 return (alias_path); 254 } else { 255 return (NULL); 256 } 257 #else 258 return (IDN_ALIAS_FILE); 259 #endif 260 } 261 262 static int 263 file_exist(const char *filename) { 264 FILE *fp; 265 266 if ((fp = fopen(filename, "r")) == NULL) 267 return (0); 268 fclose(fp); 269 return (1); 270 } 271 #endif 272 273 idn_result_t 274 idn_converter_create(const char *name, idn_converter_t *ctxp, int flags) { 275 const char *realname; 276 idn_converter_t ctx; 277 idn_result_t r; 278 void *v; 279 280 assert(name != NULL && ctxp != NULL); 281 282 TRACE(("idn_converter_create(%s)\n", name)); 283 284 realname = idn_converter_getrealname(name); 285 #ifdef DEBUG 286 if (strcmp(name, realname) != 0) { 287 TRACE(("idn_converter_create: realname=%s\n", realname)); 288 } 289 #endif 290 291 *ctxp = NULL; 292 293 /* Allocate memory for a converter context and the name. */ 294 ctx = malloc(sizeof(struct idn_converter) + strlen(realname) + 1); 295 if (ctx == NULL) { 296 r = idn_nomemory; 297 goto ret; 298 } 299 300 ctx->local_encoding_name = (char *)(ctx + 1); 301 (void)strcpy(ctx->local_encoding_name, realname); 302 ctx->flags = flags; 303 ctx->reference_count = 1; 304 ctx->opened_convfromucs4 = 0; 305 ctx->opened_convtoucs4 = 0; 306 ctx->private_data = NULL; 307 308 assert(encoding_name_hash != NULL); 309 310 if (strcmp(realname, IDN_UTF8_ENCODING_NAME) == 0) { 311 /* No conversion needed */ 312 ctx->ops = &none_converter_ops; 313 } else if ((r = idn__strhash_get(encoding_name_hash, realname, &v)) 314 == idn_success) { 315 /* Special converter found */ 316 ctx->ops = (converter_ops_t *)v; 317 } else { 318 /* General case */ 319 #ifdef WITHOUT_ICONV 320 free(ctx); 321 *ctxp = NULL; 322 r = idn_invalid_name; 323 goto ret; 324 #else 325 ctx->ops = &iconv_converter_ops; 326 #endif 327 } 328 329 if ((flags & IDN_CONVERTER_DELAYEDOPEN) == 0) { 330 r = (ctx->ops->openfromucs4)(ctx, &(ctx->private_data)); 331 if (r != idn_success) { 332 WARNING(("idn_converter_create(): open failed " 333 "(ucs4->local)\n")); 334 free(ctx); 335 *ctxp = NULL; 336 goto ret; 337 } 338 ctx->opened_convfromucs4 = 1; 339 340 r = (*ctx->ops->opentoucs4)(ctx, &(ctx->private_data)); 341 if (r != idn_success) { 342 WARNING(("idn_converter_create(): open failed " 343 "(local->ucs4)\n")); 344 free(ctx); 345 *ctxp = NULL; 346 goto ret; 347 } 348 ctx->opened_convtoucs4 = 1; 349 } 350 351 *ctxp = ctx; 352 r = idn_success; 353 ret: 354 TRACE(("idn_converter_create(): %s\n", idn_result_tostring(r))); 355 return (r); 356 } 357 358 void 359 idn_converter_destroy(idn_converter_t ctx) { 360 assert(ctx != NULL); 361 362 TRACE(("idn_converter_destroy(ctx=%s)\n", ctx->local_encoding_name)); 363 364 ctx->reference_count--; 365 if (ctx->reference_count <= 0) { 366 TRACE(("idn_converter_destroy(): the object is destroyed\n")); 367 (void)(*ctx->ops->close)(ctx, ctx->private_data); 368 free(ctx); 369 } else { 370 TRACE(("idn_converter_destroy(): " 371 "update reference count (%d->%d)\n", 372 ctx->reference_count + 1, ctx->reference_count)); 373 } 374 } 375 376 void 377 idn_converter_incrref(idn_converter_t ctx) { 378 assert(ctx != NULL); 379 380 TRACE(("idn_converter_incrref(ctx=%s)\n", ctx->local_encoding_name)); 381 TRACE(("idn_converter_incrref: update reference count (%d->%d)\n", 382 ctx->reference_count, ctx->reference_count + 1)); 383 384 ctx->reference_count++; 385 } 386 387 char * 388 idn_converter_localencoding(idn_converter_t ctx) { 389 assert(ctx != NULL); 390 TRACE(("idn_converter_localencoding(ctx=%s)\n", 391 ctx->local_encoding_name)); 392 return (ctx->local_encoding_name); 393 } 394 395 int 396 idn_converter_encodingtype(idn_converter_t ctx) { 397 int encoding_type; 398 399 assert(ctx != NULL); 400 TRACE(("idn_converter_encodingtype(ctx=%s)\n", 401 ctx->local_encoding_name)); 402 403 encoding_type = ctx->ops->encoding_type; 404 TRACE(("idn_converter_encodingtype(): %d\n", encoding_type)); 405 return (encoding_type); 406 } 407 408 int 409 idn_converter_isasciicompatible(idn_converter_t ctx) { 410 int iscompat; 411 412 assert(ctx != NULL); 413 TRACE(("idn_converter_isasciicompatible(ctx=%s)\n", 414 ctx->local_encoding_name)); 415 416 iscompat = (ctx->ops->encoding_type != IDN_NONACE); 417 TRACE(("idn_converter_isasciicompatible(): %d\n", iscompat)); 418 return (iscompat); 419 } 420 421 idn_result_t 422 idn_converter_convfromucs4(idn_converter_t ctx, const unsigned long *from, 423 char *to, size_t tolen) { 424 idn_result_t r; 425 426 assert(ctx != NULL && from != NULL && to != NULL); 427 428 TRACE(("idn_converter_convfromucs4(ctx=%s, from=\"%s\", tolen=%d)\n", 429 ctx->local_encoding_name, idn__debug_ucs4xstring(from, 50), 430 (int)tolen)); 431 432 if (!ctx->opened_convfromucs4) { 433 r = (*ctx->ops->openfromucs4)(ctx, &(ctx->private_data)); 434 if (r != idn_success) 435 goto ret; 436 ctx->opened_convfromucs4 = 1; 437 } 438 439 r = (*ctx->ops->convfromucs4)(ctx, ctx->private_data, from, to, tolen); 440 if (r != idn_success) 441 goto ret; 442 if ((ctx->flags & IDN_CONVERTER_RTCHECK) != 0) { 443 r = roundtrip_check(ctx, from, to); 444 if (r != idn_success) 445 goto ret; 446 } 447 448 r = idn_success; 449 ret: 450 if (r == idn_success) { 451 TRACE(("idn_converter_convfromucs4(): success (to=\"%s\")\n", 452 idn__debug_xstring(to, 50))); 453 } else { 454 TRACE(("idn_converter_convfromucs4(): %s\n", 455 idn_result_tostring(r))); 456 } 457 return (r); 458 } 459 460 idn_result_t 461 idn_converter_convtoucs4(idn_converter_t ctx, const char *from, 462 unsigned long *to, size_t tolen) { 463 idn_result_t r; 464 465 assert(ctx != NULL && from != NULL && to != NULL); 466 467 TRACE(("idn_converter_convtoucs4(ctx=%s, from=\"%s\", tolen=%d)\n", 468 ctx->local_encoding_name, idn__debug_xstring(from, 50), 469 (int)tolen)); 470 471 if (!ctx->opened_convtoucs4) { 472 r = (*ctx->ops->opentoucs4)(ctx, &(ctx->private_data)); 473 if (r != idn_success) 474 goto ret; 475 ctx->opened_convtoucs4 = 1; 476 } 477 478 r = (*ctx->ops->convtoucs4)(ctx, ctx->private_data, from, to, tolen); 479 ret: 480 if (r == idn_success) { 481 TRACE(("idn_converter_convtoucs4(): success (to=\"%s\")\n", 482 idn__debug_ucs4xstring(to, 50))); 483 } else { 484 TRACE(("idn_converter_convtoucs4(): %s\n", 485 idn_result_tostring(r))); 486 } 487 return (r); 488 } 489 490 /* 491 * Encoding registration. 492 */ 493 494 idn_result_t 495 idn_converter_register(const char *name, 496 idn_converter_openproc_t openfromucs4, 497 idn_converter_openproc_t opentoucs4, 498 idn_converter_convfromucs4proc_t convfromucs4, 499 idn_converter_convtoucs4proc_t convtoucs4, 500 idn_converter_closeproc_t close, 501 int encoding_type) { 502 converter_ops_t *ops; 503 idn_result_t r; 504 505 assert(name != NULL && convfromucs4 != NULL && convtoucs4 != NULL); 506 507 TRACE(("idn_converter_register(name=%s)\n", name)); 508 509 if ((ops = malloc(sizeof(*ops))) == NULL) { 510 r = idn_nomemory; 511 goto ret; 512 } 513 514 if (openfromucs4 == NULL) 515 openfromucs4 = converter_none_open; 516 if (opentoucs4 == NULL) 517 opentoucs4 = converter_none_open; 518 if (close == NULL) 519 close = converter_none_close; 520 521 ops->openfromucs4 = openfromucs4; 522 ops->opentoucs4 = opentoucs4; 523 ops->convfromucs4 = convfromucs4; 524 ops->convtoucs4 = convtoucs4; 525 ops->close = close; 526 ops->encoding_type = encoding_type; 527 528 r = idn__strhash_put(encoding_name_hash, name, ops); 529 if (r != idn_success) { 530 free(ops); 531 goto ret; 532 } 533 534 r = idn_success; 535 ret: 536 TRACE(("idn_converter_register(): %s\n", idn_result_tostring(r))); 537 return (r); 538 } 539 540 static idn_result_t 541 register_standard_encoding(void) { 542 idn_result_t r; 543 544 r = idn_converter_register(IDN_PUNYCODE_ENCODING_NAME, 545 NULL, 546 NULL, 547 idn__punycode_encode, 548 idn__punycode_decode, 549 converter_none_close, 550 IDN_ACE_STRICTCASE); 551 if (r != idn_success) 552 return (r); 553 554 #ifdef IDN_EXTRA_ACE 555 r = idn_converter_register(IDN_AMCACEZ_ENCODING_NAME, 556 NULL, 557 NULL, 558 idn__punycode_encode, 559 idn__punycode_decode, 560 converter_none_close, 561 IDN_ACE_STRICTCASE); 562 if (r != idn_success) 563 return (r); 564 565 r = idn_converter_register(IDN_RACE_ENCODING_NAME, 566 NULL, 567 NULL, 568 idn__race_encode, 569 idn__race_decode, 570 converter_none_close, 571 IDN_ACE_LOOSECASE); 572 if (r != idn_success) 573 return (r); 574 #endif /* IDN_EXTRA_ACE */ 575 576 #ifdef DEBUG 577 /* This is convenient for debug. Not useful for other purposes. */ 578 r = idn_converter_register("U-escape", 579 NULL, 580 NULL, 581 converter_uescape_convfromucs4, 582 converter_uescape_convtoucs4, 583 NULL, 584 IDN_NONACE); 585 if (r != idn_success) 586 return (r); 587 #endif /* DEBUG */ 588 589 return (r); 590 } 591 592 /* 593 * Encoding alias support. 594 */ 595 idn_result_t 596 idn_converter_addalias(const char *alias_name, const char *real_name, 597 int first_item) { 598 idn_result_t r; 599 600 assert(alias_name != NULL && real_name != NULL); 601 602 TRACE(("idn_converter_addalias(alias_name=%s,real_name=%s)\n", 603 alias_name, real_name)); 604 605 if (strlen(alias_name) == 0 || strlen(real_name) == 0) { 606 return idn_invalid_syntax; 607 } 608 609 if (strcmp(alias_name, real_name) == 0) { 610 r = idn_success; 611 goto ret; 612 } 613 614 if (encoding_alias_list == NULL) { 615 WARNING(("idn_converter_addalias(): the module is not " 616 "initialized\n")); 617 r = idn_failure; 618 goto ret; 619 } 620 621 r = idn__aliaslist_additem(encoding_alias_list, alias_name, real_name, 622 first_item); 623 ret: 624 TRACE(("idn_converter_addalias(): %s\n", idn_result_tostring(r))); 625 return (r); 626 } 627 628 idn_result_t 629 idn_converter_aliasfile(const char *path) { 630 idn_result_t r; 631 632 assert(path != NULL); 633 634 TRACE(("idn_converter_aliasfile(path=%s)\n", path)); 635 636 if (encoding_alias_list == NULL) { 637 WARNING(("idn_converter_aliasfile(): the module is not " 638 "initialized\n")); 639 return (idn_failure); 640 } 641 642 r = idn__aliaslist_aliasfile(encoding_alias_list, path); 643 644 TRACE(("idn_converter_aliasfile(): %s\n", idn_result_tostring(r))); 645 return (r); 646 } 647 648 idn_result_t 649 idn_converter_resetalias(void) { 650 idn__aliaslist_t list; 651 idn_result_t r; 652 653 TRACE(("idn_converter_resetalias()\n")); 654 655 if (encoding_alias_list == NULL) { 656 WARNING(("idn_converter_resetalias(): the module is not " 657 "initialized\n")); 658 return (idn_failure); 659 } 660 661 list = encoding_alias_list; 662 encoding_alias_list = NULL; 663 idn__aliaslist_destroy(list); 664 list = NULL; 665 r = idn__aliaslist_create(&list); 666 encoding_alias_list = list; 667 668 TRACE(("idn_converter_resetalias(): %s\n", idn_result_tostring(r))); 669 return (r); 670 } 671 672 const char * 673 idn_converter_getrealname(const char *name) { 674 char *realname; 675 idn_result_t r; 676 677 TRACE(("idn_converter_getrealname()\n")); 678 679 assert(name != NULL); 680 681 if (encoding_alias_list == NULL) { 682 WARNING(("idn_converter_getrealname(): the module is not " 683 "initialized\n")); 684 return (name); 685 } 686 687 r = idn__aliaslist_find(encoding_alias_list, name, &realname); 688 if (r != idn_success) { 689 return (name); 690 } 691 return (realname); 692 } 693 694 /* 695 * Round trip check. 696 */ 697 698 static idn_result_t 699 roundtrip_check(idn_converter_t ctx, const unsigned long *from, const char *to) 700 { 701 /* 702 * One problem with iconv() convertion is that 703 * iconv() doesn't signal an error if the input 704 * string contains characters which are valid but 705 * do not have mapping to the output codeset. 706 * (the behavior of iconv() for that case is defined as 707 * `implementation dependent') 708 * One way to check this case is to perform round-trip 709 * conversion and see if it is same as the original string. 710 */ 711 idn_result_t r; 712 unsigned long *back; 713 unsigned long backbuf[256]; 714 size_t fromlen; 715 size_t backlen; 716 717 TRACE(("idn_converter_convert: round-trip checking (from=\"%s\")\n", 718 idn__debug_ucs4xstring(from, 50))); 719 720 /* Allocate enough buffer. */ 721 fromlen = idn_ucs4_strlen(from) + 1; 722 if (fromlen * sizeof(*back) <= sizeof(backbuf)) { 723 backlen = sizeof(backbuf); 724 back = backbuf; 725 } else { 726 backlen = fromlen; 727 back = (unsigned long *)malloc(backlen * sizeof(*back)); 728 if (back == NULL) 729 return (idn_nomemory); 730 } 731 732 /* 733 * Perform backward conversion. 734 */ 735 r = idn_converter_convtoucs4(ctx, to, back, backlen); 736 switch (r) { 737 case idn_success: 738 if (memcmp(back, from, sizeof(*from) * fromlen) != 0) 739 r = idn_nomapping; 740 break; 741 case idn_invalid_encoding: 742 case idn_buffer_overflow: 743 r = idn_nomapping; 744 break; 745 default: 746 break; 747 } 748 749 if (back != backbuf) 750 free(back); 751 752 if (r != idn_success) { 753 TRACE(("round-trip check failed: %s\n", 754 idn_result_tostring(r))); 755 } 756 757 return (r); 758 } 759 760 /* 761 * Identity conversion (or, no conversion at all). 762 */ 763 764 static idn_result_t 765 converter_none_open(idn_converter_t ctx, void **privdata) { 766 assert(ctx != NULL); 767 768 return (idn_success); 769 } 770 771 static idn_result_t 772 converter_none_close(idn_converter_t ctx, void *privdata) { 773 assert(ctx != NULL); 774 775 return (idn_success); 776 } 777 778 static idn_result_t 779 converter_none_convfromucs4(idn_converter_t ctx, void *privdata, 780 const unsigned long *from, char *to, size_t tolen) { 781 assert(ctx != NULL && from != NULL && to != NULL); 782 783 return idn_ucs4_ucs4toutf8(from, to, tolen); 784 } 785 786 static idn_result_t 787 converter_none_convtoucs4(idn_converter_t ctx, void *privdata, 788 const char *from, unsigned long *to, size_t tolen) { 789 assert(ctx != NULL && from != NULL && to != NULL); 790 791 return idn_ucs4_utf8toucs4(from, to, tolen); 792 } 793 794 #ifndef WITHOUT_ICONV 795 796 /* 797 * Conversion using iconv() interface. 798 */ 799 800 static idn_result_t 801 converter_iconv_openfromucs4(idn_converter_t ctx, void **privdata) { 802 iconv_t *ictxp; 803 idn_result_t r; 804 805 assert(ctx != NULL); 806 807 r = iconv_initialize_privdata(privdata); 808 if (r != idn_success) 809 return (r); 810 811 ictxp = (iconv_t *)*privdata; 812 *ictxp = iconv_open(ctx->local_encoding_name, IDN_UTF8_ENCODING_NAME); 813 if (*ictxp == (iconv_t)(-1)) { 814 free(*privdata); 815 *privdata = NULL; 816 switch (errno) { 817 case ENOMEM: 818 return (idn_nomemory); 819 case EINVAL: 820 return (idn_invalid_name); 821 default: 822 WARNING(("iconv_open failed with errno %d\n", errno)); 823 return (idn_failure); 824 } 825 } 826 827 return (idn_success); 828 } 829 830 static idn_result_t 831 converter_iconv_opentoucs4(idn_converter_t ctx, void **privdata) { 832 iconv_t *ictxp; 833 idn_result_t r; 834 835 assert(ctx != NULL); 836 837 r = iconv_initialize_privdata(privdata); 838 if (r != idn_success) 839 return (r); 840 841 ictxp = (iconv_t *)*privdata + 1; 842 *ictxp = iconv_open(IDN_UTF8_ENCODING_NAME, ctx->local_encoding_name); 843 if (*ictxp == (iconv_t)(-1)) { 844 free(*privdata); 845 *privdata = NULL; 846 switch (errno) { 847 case ENOMEM: 848 return (idn_nomemory); 849 case EINVAL: 850 return (idn_invalid_name); 851 default: 852 WARNING(("iconv_open failed with errno %d\n", errno)); 853 return (idn_failure); 854 } 855 } 856 857 return (idn_success); 858 } 859 860 static idn_result_t 861 iconv_initialize_privdata(void **privdata) { 862 if (*privdata == NULL) { 863 *privdata = malloc(sizeof(iconv_t) * 2); 864 if (*privdata == NULL) 865 return (idn_nomemory); 866 *((iconv_t *)*privdata) = (iconv_t)(-1); 867 *((iconv_t *)*privdata + 1) = (iconv_t)(-1); 868 } 869 870 return (idn_success); 871 } 872 873 static void 874 iconv_finalize_privdata(void *privdata) { 875 iconv_t *ictxp; 876 877 if (privdata != NULL) { 878 ictxp = (iconv_t *)privdata; 879 if (*ictxp != (iconv_t)(-1)) 880 iconv_close(*ictxp); 881 882 ictxp++; 883 if (*ictxp != (iconv_t)(-1)) 884 iconv_close(*ictxp); 885 free(privdata); 886 } 887 } 888 889 static idn_result_t 890 converter_iconv_close(idn_converter_t ctx, void *privdata) { 891 assert(ctx != NULL); 892 893 iconv_finalize_privdata(privdata); 894 895 return (idn_success); 896 } 897 898 static idn_result_t 899 converter_iconv_convfromucs4(idn_converter_t ctx, void *privdata, 900 const unsigned long *from, char *to, 901 size_t tolen) { 902 iconv_t ictx; 903 char *utf8 = NULL; 904 size_t utf8size = 256; /* large enough */ 905 idn_result_t r; 906 size_t sz; 907 size_t inleft; 908 size_t outleft; 909 char *inbuf, *outbuf; 910 911 assert(ctx != NULL && from != NULL && to != NULL); 912 913 if (tolen <= 0) { 914 r = idn_buffer_overflow; /* need space for NUL */ 915 goto ret; 916 } 917 918 /* 919 * UCS4 -> UTF-8 conversion. 920 */ 921 utf8 = (char *)malloc(utf8size); 922 if (utf8 == NULL) { 923 r = idn_nomemory; 924 goto ret; 925 } 926 927 try_again: 928 r = idn_ucs4_ucs4toutf8(from, utf8, utf8size); 929 if (r == idn_buffer_overflow) { 930 char *new_utf8; 931 932 utf8size *= 2; 933 new_utf8 = (char *)realloc(utf8, utf8size); 934 if (new_utf8 == NULL) { 935 r = idn_nomemory; 936 goto ret; 937 } 938 utf8 = new_utf8; 939 goto try_again; 940 } else if (r != idn_success) { 941 goto ret; 942 } 943 944 ictx = ((iconv_t *)privdata)[0]; 945 946 /* 947 * Reset internal state. 948 * 949 * The following code should work according to the SUSv2 spec, 950 * but causes segmentation fault with Solaris 2.6. 951 * So.. a work-around. 952 * 953 * (void)iconv(ictx, (const char **)NULL, (size_t *)NULL, 954 * (char **)NULL, (size_t *)NULL); 955 */ 956 inleft = 0; 957 outbuf = NULL; 958 outleft = 0; 959 (void)iconv(ictx, (const char **)NULL, &inleft, &outbuf, &outleft); 960 961 inleft = strlen(utf8); 962 inbuf = utf8; 963 outleft = tolen - 1; /* reserve space for terminating NUL */ 964 sz = iconv(ictx, (const char **)&inbuf, &inleft, &to, &outleft); 965 966 if (sz == (size_t)(-1) || inleft > 0) { 967 switch (errno) { 968 case EILSEQ: 969 case EINVAL: 970 /* 971 * We already checked the validity of the input 972 * string. So we assume a mapping error. 973 */ 974 r = idn_nomapping; 975 goto ret; 976 case E2BIG: 977 r = idn_buffer_overflow; 978 goto ret; 979 default: 980 WARNING(("iconv failed with errno %d\n", errno)); 981 r = idn_failure; 982 goto ret; 983 } 984 } 985 986 /* 987 * For UTF-8 -> local conversion, append a sequence of 988 * state reset. 989 */ 990 inleft = 0; 991 sz = iconv(ictx, (const char **)NULL, &inleft, &to, &outleft); 992 if (sz == (size_t)(-1)) { 993 switch (errno) { 994 case EILSEQ: 995 case EINVAL: 996 r = idn_invalid_encoding; 997 goto ret; 998 case E2BIG: 999 r = idn_buffer_overflow; 1000 goto ret; 1001 default: 1002 WARNING(("iconv failed with errno %d\n", errno)); 1003 r = idn_failure; 1004 goto ret; 1005 } 1006 } 1007 *to = '\0'; 1008 r = idn_success; 1009 1010 ret: 1011 free(utf8); 1012 return (r); 1013 1014 } 1015 1016 static idn_result_t 1017 converter_iconv_convtoucs4(idn_converter_t ctx, void *privdata, 1018 const char *from, unsigned long *to, size_t tolen) { 1019 iconv_t ictx; 1020 char *utf8 = NULL; 1021 size_t utf8size = 256; /* large enough */ 1022 idn_result_t r; 1023 size_t sz; 1024 size_t inleft; 1025 size_t outleft; 1026 const char *from_ptr; 1027 char *outbuf; 1028 1029 assert(ctx != NULL && from != NULL && to != NULL); 1030 1031 if (tolen <= 0) { 1032 r = idn_buffer_overflow; /* need space for NUL */ 1033 goto ret; 1034 } 1035 ictx = ((iconv_t *)privdata)[1]; 1036 utf8 = (char *)malloc(utf8size); 1037 if (utf8 == NULL) { 1038 r = idn_nomemory; 1039 goto ret; 1040 } 1041 1042 try_again: 1043 /* 1044 * Reset internal state. 1045 */ 1046 inleft = 0; 1047 outbuf = NULL; 1048 outleft = 0; 1049 (void)iconv(ictx, (const char **)NULL, &inleft, &outbuf, &outleft); 1050 1051 from_ptr = from; 1052 inleft = strlen(from); 1053 outbuf = utf8; 1054 outleft = utf8size - 1; /* reserve space for terminating NUL */ 1055 sz = iconv(ictx, (const char **)&from_ptr, &inleft, &outbuf, &outleft); 1056 1057 if (sz == (size_t)(-1) || inleft > 0) { 1058 char *new_utf8; 1059 1060 switch (errno) { 1061 case EILSEQ: 1062 case EINVAL: 1063 /* 1064 * We assume all the characters in the local 1065 * codeset are included in UCS. This means mapping 1066 * error is not possible, so the input string must 1067 * have some problem. 1068 */ 1069 r = idn_invalid_encoding; 1070 goto ret; 1071 case E2BIG: 1072 utf8size *= 2; 1073 new_utf8 = (char *)realloc(utf8, utf8size); 1074 if (new_utf8 == NULL) { 1075 r = idn_nomemory; 1076 goto ret; 1077 } 1078 utf8 = new_utf8; 1079 goto try_again; 1080 default: 1081 WARNING(("iconv failed with errno %d\n", errno)); 1082 r = idn_failure; 1083 goto ret; 1084 } 1085 } 1086 *outbuf = '\0'; 1087 1088 /* 1089 * UTF-8 -> UCS4 conversion. 1090 */ 1091 r = idn_ucs4_utf8toucs4(utf8, to, tolen); 1092 1093 ret: 1094 free(utf8); 1095 return (r); 1096 } 1097 1098 #endif /* !WITHOUT_ICONV */ 1099 1100 #ifdef DEBUG 1101 /* 1102 * Conversion to/from unicode escape string. 1103 * Arbitrary UCS-4 character can be specified by a special sequence 1104 * \u{XXXXXX} 1105 * where XXXXX denotes any hexadecimal string up to FFFFFFFF. 1106 * This is designed for debugging. 1107 */ 1108 1109 static idn_result_t 1110 converter_uescape_convfromucs4(idn_converter_t ctx, void *privdata, 1111 const unsigned long *from, char *to, 1112 size_t tolen) { 1113 idn_result_t r; 1114 unsigned long v; 1115 1116 while (*from != '\0') { 1117 v = *from++; 1118 1119 if (v <= 0x7f) { 1120 if (tolen < 1) { 1121 r = idn_buffer_overflow; 1122 goto failure; 1123 } 1124 *to++ = v; 1125 tolen--; 1126 } else if (v <= 0xffffffff) { 1127 char tmp[20]; 1128 int len; 1129 1130 (void)sprintf(tmp, "\\u{%lx}", v); 1131 len = strlen(tmp); 1132 if (tolen < len) { 1133 r = idn_buffer_overflow; 1134 goto failure; 1135 } 1136 (void)memcpy(to, tmp, len); 1137 to += len; 1138 tolen -= len; 1139 } else { 1140 r = idn_invalid_encoding; 1141 goto failure; 1142 } 1143 } 1144 1145 if (tolen <= 0) { 1146 r = idn_buffer_overflow; 1147 goto failure; 1148 } 1149 *to = '\0'; 1150 1151 return (idn_success); 1152 1153 failure: 1154 if (r != idn_buffer_overflow) { 1155 WARNING(("idn_uescape_convfromucs4(): %s\n", 1156 idn_result_tostring(r))); 1157 } 1158 return (r); 1159 } 1160 1161 static idn_result_t 1162 converter_uescape_convtoucs4(idn_converter_t ctx, void *privdata, 1163 const char *from, unsigned long *to, size_t tolen) 1164 { 1165 idn_result_t r; 1166 size_t fromlen = strlen(from); 1167 1168 while (*from != '\0') { 1169 if (tolen <= 0) { 1170 r = idn_buffer_overflow; 1171 goto failure; 1172 } 1173 if (strncmp(from, "\\u{", 3) == 0 || 1174 strncmp(from, "\\U{", 3) == 0) { 1175 size_t ullen; 1176 unsigned long v; 1177 char *end; 1178 1179 v = strtoul(from + 3, &end, 16); 1180 ullen = end - (from + 3); 1181 if (*end == '}' && ullen > 1 && ullen < 8) { 1182 *to = v; 1183 from = end + 1; 1184 fromlen -= ullen; 1185 } else { 1186 *to = '\\'; 1187 from++; 1188 fromlen--; 1189 } 1190 } else { 1191 int c = *(unsigned char *)from; 1192 size_t width; 1193 char buf[8]; 1194 1195 if (c < 0x80) 1196 width = 1; 1197 else if (c < 0xc0) 1198 width = 0; 1199 else if (c < 0xe0) 1200 width = 2; 1201 else if (c < 0xf0) 1202 width = 3; 1203 else if (c < 0xf8) 1204 width = 4; 1205 else if (c < 0xfc) 1206 width = 5; 1207 else if (c < 0xfe) 1208 width = 6; 1209 else 1210 width = 0; 1211 if (width == 0 || width > fromlen) { 1212 r = idn_invalid_encoding; 1213 goto failure; 1214 } 1215 1216 memcpy(buf, from, width); 1217 buf[width] = '\0'; 1218 r = idn_ucs4_utf8toucs4(buf, to, tolen); 1219 if (r != idn_success) { 1220 r = idn_invalid_encoding; 1221 goto failure; 1222 } 1223 from += width; 1224 fromlen -= width; 1225 } 1226 to++; 1227 tolen--; 1228 } 1229 1230 if (tolen <= 0) { 1231 r = idn_buffer_overflow; 1232 goto failure; 1233 } 1234 *to = '\0'; 1235 1236 return (idn_success); 1237 1238 failure: 1239 if (r != idn_buffer_overflow) { 1240 WARNING(("idn_uescape_convtoucs4(): %s\n", 1241 idn_result_tostring(r))); 1242 } 1243 return (r); 1244 } 1245 1246 #endif 1247