#ifndef lint
/* RCS identification string for this file; left out when `lint' is defined. */
static char *rcsid =
	"$Id: converter.c,v 1.1 2003/06/04 00:25:51 marka Exp $";
#endif
4
5 /*
6 * Copyright (c) 2000,2002 Japan Network Information Center.
7 * All rights reserved.
8 *
9 * By using this file, you agree to the terms and conditions set forth bellow.
10 *
11 * LICENSE TERMS AND CONDITIONS
12 *
13 * The following License Terms and Conditions apply, unless a different
14 * license is obtained from Japan Network Information Center ("JPNIC"),
15 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
16 * Chiyoda-ku, Tokyo 101-0047, Japan.
17 *
18 * 1. Use, Modification and Redistribution (including distribution of any
19 * modified or derived work) in source and/or binary forms is permitted
20 * under this License Terms and Conditions.
21 *
22 * 2. Redistribution of source code must retain the copyright notices as they
23 * appear in each source code file, this License Terms and Conditions.
24 *
25 * 3. Redistribution in binary form must reproduce the Copyright Notice,
26 * this License Terms and Conditions, in the documentation and/or other
27 * materials provided with the distribution. For the purposes of binary
28 * distribution the "Copyright Notice" refers to the following language:
29 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
30 *
31 * 4. The name of JPNIC may not be used to endorse or promote products
32 * derived from this Software without specific prior written approval of
33 * JPNIC.
34 *
35 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
36 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
38 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
40 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
41 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
42 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
43 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
44 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
45 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
46 */
47
48 #include <config.h>
49
50 #include <stddef.h>
51 #include <stdlib.h>
52 #include <stdio.h>
53 #include <string.h>
54 #include <ctype.h>
55 #include <errno.h>
56 #ifndef WITHOUT_ICONV
57 #include <iconv.h>
58 #endif
59
60 #include <idn/result.h>
61 #include <idn/assert.h>
62 #include <idn/logmacro.h>
63 #include <idn/converter.h>
64 #include <idn/aliaslist.h>
65 #include <idn/strhash.h>
66 #include <idn/debug.h>
67 #include <idn/ucs4.h>
68 #include <idn/punycode.h>
69 #include <idn/race.h>
70 #include <idn/util.h>
71
/*
 * Names of the encodings this module refers to directly.  Each one may be
 * pre-defined by the build to override the default given here.
 */
#ifndef IDN_UTF8_ENCODING_NAME
#define IDN_UTF8_ENCODING_NAME		"UTF-8"		/* by IANA */
#endif
#ifndef IDN_RACE_ENCODING_NAME
#define IDN_RACE_ENCODING_NAME		"RACE"
#endif
#ifndef IDN_AMCACEZ_ENCODING_NAME
#define IDN_AMCACEZ_ENCODING_NAME	"AMC-ACE-Z"
#endif
#ifndef IDN_PUNYCODE_ENCODING_NAME
#define IDN_PUNYCODE_ENCODING_NAME	"Punycode"
#endif

#define MAX_RECURSE	20

/*
 * Where the system-wide encoding alias file is found: a registry value
 * on WIN32, a fixed path under IDN_RESCONF_DIR elsewhere.
 */
#ifdef WIN32

#define IDNKEY_IDNKIT		"Software\\JPNIC\\IDN"
#define IDNVAL_ALIASFILE	"AliasFile"

#else /* WIN32 */

#ifndef IDN_RESCONF_DIR
#define IDN_RESCONF_DIR		"/etc"
#endif
#define IDN_ALIAS_FILE		IDN_RESCONF_DIR "/idnalias.conf"

#endif /* WIN32 */
100
101 typedef struct {
102 idn_converter_openproc_t openfromucs4;
103 idn_converter_openproc_t opentoucs4;
104 idn_converter_convfromucs4proc_t convfromucs4;
105 idn_converter_convtoucs4proc_t convtoucs4;
106 idn_converter_closeproc_t close;
107 int encoding_type;
108 } converter_ops_t;
109
110 struct idn_converter {
111 char *local_encoding_name;
112 converter_ops_t *ops;
113 int flags;
114 int opened_convfromucs4;
115 int opened_convtoucs4;
116 int reference_count;
117 void *private_data;
118 };
119
120 static idn__strhash_t encoding_name_hash;
121 static idn__aliaslist_t encoding_alias_list;
122
123 static idn_result_t register_standard_encoding(void);
124 static idn_result_t roundtrip_check(idn_converter_t ctx,
125 const unsigned long *from,
126 const char *to);
127
128 static idn_result_t
129 converter_none_open(idn_converter_t ctx, void **privdata);
130 static idn_result_t
131 converter_none_close(idn_converter_t ctx, void *privdata);
132 static idn_result_t
133 converter_none_convfromucs4(idn_converter_t ctx,
134 void *privdata,
135 const unsigned long *from,
136 char *to, size_t tolen);
137 static idn_result_t
138 converter_none_convtoucs4(idn_converter_t ctx,
139 void *privdata, const char *from,
140 unsigned long *to, size_t tolen);
141
142 #ifndef WITHOUT_ICONV
143 static idn_result_t
144 converter_iconv_openfromucs4(idn_converter_t ctx, void **privdata);
145 static idn_result_t
146 converter_iconv_opentoucs4(idn_converter_t ctx, void **privdata);
147 static idn_result_t
148 converter_iconv_close(idn_converter_t ctx, void *privdata);
149 static idn_result_t
150 converter_iconv_convfromucs4(idn_converter_t ctx,
151 void *privdata,
152 const unsigned long *from,
153 char *to, size_t tolen);
154 static idn_result_t
155 converter_iconv_convtoucs4(idn_converter_t ctx,
156 void *privdata,
157 const char *from,
158 unsigned long *to, size_t tolen);
159
160 static idn_result_t
161 iconv_initialize_privdata(void **privdata);
162 static void
163 iconv_finalize_privdata(void *privdata);
164
165 static char * get_system_aliasfile(void);
166 static int file_exist(const char *filename);
167
168 #endif /* !WITHOUT_ICONV */
169
170 #ifdef DEBUG
171 static idn_result_t
172 converter_uescape_convfromucs4(idn_converter_t ctx,
173 void *privdata,
174 const unsigned long *from,
175 char *to, size_t tolen);
176 static idn_result_t
177 converter_uescape_convtoucs4(idn_converter_t ctx,
178 void *privdata,
179 const char *from,
180 unsigned long *to,
181 size_t tolen);
182 #endif /* DEBUG */
183
184 static converter_ops_t none_converter_ops = {
185 converter_none_open,
186 converter_none_open,
187 converter_none_convfromucs4,
188 converter_none_convtoucs4,
189 converter_none_close,
190 IDN_NONACE,
191 };
192
193 #ifndef WITHOUT_ICONV
194 static converter_ops_t iconv_converter_ops = {
195 converter_iconv_openfromucs4,
196 converter_iconv_opentoucs4,
197 converter_iconv_convfromucs4,
198 converter_iconv_convtoucs4,
199 converter_iconv_close,
200 IDN_NONACE,
201 };
202 #endif
203
204 /*
205 * Initialize.
206 */
207
208 idn_result_t
idn_converter_initialize(void)209 idn_converter_initialize(void) {
210 idn_result_t r;
211 idn__strhash_t hash;
212 idn__aliaslist_t list;
213 #ifndef WITHOUT_ICONV
214 const char *fname;
215 #endif
216
217 TRACE(("idn_converter_initialize()\n"));
218
219 if (encoding_name_hash == NULL) {
220 if ((r = idn__strhash_create(&hash)) != idn_success)
221 goto ret;
222 encoding_name_hash = hash;
223 r = register_standard_encoding();
224 }
225 if (encoding_alias_list == NULL) {
226 if ((r = idn__aliaslist_create(&list)) != idn_success)
227 goto ret;
228 encoding_alias_list = list;
229 #ifndef WITHOUT_ICONV
230 fname = get_system_aliasfile();
231 if (fname != NULL && file_exist(fname))
232 idn_converter_aliasfile(fname);
233 #endif
234 }
235
236 r = idn_success;
237 ret:
238 TRACE(("idn_converter_initialize(): %s\n", idn_result_tostring(r)));
239 return (r);
240 }
241
242 #ifndef WITHOUT_ICONV
243 static char *
get_system_aliasfile()244 get_system_aliasfile() {
245 #ifdef WIN32
246 static char alias_path[500]; /* a good longer than MAX_PATH */
247
248 if (idn__util_getregistrystring(idn__util_hkey_localmachine,
249 IDNVAL_ALIASFILE,
250 alias_path, sizeof(alias_path))) {
251 return (alias_path);
252 } else {
253 return (NULL);
254 }
255 #else
256 return (IDN_ALIAS_FILE);
257 #endif
258 }
259
/*
 * Tell whether `filename' can be opened for reading.
 * Returns 1 if fopen() succeeds, 0 otherwise.
 */
static int
file_exist(const char *filename)
{
	FILE *probe = fopen(filename, "r");

	if (probe != NULL) {
		fclose(probe);
		return (1);
	}
	return (0);
}
269 #endif
270
271 idn_result_t
idn_converter_create(const char * name,idn_converter_t * ctxp,int flags)272 idn_converter_create(const char *name, idn_converter_t *ctxp, int flags) {
273 const char *realname;
274 idn_converter_t ctx;
275 idn_result_t r;
276 void *v;
277
278 assert(name != NULL && ctxp != NULL);
279
280 TRACE(("idn_converter_create(%s)\n", name));
281
282 realname = idn_converter_getrealname(name);
283 #ifdef DEBUG
284 if (strcmp(name, realname) != 0) {
285 TRACE(("idn_converter_create: realname=%s\n", realname));
286 }
287 #endif
288
289 *ctxp = NULL;
290
291 /* Allocate memory for a converter context and the name. */
292 ctx = malloc(sizeof(struct idn_converter) + strlen(realname) + 1);
293 if (ctx == NULL) {
294 r = idn_nomemory;
295 goto ret;
296 }
297
298 ctx->local_encoding_name = (char *)(ctx + 1);
299 (void)strcpy(ctx->local_encoding_name, realname);
300 ctx->flags = flags;
301 ctx->reference_count = 1;
302 ctx->opened_convfromucs4 = 0;
303 ctx->opened_convtoucs4 = 0;
304 ctx->private_data = NULL;
305
306 assert(encoding_name_hash != NULL);
307
308 if (strcmp(realname, IDN_UTF8_ENCODING_NAME) == 0) {
309 /* No conversion needed */
310 ctx->ops = &none_converter_ops;
311 } else if ((r = idn__strhash_get(encoding_name_hash, realname, &v))
312 == idn_success) {
313 /* Special converter found */
314 ctx->ops = (converter_ops_t *)v;
315 } else {
316 /* General case */
317 #ifdef WITHOUT_ICONV
318 free(ctx);
319 *ctxp = NULL;
320 r = idn_invalid_name;
321 goto ret;
322 #else
323 ctx->ops = &iconv_converter_ops;
324 #endif
325 }
326
327 if ((flags & IDN_CONVERTER_DELAYEDOPEN) == 0) {
328 r = (ctx->ops->openfromucs4)(ctx, &(ctx->private_data));
329 if (r != idn_success) {
330 WARNING(("idn_converter_create(): open failed "
331 "(ucs4->local)\n"));
332 free(ctx);
333 *ctxp = NULL;
334 goto ret;
335 }
336 ctx->opened_convfromucs4 = 1;
337
338 r = (*ctx->ops->opentoucs4)(ctx, &(ctx->private_data));
339 if (r != idn_success) {
340 WARNING(("idn_converter_create(): open failed "
341 "(local->ucs4)\n"));
342 free(ctx);
343 *ctxp = NULL;
344 goto ret;
345 }
346 ctx->opened_convtoucs4 = 1;
347 }
348
349 *ctxp = ctx;
350 r = idn_success;
351 ret:
352 TRACE(("idn_converter_create(): %s\n", idn_result_tostring(r)));
353 return (r);
354 }
355
356 void
idn_converter_destroy(idn_converter_t ctx)357 idn_converter_destroy(idn_converter_t ctx) {
358 assert(ctx != NULL);
359
360 TRACE(("idn_converter_destroy(ctx=%s)\n", ctx->local_encoding_name));
361
362 ctx->reference_count--;
363 if (ctx->reference_count <= 0) {
364 TRACE(("idn_converter_destroy(): the object is destroyed\n"));
365 (void)(*ctx->ops->close)(ctx, ctx->private_data);
366 free(ctx);
367 } else {
368 TRACE(("idn_converter_destroy(): "
369 "update reference count (%d->%d)\n",
370 ctx->reference_count + 1, ctx->reference_count));
371 }
372 }
373
374 void
idn_converter_incrref(idn_converter_t ctx)375 idn_converter_incrref(idn_converter_t ctx) {
376 assert(ctx != NULL);
377
378 TRACE(("idn_converter_incrref(ctx=%s)\n", ctx->local_encoding_name));
379 TRACE(("idn_converter_incrref: update reference count (%d->%d)\n",
380 ctx->reference_count, ctx->reference_count + 1));
381
382 ctx->reference_count++;
383 }
384
385 char *
idn_converter_localencoding(idn_converter_t ctx)386 idn_converter_localencoding(idn_converter_t ctx) {
387 assert(ctx != NULL);
388 TRACE(("idn_converter_localencoding(ctx=%s)\n",
389 ctx->local_encoding_name));
390 return (ctx->local_encoding_name);
391 }
392
393 int
idn_converter_encodingtype(idn_converter_t ctx)394 idn_converter_encodingtype(idn_converter_t ctx) {
395 int encoding_type;
396
397 assert(ctx != NULL);
398 TRACE(("idn_converter_encodingtype(ctx=%s)\n",
399 ctx->local_encoding_name));
400
401 encoding_type = ctx->ops->encoding_type;
402 TRACE(("idn_converter_encodingtype(): %d\n", encoding_type));
403 return (encoding_type);
404 }
405
406 int
idn_converter_isasciicompatible(idn_converter_t ctx)407 idn_converter_isasciicompatible(idn_converter_t ctx) {
408 int iscompat;
409
410 assert(ctx != NULL);
411 TRACE(("idn_converter_isasciicompatible(ctx=%s)\n",
412 ctx->local_encoding_name));
413
414 iscompat = (ctx->ops->encoding_type != IDN_NONACE);
415 TRACE(("idn_converter_isasciicompatible(): %d\n", iscompat));
416 return (iscompat);
417 }
418
419 idn_result_t
idn_converter_convfromucs4(idn_converter_t ctx,const unsigned long * from,char * to,size_t tolen)420 idn_converter_convfromucs4(idn_converter_t ctx, const unsigned long *from,
421 char *to, size_t tolen) {
422 idn_result_t r;
423
424 assert(ctx != NULL && from != NULL && to != NULL);
425
426 TRACE(("idn_converter_convfromucs4(ctx=%s, from=\"%s\", tolen=%d)\n",
427 ctx->local_encoding_name, idn__debug_ucs4xstring(from, 50),
428 (int)tolen));
429
430 if (!ctx->opened_convfromucs4) {
431 r = (*ctx->ops->openfromucs4)(ctx, &(ctx->private_data));
432 if (r != idn_success)
433 goto ret;
434 ctx->opened_convfromucs4 = 1;
435 }
436
437 r = (*ctx->ops->convfromucs4)(ctx, ctx->private_data, from, to, tolen);
438 if (r != idn_success)
439 goto ret;
440 if ((ctx->flags & IDN_CONVERTER_RTCHECK) != 0) {
441 r = roundtrip_check(ctx, from, to);
442 if (r != idn_success)
443 goto ret;
444 }
445
446 r = idn_success;
447 ret:
448 if (r == idn_success) {
449 TRACE(("idn_converter_convfromucs4(): success (to=\"%s\")\n",
450 idn__debug_xstring(to, 50)));
451 } else {
452 TRACE(("idn_converter_convfromucs4(): %s\n",
453 idn_result_tostring(r)));
454 }
455 return (r);
456 }
457
458 idn_result_t
idn_converter_convtoucs4(idn_converter_t ctx,const char * from,unsigned long * to,size_t tolen)459 idn_converter_convtoucs4(idn_converter_t ctx, const char *from,
460 unsigned long *to, size_t tolen) {
461 idn_result_t r;
462
463 assert(ctx != NULL && from != NULL && to != NULL);
464
465 TRACE(("idn_converter_convtoucs4(ctx=%s, from=\"%s\", tolen=%d)\n",
466 ctx->local_encoding_name, idn__debug_xstring(from, 50),
467 (int)tolen));
468
469 if (!ctx->opened_convtoucs4) {
470 r = (*ctx->ops->opentoucs4)(ctx, &(ctx->private_data));
471 if (r != idn_success)
472 goto ret;
473 ctx->opened_convtoucs4 = 1;
474 }
475
476 r = (*ctx->ops->convtoucs4)(ctx, ctx->private_data, from, to, tolen);
477 ret:
478 if (r == idn_success) {
479 TRACE(("idn_converter_convtoucs4(): success (to=\"%s\")\n",
480 idn__debug_ucs4xstring(to, 50)));
481 } else {
482 TRACE(("idn_converter_convtoucs4(): %s\n",
483 idn_result_tostring(r)));
484 }
485 return (r);
486 }
487
488 /*
489 * Encoding registration.
490 */
491
492 idn_result_t
idn_converter_register(const char * name,idn_converter_openproc_t openfromucs4,idn_converter_openproc_t opentoucs4,idn_converter_convfromucs4proc_t convfromucs4,idn_converter_convtoucs4proc_t convtoucs4,idn_converter_closeproc_t close,int encoding_type)493 idn_converter_register(const char *name,
494 idn_converter_openproc_t openfromucs4,
495 idn_converter_openproc_t opentoucs4,
496 idn_converter_convfromucs4proc_t convfromucs4,
497 idn_converter_convtoucs4proc_t convtoucs4,
498 idn_converter_closeproc_t close,
499 int encoding_type) {
500 converter_ops_t *ops;
501 idn_result_t r;
502
503 assert(name != NULL && convfromucs4 != NULL && convtoucs4 != NULL);
504
505 TRACE(("idn_converter_register(name=%s)\n", name));
506
507 if ((ops = malloc(sizeof(*ops))) == NULL) {
508 r = idn_nomemory;
509 goto ret;
510 }
511
512 if (openfromucs4 == NULL)
513 openfromucs4 = converter_none_open;
514 if (opentoucs4 == NULL)
515 opentoucs4 = converter_none_open;
516 if (close == NULL)
517 close = converter_none_close;
518
519 ops->openfromucs4 = openfromucs4;
520 ops->opentoucs4 = opentoucs4;
521 ops->convfromucs4 = convfromucs4;
522 ops->convtoucs4 = convtoucs4;
523 ops->close = close;
524 ops->encoding_type = encoding_type;
525
526 r = idn__strhash_put(encoding_name_hash, name, ops);
527 if (r != idn_success) {
528 free(ops);
529 goto ret;
530 }
531
532 r = idn_success;
533 ret:
534 TRACE(("idn_converter_register(): %s\n", idn_result_tostring(r)));
535 return (r);
536 }
537
538 static idn_result_t
register_standard_encoding(void)539 register_standard_encoding(void) {
540 idn_result_t r;
541
542 r = idn_converter_register(IDN_PUNYCODE_ENCODING_NAME,
543 NULL,
544 NULL,
545 idn__punycode_encode,
546 idn__punycode_decode,
547 converter_none_close,
548 IDN_ACE_STRICTCASE);
549 if (r != idn_success)
550 return (r);
551
552 #ifdef IDN_EXTRA_ACE
553 r = idn_converter_register(IDN_AMCACEZ_ENCODING_NAME,
554 NULL,
555 NULL,
556 idn__punycode_encode,
557 idn__punycode_decode,
558 converter_none_close,
559 IDN_ACE_STRICTCASE);
560 if (r != idn_success)
561 return (r);
562
563 r = idn_converter_register(IDN_RACE_ENCODING_NAME,
564 NULL,
565 NULL,
566 idn__race_encode,
567 idn__race_decode,
568 converter_none_close,
569 IDN_ACE_LOOSECASE);
570 if (r != idn_success)
571 return (r);
572 #endif /* IDN_EXTRA_ACE */
573
574 #ifdef DEBUG
575 /* This is convenient for debug. Not useful for other purposes. */
576 r = idn_converter_register("U-escape",
577 NULL,
578 NULL,
579 converter_uescape_convfromucs4,
580 converter_uescape_convtoucs4,
581 NULL,
582 IDN_NONACE);
583 if (r != idn_success)
584 return (r);
585 #endif /* DEBUG */
586
587 return (r);
588 }
589
590 /*
591 * Encoding alias support.
592 */
593 idn_result_t
idn_converter_addalias(const char * alias_name,const char * real_name,int first_item)594 idn_converter_addalias(const char *alias_name, const char *real_name,
595 int first_item) {
596 idn_result_t r;
597
598 assert(alias_name != NULL && real_name != NULL);
599
600 TRACE(("idn_converter_addalias(alias_name=%s,real_name=%s)\n",
601 alias_name, real_name));
602
603 if (strlen(alias_name) == 0 || strlen(real_name) == 0) {
604 return idn_invalid_syntax;
605 }
606
607 if (strcmp(alias_name, real_name) == 0) {
608 r = idn_success;
609 goto ret;
610 }
611
612 if (encoding_alias_list == NULL) {
613 WARNING(("idn_converter_addalias(): the module is not "
614 "initialized\n"));
615 r = idn_failure;
616 goto ret;
617 }
618
619 r = idn__aliaslist_additem(encoding_alias_list, alias_name, real_name,
620 first_item);
621 ret:
622 TRACE(("idn_converter_addalias(): %s\n", idn_result_tostring(r)));
623 return (r);
624 }
625
626 idn_result_t
idn_converter_aliasfile(const char * path)627 idn_converter_aliasfile(const char *path) {
628 idn_result_t r;
629
630 assert(path != NULL);
631
632 TRACE(("idn_converter_aliasfile(path=%s)\n", path));
633
634 if (encoding_alias_list == NULL) {
635 WARNING(("idn_converter_aliasfile(): the module is not "
636 "initialized\n"));
637 return (idn_failure);
638 }
639
640 r = idn__aliaslist_aliasfile(encoding_alias_list, path);
641
642 TRACE(("idn_converter_aliasfile(): %s\n", idn_result_tostring(r)));
643 return (r);
644 }
645
646 idn_result_t
idn_converter_resetalias(void)647 idn_converter_resetalias(void) {
648 idn__aliaslist_t list;
649 idn_result_t r;
650
651 TRACE(("idn_converter_resetalias()\n"));
652
653 if (encoding_alias_list == NULL) {
654 WARNING(("idn_converter_resetalias(): the module is not "
655 "initialized\n"));
656 return (idn_failure);
657 }
658
659 list = encoding_alias_list;
660 encoding_alias_list = NULL;
661 idn__aliaslist_destroy(list);
662 list = NULL;
663 r = idn__aliaslist_create(&list);
664 encoding_alias_list = list;
665
666 TRACE(("idn_converter_resetalias(): %s\n", idn_result_tostring(r)));
667 return (r);
668 }
669
670 const char *
idn_converter_getrealname(const char * name)671 idn_converter_getrealname(const char *name) {
672 char *realname;
673 idn_result_t r;
674
675 TRACE(("idn_converter_getrealname()\n"));
676
677 assert(name != NULL);
678
679 if (encoding_alias_list == NULL) {
680 WARNING(("idn_converter_getrealname(): the module is not "
681 "initialized\n"));
682 return (name);
683 }
684
685 r = idn__aliaslist_find(encoding_alias_list, name, &realname);
686 if (r != idn_success) {
687 return (name);
688 }
689 return (realname);
690 }
691
692 /*
693 * Round trip check.
694 */
695
696 static idn_result_t
roundtrip_check(idn_converter_t ctx,const unsigned long * from,const char * to)697 roundtrip_check(idn_converter_t ctx, const unsigned long *from, const char *to)
698 {
699 /*
700 * One problem with iconv() conversion is that
701 * iconv() doesn't signal an error if the input
702 * string contains characters which are valid but
703 * do not have mapping to the output codeset.
704 * (the behavior of iconv() for that case is defined as
705 * `implementation dependent')
706 * One way to check this case is to perform round-trip
707 * conversion and see if it is same as the original string.
708 */
709 idn_result_t r;
710 unsigned long *back;
711 unsigned long backbuf[256];
712 size_t fromlen;
713 size_t backlen;
714
715 TRACE(("idn_converter_convert: round-trip checking (from=\"%s\")\n",
716 idn__debug_ucs4xstring(from, 50)));
717
718 /* Allocate enough buffer. */
719 fromlen = idn_ucs4_strlen(from) + 1;
720 if (fromlen * sizeof(*back) <= sizeof(backbuf)) {
721 backlen = sizeof(backbuf);
722 back = backbuf;
723 } else {
724 backlen = fromlen;
725 back = (unsigned long *)malloc(backlen * sizeof(*back));
726 if (back == NULL)
727 return (idn_nomemory);
728 }
729
730 /*
731 * Perform backward conversion.
732 */
733 r = idn_converter_convtoucs4(ctx, to, back, backlen);
734 switch (r) {
735 case idn_success:
736 if (memcmp(back, from, sizeof(*from) * fromlen) != 0)
737 r = idn_nomapping;
738 break;
739 case idn_invalid_encoding:
740 case idn_buffer_overflow:
741 r = idn_nomapping;
742 break;
743 default:
744 break;
745 }
746
747 if (back != backbuf)
748 free(back);
749
750 if (r != idn_success) {
751 TRACE(("round-trip check failed: %s\n",
752 idn_result_tostring(r)));
753 }
754
755 return (r);
756 }
757
758 /*
759 * Identity conversion (or, no conversion at all).
760 */
761
762 static idn_result_t
converter_none_open(idn_converter_t ctx,void ** privdata)763 converter_none_open(idn_converter_t ctx, void **privdata) {
764 assert(ctx != NULL);
765
766 return (idn_success);
767 }
768
769 static idn_result_t
converter_none_close(idn_converter_t ctx,void * privdata)770 converter_none_close(idn_converter_t ctx, void *privdata) {
771 assert(ctx != NULL);
772
773 return (idn_success);
774 }
775
776 static idn_result_t
converter_none_convfromucs4(idn_converter_t ctx,void * privdata,const unsigned long * from,char * to,size_t tolen)777 converter_none_convfromucs4(idn_converter_t ctx, void *privdata,
778 const unsigned long *from, char *to, size_t tolen) {
779 assert(ctx != NULL && from != NULL && to != NULL);
780
781 return idn_ucs4_ucs4toutf8(from, to, tolen);
782 }
783
784 static idn_result_t
converter_none_convtoucs4(idn_converter_t ctx,void * privdata,const char * from,unsigned long * to,size_t tolen)785 converter_none_convtoucs4(idn_converter_t ctx, void *privdata,
786 const char *from, unsigned long *to, size_t tolen) {
787 assert(ctx != NULL && from != NULL && to != NULL);
788
789 return idn_ucs4_utf8toucs4(from, to, tolen);
790 }
791
792 #ifndef WITHOUT_ICONV
793
794 /*
795 * Conversion using iconv() interface.
796 */
797
798 static idn_result_t
converter_iconv_openfromucs4(idn_converter_t ctx,void ** privdata)799 converter_iconv_openfromucs4(idn_converter_t ctx, void **privdata) {
800 iconv_t *ictxp;
801 idn_result_t r;
802
803 assert(ctx != NULL);
804
805 r = iconv_initialize_privdata(privdata);
806 if (r != idn_success)
807 return (r);
808
809 ictxp = (iconv_t *)*privdata;
810 *ictxp = iconv_open(ctx->local_encoding_name, IDN_UTF8_ENCODING_NAME);
811 if (*ictxp == (iconv_t)(-1)) {
812 free(*privdata);
813 *privdata = NULL;
814 switch (errno) {
815 case ENOMEM:
816 return (idn_nomemory);
817 case EINVAL:
818 return (idn_invalid_name);
819 default:
820 WARNING(("iconv_open failed with errno %d\n", errno));
821 return (idn_failure);
822 }
823 }
824
825 return (idn_success);
826 }
827
828 static idn_result_t
converter_iconv_opentoucs4(idn_converter_t ctx,void ** privdata)829 converter_iconv_opentoucs4(idn_converter_t ctx, void **privdata) {
830 iconv_t *ictxp;
831 idn_result_t r;
832
833 assert(ctx != NULL);
834
835 r = iconv_initialize_privdata(privdata);
836 if (r != idn_success)
837 return (r);
838
839 ictxp = (iconv_t *)*privdata + 1;
840 *ictxp = iconv_open(IDN_UTF8_ENCODING_NAME, ctx->local_encoding_name);
841 if (*ictxp == (iconv_t)(-1)) {
842 free(*privdata);
843 *privdata = NULL;
844 switch (errno) {
845 case ENOMEM:
846 return (idn_nomemory);
847 case EINVAL:
848 return (idn_invalid_name);
849 default:
850 WARNING(("iconv_open failed with errno %d\n", errno));
851 return (idn_failure);
852 }
853 }
854
855 return (idn_success);
856 }
857
858 static idn_result_t
iconv_initialize_privdata(void ** privdata)859 iconv_initialize_privdata(void **privdata) {
860 if (*privdata == NULL) {
861 *privdata = malloc(sizeof(iconv_t) * 2);
862 if (*privdata == NULL)
863 return (idn_nomemory);
864 *((iconv_t *)*privdata) = (iconv_t)(-1);
865 *((iconv_t *)*privdata + 1) = (iconv_t)(-1);
866 }
867
868 return (idn_success);
869 }
870
/*
 * Close whichever of the two iconv descriptors in `privdata' are open
 * and free the slot pair.  A NULL `privdata' is ignored.
 */
static void
iconv_finalize_privdata(void *privdata)
{
	iconv_t *slots = (iconv_t *)privdata;
	int i;

	if (slots == NULL)
		return;

	for (i = 0; i < 2; i++) {
		if (slots[i] != (iconv_t)(-1))
			iconv_close(slots[i]);
	}
	free(privdata);
}
886
887 static idn_result_t
converter_iconv_close(idn_converter_t ctx,void * privdata)888 converter_iconv_close(idn_converter_t ctx, void *privdata) {
889 assert(ctx != NULL);
890
891 iconv_finalize_privdata(privdata);
892
893 return (idn_success);
894 }
895
896 static idn_result_t
converter_iconv_convfromucs4(idn_converter_t ctx,void * privdata,const unsigned long * from,char * to,size_t tolen)897 converter_iconv_convfromucs4(idn_converter_t ctx, void *privdata,
898 const unsigned long *from, char *to,
899 size_t tolen) {
900 iconv_t ictx;
901 char *utf8 = NULL;
902 size_t utf8size = 256; /* large enough */
903 idn_result_t r;
904 size_t sz;
905 size_t inleft;
906 size_t outleft;
907 char *inbuf, *outbuf;
908
909 assert(ctx != NULL && from != NULL && to != NULL);
910
911 if (tolen <= 0) {
912 r = idn_buffer_overflow; /* need space for NUL */
913 goto ret;
914 }
915
916 /*
917 * UCS4 -> UTF-8 conversion.
918 */
919 utf8 = (char *)malloc(utf8size);
920 if (utf8 == NULL) {
921 r = idn_nomemory;
922 goto ret;
923 }
924
925 try_again:
926 r = idn_ucs4_ucs4toutf8(from, utf8, utf8size);
927 if (r == idn_buffer_overflow) {
928 char *new_utf8;
929
930 utf8size *= 2;
931 new_utf8 = (char *)realloc(utf8, utf8size);
932 if (new_utf8 == NULL) {
933 r = idn_nomemory;
934 goto ret;
935 }
936 utf8 = new_utf8;
937 goto try_again;
938 } else if (r != idn_success) {
939 goto ret;
940 }
941
942 ictx = ((iconv_t *)privdata)[0];
943
944 /*
945 * Reset internal state.
946 *
947 * The following code should work according to the SUSv2 spec,
948 * but causes segmentation fault with Solaris 2.6.
949 * So.. a work-around.
950 *
951 * (void)iconv(ictx, (const char **)NULL, (size_t *)NULL,
952 * (char **)NULL, (size_t *)NULL);
953 */
954 inleft = 0;
955 outbuf = NULL;
956 outleft = 0;
957 (void)iconv(ictx, (const char **)NULL, &inleft, &outbuf, &outleft);
958
959 inleft = strlen(utf8);
960 inbuf = utf8;
961 outleft = tolen - 1; /* reserve space for terminating NUL */
962 sz = iconv(ictx, (const char **)&inbuf, &inleft, &to, &outleft);
963
964 if (sz == (size_t)(-1) || inleft > 0) {
965 switch (errno) {
966 case EILSEQ:
967 case EINVAL:
968 /*
969 * We already checked the validity of the input
970 * string. So we assume a mapping error.
971 */
972 r = idn_nomapping;
973 goto ret;
974 case E2BIG:
975 r = idn_buffer_overflow;
976 goto ret;
977 default:
978 WARNING(("iconv failed with errno %d\n", errno));
979 r = idn_failure;
980 goto ret;
981 }
982 }
983
984 /*
985 * For UTF-8 -> local conversion, append a sequence of
986 * state reset.
987 */
988 inleft = 0;
989 sz = iconv(ictx, (const char **)NULL, &inleft, &to, &outleft);
990 if (sz == (size_t)(-1)) {
991 switch (errno) {
992 case EILSEQ:
993 case EINVAL:
994 r = idn_invalid_encoding;
995 goto ret;
996 case E2BIG:
997 r = idn_buffer_overflow;
998 goto ret;
999 default:
1000 WARNING(("iconv failed with errno %d\n", errno));
1001 r = idn_failure;
1002 goto ret;
1003 }
1004 }
1005 *to = '\0';
1006 r = idn_success;
1007
1008 ret:
1009 free(utf8);
1010 return (r);
1011
1012 }
1013
1014 static idn_result_t
converter_iconv_convtoucs4(idn_converter_t ctx,void * privdata,const char * from,unsigned long * to,size_t tolen)1015 converter_iconv_convtoucs4(idn_converter_t ctx, void *privdata,
1016 const char *from, unsigned long *to, size_t tolen) {
1017 iconv_t ictx;
1018 char *utf8 = NULL;
1019 size_t utf8size = 256; /* large enough */
1020 idn_result_t r;
1021 size_t sz;
1022 size_t inleft;
1023 size_t outleft;
1024 const char *from_ptr;
1025 char *outbuf;
1026
1027 assert(ctx != NULL && from != NULL && to != NULL);
1028
1029 if (tolen <= 0) {
1030 r = idn_buffer_overflow; /* need space for NUL */
1031 goto ret;
1032 }
1033 ictx = ((iconv_t *)privdata)[1];
1034 utf8 = (char *)malloc(utf8size);
1035 if (utf8 == NULL) {
1036 r = idn_nomemory;
1037 goto ret;
1038 }
1039
1040 try_again:
1041 /*
1042 * Reset internal state.
1043 */
1044 inleft = 0;
1045 outbuf = NULL;
1046 outleft = 0;
1047 (void)iconv(ictx, (const char **)NULL, &inleft, &outbuf, &outleft);
1048
1049 from_ptr = from;
1050 inleft = strlen(from);
1051 outbuf = utf8;
1052 outleft = utf8size - 1; /* reserve space for terminating NUL */
1053 sz = iconv(ictx, (const char **)&from_ptr, &inleft, &outbuf, &outleft);
1054
1055 if (sz == (size_t)(-1) || inleft > 0) {
1056 char *new_utf8;
1057
1058 switch (errno) {
1059 case EILSEQ:
1060 case EINVAL:
1061 /*
1062 * We assume all the characters in the local
1063 * codeset are included in UCS. This means mapping
1064 * error is not possible, so the input string must
1065 * have some problem.
1066 */
1067 r = idn_invalid_encoding;
1068 goto ret;
1069 case E2BIG:
1070 utf8size *= 2;
1071 new_utf8 = (char *)realloc(utf8, utf8size);
1072 if (new_utf8 == NULL) {
1073 r = idn_nomemory;
1074 goto ret;
1075 }
1076 utf8 = new_utf8;
1077 goto try_again;
1078 default:
1079 WARNING(("iconv failed with errno %d\n", errno));
1080 r = idn_failure;
1081 goto ret;
1082 }
1083 }
1084 *outbuf = '\0';
1085
1086 /*
1087 * UTF-8 -> UCS4 conversion.
1088 */
1089 r = idn_ucs4_utf8toucs4(utf8, to, tolen);
1090
1091 ret:
1092 free(utf8);
1093 return (r);
1094 }
1095
1096 #endif /* !WITHOUT_ICONV */
1097
1098 #ifdef DEBUG
/*
 * Conversion to/from unicode escape string.
 * An arbitrary UCS-4 character can be specified by the special sequence
 *	\u{XXXXXX}
 * where XXXXXX denotes any hexadecimal string up to FFFFFFFF.
 * This is designed for debugging.
 */
1106
1107 static idn_result_t
converter_uescape_convfromucs4(idn_converter_t ctx,void * privdata,const unsigned long * from,char * to,size_t tolen)1108 converter_uescape_convfromucs4(idn_converter_t ctx, void *privdata,
1109 const unsigned long *from, char *to,
1110 size_t tolen) {
1111 idn_result_t r;
1112 unsigned long v;
1113
1114 while (*from != '\0') {
1115 v = *from++;
1116
1117 if (v <= 0x7f) {
1118 if (tolen < 1) {
1119 r = idn_buffer_overflow;
1120 goto failure;
1121 }
1122 *to++ = v;
1123 tolen--;
1124 } else if (v <= 0xffffffff) {
1125 char tmp[20];
1126 int len;
1127
1128 (void)sprintf(tmp, "\\u{%lx}", v);
1129 len = strlen(tmp);
1130 if (tolen < len) {
1131 r = idn_buffer_overflow;
1132 goto failure;
1133 }
1134 (void)memcpy(to, tmp, len);
1135 to += len;
1136 tolen -= len;
1137 } else {
1138 r = idn_invalid_encoding;
1139 goto failure;
1140 }
1141 }
1142
1143 if (tolen <= 0) {
1144 r = idn_buffer_overflow;
1145 goto failure;
1146 }
1147 *to = '\0';
1148
1149 return (idn_success);
1150
1151 failure:
1152 if (r != idn_buffer_overflow) {
1153 WARNING(("idn_uescape_convfromucs4(): %s\n",
1154 idn_result_tostring(r)));
1155 }
1156 return (r);
1157 }
1158
1159 static idn_result_t
converter_uescape_convtoucs4(idn_converter_t ctx,void * privdata,const char * from,unsigned long * to,size_t tolen)1160 converter_uescape_convtoucs4(idn_converter_t ctx, void *privdata,
1161 const char *from, unsigned long *to, size_t tolen)
1162 {
1163 idn_result_t r;
1164 size_t fromlen = strlen(from);
1165
1166 while (*from != '\0') {
1167 if (tolen <= 0) {
1168 r = idn_buffer_overflow;
1169 goto failure;
1170 }
1171 if (strncmp(from, "\\u{", 3) == 0 ||
1172 strncmp(from, "\\U{", 3) == 0) {
1173 size_t ullen;
1174 unsigned long v;
1175 char *end;
1176
1177 v = strtoul(from + 3, &end, 16);
1178 ullen = end - (from + 3);
1179 if (*end == '}' && ullen > 1 && ullen < 8) {
1180 *to = v;
1181 from = end + 1;
1182 fromlen -= ullen;
1183 } else {
1184 *to = '\\';
1185 from++;
1186 fromlen--;
1187 }
1188 } else {
1189 int c = *(unsigned char *)from;
1190 size_t width;
1191 char buf[8];
1192
1193 if (c < 0x80)
1194 width = 1;
1195 else if (c < 0xc0)
1196 width = 0;
1197 else if (c < 0xe0)
1198 width = 2;
1199 else if (c < 0xf0)
1200 width = 3;
1201 else if (c < 0xf8)
1202 width = 4;
1203 else if (c < 0xfc)
1204 width = 5;
1205 else if (c < 0xfe)
1206 width = 6;
1207 else
1208 width = 0;
1209 if (width == 0 || width > fromlen) {
1210 r = idn_invalid_encoding;
1211 goto failure;
1212 }
1213
1214 memcpy(buf, from, width);
1215 buf[width] = '\0';
1216 r = idn_ucs4_utf8toucs4(buf, to, tolen);
1217 if (r != idn_success) {
1218 r = idn_invalid_encoding;
1219 goto failure;
1220 }
1221 from += width;
1222 fromlen -= width;
1223 }
1224 to++;
1225 tolen--;
1226 }
1227
1228 if (tolen <= 0) {
1229 r = idn_buffer_overflow;
1230 goto failure;
1231 }
1232 *to = '\0';
1233
1234 return (idn_success);
1235
1236 failure:
1237 if (r != idn_buffer_overflow) {
1238 WARNING(("idn_uescape_convtoucs4(): %s\n",
1239 idn_result_tostring(r)));
1240 }
1241 return (r);
1242 }
1243
1244 #endif
1245