1 #ifndef lint
2 static char *rcsid = "$Id: converter.c,v 1.79 2003/01/10 13:36:34 ishisone Exp $";
3 #endif
4 
5 /*
6  * Copyright (c) 2000,2002 Japan Network Information Center.
7  * All rights reserved.
8  *
9  * By using this file, you agree to the terms and conditions set forth bellow.
10  *
11  * 			LICENSE TERMS AND CONDITIONS
12  *
13  * The following License Terms and Conditions apply, unless a different
14  * license is obtained from Japan Network Information Center ("JPNIC"),
15  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
16  * Chiyoda-ku, Tokyo 101-0047, Japan.
17  *
18  * 1. Use, Modification and Redistribution (including distribution of any
19  *    modified or derived work) in source and/or binary forms is permitted
20  *    under this License Terms and Conditions.
21  *
22  * 2. Redistribution of source code must retain the copyright notices as they
23  *    appear in each source code file, this License Terms and Conditions.
24  *
25  * 3. Redistribution in binary form must reproduce the Copyright Notice,
26  *    this License Terms and Conditions, in the documentation and/or other
27  *    materials provided with the distribution.  For the purposes of binary
28  *    distribution the "Copyright Notice" refers to the following language:
29  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
30  *
31  * 4. The name of JPNIC may not be used to endorse or promote products
32  *    derived from this Software without specific prior written approval of
33  *    JPNIC.
34  *
35  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
36  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
38  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
39  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
40  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
41  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
42  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
43  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
44  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
45  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
46  */
47 
48 #include <config.h>
49 
50 #include <stddef.h>
51 #include <stdlib.h>
52 #include <stdio.h>
53 #include <string.h>
54 #include <ctype.h>
55 #include <errno.h>
56 #ifndef WITHOUT_ICONV
57 #include <iconv.h>
58 #endif
59 
60 #include <idn/result.h>
61 #include <idn/assert.h>
62 #include <idn/logmacro.h>
63 #include <idn/converter.h>
64 #include <idn/aliaslist.h>
65 #include <idn/strhash.h>
66 #include <idn/debug.h>
67 #include <idn/ucs4.h>
68 #include <idn/punycode.h>
69 #include <idn/race.h>
70 #include <idn/util.h>
71 
72 #ifndef IDN_UTF8_ENCODING_NAME
73 #define IDN_UTF8_ENCODING_NAME "UTF-8"		/* by IANA */
74 #endif
75 #ifndef IDN_RACE_ENCODING_NAME
76 #define IDN_RACE_ENCODING_NAME "RACE"
77 #endif
78 #ifndef IDN_AMCACEZ_ENCODING_NAME
79 #define IDN_AMCACEZ_ENCODING_NAME "AMC-ACE-Z"
80 #endif
81 #ifndef IDN_PUNYCODE_ENCODING_NAME
82 #define IDN_PUNYCODE_ENCODING_NAME "Punycode"
83 #endif
84 
85 #define MAX_RECURSE	20
86 
87 #ifdef WIN32
88 
89 #define IDNKEY_IDNKIT		"Software\\JPNIC\\IDN"
90 #define IDNVAL_ALIASFILE	"AliasFile"
91 
92 #else /* WIN32 */
93 
94 #ifndef IDN_RESCONF_DIR
95 #define IDN_RESCONF_DIR		"/etc"
96 #endif
97 #define IDN_ALIAS_FILE		IDN_RESCONF_DIR "/idnalias.conf"
98 
99 #endif /* WIN32 */
100 
101 typedef struct {
102 	idn_converter_openproc_t openfromucs4;
103 	idn_converter_openproc_t opentoucs4;
104 	idn_converter_convfromucs4proc_t convfromucs4;
105 	idn_converter_convtoucs4proc_t convtoucs4;
106 	idn_converter_closeproc_t close;
107 	int encoding_type;
108 } converter_ops_t;
109 
110 struct idn_converter {
111 	char *local_encoding_name;
112 	converter_ops_t *ops;
113 	int flags;
114 	int opened_convfromucs4;
115 	int opened_convtoucs4;
116 	int reference_count;
117 	void *private_data;
118 };
119 
120 static idn__strhash_t encoding_name_hash;
121 static idn__aliaslist_t encoding_alias_list;
122 
123 static idn_result_t	register_standard_encoding(void);
124 static idn_result_t	roundtrip_check(idn_converter_t ctx,
125 					const unsigned long *from,
126 					const char *to);
127 
128 static idn_result_t
129        converter_none_open(idn_converter_t ctx, void **privdata);
130 static idn_result_t
131        converter_none_close(idn_converter_t ctx, void *privdata);
132 static idn_result_t
133        converter_none_convfromucs4(idn_converter_t ctx,
134 				   void *privdata,
135 				   const unsigned long *from,
136 				   char *to, size_t tolen);
137 static idn_result_t
138        converter_none_convtoucs4(idn_converter_t ctx,
139 				 void *privdata, const char *from,
140 				 unsigned long *to, size_t tolen);
141 
142 #ifndef WITHOUT_ICONV
143 static idn_result_t
144        converter_iconv_openfromucs4(idn_converter_t ctx, void **privdata);
145 static idn_result_t
146        converter_iconv_opentoucs4(idn_converter_t ctx, void **privdata);
147 static idn_result_t
148        converter_iconv_close(idn_converter_t ctx, void *privdata);
149 static idn_result_t
150        converter_iconv_convfromucs4(idn_converter_t ctx,
151 				    void *privdata,
152 				    const unsigned long *from,
153 				    char *to, size_t tolen);
154 static idn_result_t
155        converter_iconv_convtoucs4(idn_converter_t ctx,
156 				  void *privdata,
157 				  const char *from,
158 				  unsigned long *to, size_t tolen);
159 
160 static idn_result_t
161 iconv_initialize_privdata(void **privdata);
162 static void
163 iconv_finalize_privdata(void *privdata);
164 
165 static char *		get_system_aliasfile(void);
166 static int		file_exist(const char *filename);
167 
168 #endif /* !WITHOUT_ICONV */
169 
170 #ifdef DEBUG
171 static idn_result_t
172        converter_uescape_convfromucs4(idn_converter_t ctx,
173 				      void *privdata,
174 				      const unsigned long *from,
175 				      char *to, size_t tolen);
176 static idn_result_t
177        converter_uescape_convtoucs4(idn_converter_t ctx,
178 				    void *privdata,
179 				    const char *from,
180 				    unsigned long *to,
181 				    size_t tolen);
182 #endif /* DEBUG */
183 
184 static converter_ops_t none_converter_ops = {
185 	converter_none_open,
186 	converter_none_open,
187 	converter_none_convfromucs4,
188 	converter_none_convtoucs4,
189 	converter_none_close,
190 	IDN_NONACE,
191 };
192 
193 #ifndef WITHOUT_ICONV
194 static converter_ops_t iconv_converter_ops = {
195 	converter_iconv_openfromucs4,
196 	converter_iconv_opentoucs4,
197 	converter_iconv_convfromucs4,
198 	converter_iconv_convtoucs4,
199 	converter_iconv_close,
200 	IDN_NONACE,
201 };
202 #endif
203 
204 /*
205  * Initialize.
206  */
207 
208 idn_result_t
idn_converter_initialize(void)209 idn_converter_initialize(void) {
210 	idn_result_t r;
211 	idn__strhash_t hash;
212 	idn__aliaslist_t list;
213 #ifndef WITHOUT_ICONV
214 	const char *fname;
215 #endif
216 
217 	TRACE(("idn_converter_initialize()\n"));
218 
219 	if (encoding_name_hash == NULL) {
220 		if ((r = idn__strhash_create(&hash)) != idn_success)
221 			goto ret;
222 		encoding_name_hash = hash;
223 		r = register_standard_encoding();
224 	}
225 	if (encoding_alias_list == NULL) {
226 		if ((r = idn__aliaslist_create(&list)) != idn_success)
227 			goto ret;
228 		encoding_alias_list = list;
229 #ifndef WITHOUT_ICONV
230 		fname = get_system_aliasfile();
231 		if (fname != NULL && file_exist(fname))
232 			idn_converter_aliasfile(fname);
233 #endif
234 	}
235 
236 	r = idn_success;
237 ret:
238 	TRACE(("idn_converter_initialize(): %s\n", idn_result_tostring(r)));
239 	return (r);
240 }
241 
242 #ifndef WITHOUT_ICONV
243 static char *
get_system_aliasfile()244 get_system_aliasfile() {
245 #ifdef WIN32
246 	static char alias_path[500];	/* a good longer than MAX_PATH */
247 
248 	if (idn__util_getregistrystring(idn__util_hkey_localmachine,
249 					IDNVAL_ALIASFILE,
250 					alias_path, sizeof(alias_path))) {
251 		return (alias_path);
252 	} else {
253 		return (NULL);
254 	}
255 #else
256 	return (IDN_ALIAS_FILE);
257 #endif
258 }
259 
/*
 * Tell whether `filename' can be opened for reading.
 * Returns 1 if fopen() succeeds, 0 otherwise.
 */
static int
file_exist(const char *filename) {
	FILE *probe = fopen(filename, "r");

	if (probe != NULL) {
		fclose(probe);
		return (1);
	}
	return (0);
}
269 #endif
270 
271 idn_result_t
idn_converter_create(const char * name,idn_converter_t * ctxp,int flags)272 idn_converter_create(const char *name, idn_converter_t *ctxp, int flags) {
273 	const char *realname;
274 	idn_converter_t ctx;
275 	idn_result_t r;
276 	void *v;
277 
278 	assert(name != NULL && ctxp != NULL);
279 
280 	TRACE(("idn_converter_create(%s)\n", name));
281 
282 	realname = idn_converter_getrealname(name);
283 #ifdef DEBUG
284 	if (strcmp(name, realname) != 0) {
285 		TRACE(("idn_converter_create: realname=%s\n", realname));
286 	}
287 #endif
288 
289 	*ctxp = NULL;
290 
291 	/* Allocate memory for a converter context and the name. */
292 	ctx = malloc(sizeof(struct idn_converter) + strlen(realname) + 1);
293 	if (ctx == NULL) {
294 		r = idn_nomemory;
295 		goto ret;
296 	}
297 
298 	ctx->local_encoding_name = (char *)(ctx + 1);
299 	(void)strcpy(ctx->local_encoding_name, realname);
300 	ctx->flags = flags;
301 	ctx->reference_count = 1;
302 	ctx->opened_convfromucs4 = 0;
303 	ctx->opened_convtoucs4 = 0;
304 	ctx->private_data = NULL;
305 
306 	assert(encoding_name_hash != NULL);
307 
308 	if (strcmp(realname, IDN_UTF8_ENCODING_NAME) == 0) {
309 		/* No conversion needed */
310 		ctx->ops = &none_converter_ops;
311 	} else if ((r = idn__strhash_get(encoding_name_hash, realname, &v))
312 		   == idn_success) {
313 		/* Special converter found */
314 		ctx->ops = (converter_ops_t *)v;
315 	} else {
316 		/* General case */
317 #ifdef WITHOUT_ICONV
318 		free(ctx);
319 		*ctxp = NULL;
320 		r = idn_invalid_name;
321 		goto ret;
322 #else
323 		ctx->ops = &iconv_converter_ops;
324 #endif
325 	}
326 
327 	if ((flags & IDN_CONVERTER_DELAYEDOPEN) == 0) {
328 		r = (ctx->ops->openfromucs4)(ctx, &(ctx->private_data));
329 		if (r != idn_success) {
330 			WARNING(("idn_converter_create(): open failed "
331 			     "(ucs4->local)\n"));
332 			free(ctx);
333 			*ctxp = NULL;
334 			goto ret;
335 		}
336 		ctx->opened_convfromucs4 = 1;
337 
338 		r = (*ctx->ops->opentoucs4)(ctx, &(ctx->private_data));
339 		if (r != idn_success) {
340 			WARNING(("idn_converter_create(): open failed "
341 			     "(local->ucs4)\n"));
342 			free(ctx);
343 			*ctxp = NULL;
344 			goto ret;
345 		}
346 		ctx->opened_convtoucs4 = 1;
347 	}
348 
349 	*ctxp = ctx;
350 	r = idn_success;
351 ret:
352 	TRACE(("idn_converter_create(): %s\n", idn_result_tostring(r)));
353 	return (r);
354 }
355 
356 void
idn_converter_destroy(idn_converter_t ctx)357 idn_converter_destroy(idn_converter_t ctx) {
358 	assert(ctx != NULL);
359 
360 	TRACE(("idn_converter_destroy(ctx=%s)\n", ctx->local_encoding_name));
361 
362 	ctx->reference_count--;
363 	if (ctx->reference_count <= 0) {
364 		TRACE(("idn_converter_destroy(): the object is destroyed\n"));
365 		(void)(*ctx->ops->close)(ctx, ctx->private_data);
366 		free(ctx);
367 	} else {
368 		TRACE(("idn_converter_destroy(): "
369 		       "update reference count (%d->%d)\n",
370 		       ctx->reference_count + 1, ctx->reference_count));
371 	}
372 }
373 
374 void
idn_converter_incrref(idn_converter_t ctx)375 idn_converter_incrref(idn_converter_t ctx) {
376 	assert(ctx != NULL);
377 
378 	TRACE(("idn_converter_incrref(ctx=%s)\n", ctx->local_encoding_name));
379 	TRACE(("idn_converter_incrref: update reference count (%d->%d)\n",
380 	    ctx->reference_count, ctx->reference_count + 1));
381 
382 	ctx->reference_count++;
383 }
384 
385 char *
idn_converter_localencoding(idn_converter_t ctx)386 idn_converter_localencoding(idn_converter_t ctx) {
387 	assert(ctx != NULL);
388 	TRACE(("idn_converter_localencoding(ctx=%s)\n",
389 	       ctx->local_encoding_name));
390 	return (ctx->local_encoding_name);
391 }
392 
393 int
idn_converter_encodingtype(idn_converter_t ctx)394 idn_converter_encodingtype(idn_converter_t ctx) {
395 	int encoding_type;
396 
397 	assert(ctx != NULL);
398 	TRACE(("idn_converter_encodingtype(ctx=%s)\n",
399 	       ctx->local_encoding_name));
400 
401 	encoding_type = ctx->ops->encoding_type;
402 	TRACE(("idn_converter_encodingtype(): %d\n", encoding_type));
403 	return (encoding_type);
404 }
405 
406 int
idn_converter_isasciicompatible(idn_converter_t ctx)407 idn_converter_isasciicompatible(idn_converter_t ctx) {
408 	int iscompat;
409 
410 	assert(ctx != NULL);
411 	TRACE(("idn_converter_isasciicompatible(ctx=%s)\n",
412 	       ctx->local_encoding_name));
413 
414 	iscompat = (ctx->ops->encoding_type != IDN_NONACE);
415 	TRACE(("idn_converter_isasciicompatible(): %d\n", iscompat));
416 	return (iscompat);
417 }
418 
419 idn_result_t
idn_converter_convfromucs4(idn_converter_t ctx,const unsigned long * from,char * to,size_t tolen)420 idn_converter_convfromucs4(idn_converter_t ctx, const unsigned long *from,
421 			   char *to, size_t tolen) {
422 	idn_result_t r;
423 
424 	assert(ctx != NULL && from != NULL && to != NULL);
425 
426 	TRACE(("idn_converter_convfromucs4(ctx=%s, from=\"%s\", tolen=%d)\n",
427 	       ctx->local_encoding_name, idn__debug_ucs4xstring(from, 50),
428 	       (int)tolen));
429 
430 	if (!ctx->opened_convfromucs4) {
431 		r = (*ctx->ops->openfromucs4)(ctx, &(ctx->private_data));
432 		if (r != idn_success)
433 			goto ret;
434 		ctx->opened_convfromucs4 = 1;
435 	}
436 
437 	r = (*ctx->ops->convfromucs4)(ctx, ctx->private_data, from, to, tolen);
438 	if (r != idn_success)
439 		goto ret;
440 	if ((ctx->flags & IDN_CONVERTER_RTCHECK) != 0) {
441 		r = roundtrip_check(ctx, from, to);
442 		if (r != idn_success)
443 			goto ret;
444 	}
445 
446 	r = idn_success;
447 ret:
448 	if (r == idn_success) {
449 		TRACE(("idn_converter_convfromucs4(): success (to=\"%s\")\n",
450 		       idn__debug_xstring(to, 50)));
451 	} else {
452 		TRACE(("idn_converter_convfromucs4(): %s\n",
453 		       idn_result_tostring(r)));
454 	}
455 	return (r);
456 }
457 
458 idn_result_t
idn_converter_convtoucs4(idn_converter_t ctx,const char * from,unsigned long * to,size_t tolen)459 idn_converter_convtoucs4(idn_converter_t ctx, const char *from,
460 			 unsigned long *to, size_t tolen) {
461 	idn_result_t r;
462 
463 	assert(ctx != NULL && from != NULL && to != NULL);
464 
465 	TRACE(("idn_converter_convtoucs4(ctx=%s, from=\"%s\", tolen=%d)\n",
466 	       ctx->local_encoding_name, idn__debug_xstring(from, 50),
467 	       (int)tolen));
468 
469 	if (!ctx->opened_convtoucs4) {
470 		r = (*ctx->ops->opentoucs4)(ctx, &(ctx->private_data));
471 		if (r != idn_success)
472 			goto ret;
473 		ctx->opened_convtoucs4 = 1;
474 	}
475 
476 	r = (*ctx->ops->convtoucs4)(ctx, ctx->private_data, from, to, tolen);
477 ret:
478 	if (r == idn_success) {
479 		TRACE(("idn_converter_convtoucs4(): success (to=\"%s\")\n",
480 		       idn__debug_ucs4xstring(to, 50)));
481 	} else {
482 		TRACE(("idn_converter_convtoucs4(): %s\n",
483 		       idn_result_tostring(r)));
484 	}
485 	return (r);
486 }
487 
488 /*
489  * Encoding registration.
490  */
491 
492 idn_result_t
idn_converter_register(const char * name,idn_converter_openproc_t openfromucs4,idn_converter_openproc_t opentoucs4,idn_converter_convfromucs4proc_t convfromucs4,idn_converter_convtoucs4proc_t convtoucs4,idn_converter_closeproc_t close,int encoding_type)493 idn_converter_register(const char *name,
494 		       idn_converter_openproc_t openfromucs4,
495 		       idn_converter_openproc_t opentoucs4,
496 		       idn_converter_convfromucs4proc_t convfromucs4,
497 		       idn_converter_convtoucs4proc_t convtoucs4,
498 		       idn_converter_closeproc_t close,
499 		       int encoding_type) {
500 	converter_ops_t *ops;
501 	idn_result_t r;
502 
503 	assert(name != NULL && convfromucs4 != NULL && convtoucs4 != NULL);
504 
505 	TRACE(("idn_converter_register(name=%s)\n", name));
506 
507 	if ((ops = malloc(sizeof(*ops))) == NULL) {
508 		r = idn_nomemory;
509 		goto ret;
510 	}
511 
512 	if (openfromucs4 == NULL)
513 		openfromucs4 = converter_none_open;
514 	if (opentoucs4 == NULL)
515 		opentoucs4 = converter_none_open;
516 	if (close == NULL)
517 		close = converter_none_close;
518 
519 	ops->openfromucs4 = openfromucs4;
520 	ops->opentoucs4 = opentoucs4;
521 	ops->convfromucs4 = convfromucs4;
522 	ops->convtoucs4 = convtoucs4;
523 	ops->close = close;
524 	ops->encoding_type = encoding_type;
525 
526 	r = idn__strhash_put(encoding_name_hash, name, ops);
527 	if (r != idn_success) {
528 		free(ops);
529 		goto ret;
530 	}
531 
532 	r = idn_success;
533 ret:
534 	TRACE(("idn_converter_register(): %s\n", idn_result_tostring(r)));
535 	return (r);
536 }
537 
538 static idn_result_t
register_standard_encoding(void)539 register_standard_encoding(void) {
540 	idn_result_t r;
541 
542 	r = idn_converter_register(IDN_PUNYCODE_ENCODING_NAME,
543 				   NULL,
544 				   NULL,
545 				   idn__punycode_encode,
546 				   idn__punycode_decode,
547 				   converter_none_close,
548 				   IDN_ACE_STRICTCASE);
549 	if (r != idn_success)
550 		return (r);
551 
552 #ifdef IDN_EXTRA_ACE
553 	r = idn_converter_register(IDN_AMCACEZ_ENCODING_NAME,
554 				   NULL,
555 				   NULL,
556 				   idn__punycode_encode,
557 				   idn__punycode_decode,
558 				   converter_none_close,
559 				   IDN_ACE_STRICTCASE);
560 	if (r != idn_success)
561 		return (r);
562 
563 	r = idn_converter_register(IDN_RACE_ENCODING_NAME,
564 				   NULL,
565 				   NULL,
566 				   idn__race_encode,
567 				   idn__race_decode,
568 				   converter_none_close,
569 				   IDN_ACE_LOOSECASE);
570 	if (r != idn_success)
571 		return (r);
572 #endif /* IDN_EXTRA_ACE */
573 
574 #ifdef DEBUG
575 	/* This is convenient for debug.  Not useful for other purposes. */
576 	r = idn_converter_register("U-escape",
577 				   NULL,
578 				   NULL,
579 				   converter_uescape_convfromucs4,
580 				   converter_uescape_convtoucs4,
581 				   NULL,
582 				   IDN_NONACE);
583 	if (r != idn_success)
584 		return (r);
585 #endif /* DEBUG */
586 
587 	return (r);
588 }
589 
590 /*
591  * Encoding alias support.
592  */
593 idn_result_t
idn_converter_addalias(const char * alias_name,const char * real_name,int first_item)594 idn_converter_addalias(const char *alias_name, const char *real_name,
595 		       int first_item) {
596 	idn_result_t r;
597 
598 	assert(alias_name != NULL && real_name != NULL);
599 
600 	TRACE(("idn_converter_addalias(alias_name=%s,real_name=%s)\n",
601 	       alias_name, real_name));
602 
603 	if (strlen(alias_name) == 0 || strlen(real_name) == 0) {
604 		return idn_invalid_syntax;
605 	}
606 
607 	if (strcmp(alias_name, real_name) == 0) {
608 		r = idn_success;
609 		goto ret;
610 	}
611 
612 	if (encoding_alias_list == NULL) {
613 		WARNING(("idn_converter_addalias(): the module is not "
614 			 "initialized\n"));
615 		r = idn_failure;
616 		goto ret;
617 	}
618 
619 	r = idn__aliaslist_additem(encoding_alias_list, alias_name, real_name,
620 				   first_item);
621 ret:
622 	TRACE(("idn_converter_addalias(): %s\n", idn_result_tostring(r)));
623 	return (r);
624 }
625 
626 idn_result_t
idn_converter_aliasfile(const char * path)627 idn_converter_aliasfile(const char *path) {
628 	idn_result_t r;
629 
630 	assert(path != NULL);
631 
632 	TRACE(("idn_converter_aliasfile(path=%s)\n", path));
633 
634 	if (encoding_alias_list == NULL) {
635 		WARNING(("idn_converter_aliasfile(): the module is not "
636 			 "initialized\n"));
637 		return (idn_failure);
638 	}
639 
640 	r = idn__aliaslist_aliasfile(encoding_alias_list, path);
641 
642 	TRACE(("idn_converter_aliasfile(): %s\n", idn_result_tostring(r)));
643 	return (r);
644 }
645 
646 idn_result_t
idn_converter_resetalias(void)647 idn_converter_resetalias(void) {
648 	idn__aliaslist_t list;
649 	idn_result_t r;
650 
651  	TRACE(("idn_converter_resetalias()\n"));
652 
653 	if (encoding_alias_list == NULL) {
654 		WARNING(("idn_converter_resetalias(): the module is not "
655 			 "initialized\n"));
656 		return (idn_failure);
657 	}
658 
659 	list = encoding_alias_list;
660 	encoding_alias_list = NULL;
661 	idn__aliaslist_destroy(list);
662 	list = NULL;
663 	r = idn__aliaslist_create(&list);
664 	encoding_alias_list = list;
665 
666 	TRACE(("idn_converter_resetalias(): %s\n", idn_result_tostring(r)));
667 	return (r);
668 }
669 
670 const char *
idn_converter_getrealname(const char * name)671 idn_converter_getrealname(const char *name) {
672 	char *realname;
673 	idn_result_t r;
674 
675  	TRACE(("idn_converter_getrealname()\n"));
676 
677 	assert(name != NULL);
678 
679 	if (encoding_alias_list == NULL) {
680 		WARNING(("idn_converter_getrealname(): the module is not "
681 			 "initialized\n"));
682 		return (name);
683 	}
684 
685 	r = idn__aliaslist_find(encoding_alias_list, name, &realname);
686 	if (r != idn_success) {
687 		return (name);
688 	}
689 	return (realname);
690 }
691 
692 /*
693  * Round trip check.
694  */
695 
696 static idn_result_t
roundtrip_check(idn_converter_t ctx,const unsigned long * from,const char * to)697 roundtrip_check(idn_converter_t ctx, const unsigned long *from, const char *to)
698 {
699 	/*
700 	 * One problem with iconv() convertion is that
701 	 * iconv() doesn't signal an error if the input
702 	 * string contains characters which are valid but
703 	 * do not have mapping to the output codeset.
704 	 * (the behavior of iconv() for that case is defined as
705 	 * `implementation dependent')
706 	 * One way to check this case is to perform round-trip
707 	 * conversion and see if it is same as the original string.
708 	 */
709 	idn_result_t r;
710 	unsigned long *back;
711 	unsigned long backbuf[256];
712 	size_t fromlen;
713 	size_t backlen;
714 
715 	TRACE(("idn_converter_convert: round-trip checking (from=\"%s\")\n",
716 	       idn__debug_ucs4xstring(from, 50)));
717 
718 	/* Allocate enough buffer. */
719 	fromlen = idn_ucs4_strlen(from) + 1;
720 	if (fromlen * sizeof(*back) <= sizeof(backbuf)) {
721 		backlen = sizeof(backbuf);
722 		back = backbuf;
723 	} else {
724 		backlen = fromlen;
725 		back = (unsigned long *)malloc(backlen * sizeof(*back));
726 		if (back == NULL)
727 			return (idn_nomemory);
728 	}
729 
730 	/*
731 	 * Perform backward conversion.
732 	 */
733 	r = idn_converter_convtoucs4(ctx, to, back, backlen);
734 	switch (r) {
735 	case idn_success:
736 		if (memcmp(back, from, sizeof(*from) * fromlen) != 0)
737 			r = idn_nomapping;
738 		break;
739 	case idn_invalid_encoding:
740 	case idn_buffer_overflow:
741 		r = idn_nomapping;
742 		break;
743 	default:
744 		break;
745 	}
746 
747 	if (back != backbuf)
748 		free(back);
749 
750 	if (r != idn_success) {
751 		TRACE(("round-trip check failed: %s\n",
752 		       idn_result_tostring(r)));
753 	}
754 
755 	return (r);
756 }
757 
758 /*
759  * Identity conversion (or, no conversion at all).
760  */
761 
762 static idn_result_t
converter_none_open(idn_converter_t ctx,void ** privdata)763 converter_none_open(idn_converter_t ctx, void **privdata) {
764 	assert(ctx != NULL);
765 
766 	return (idn_success);
767 }
768 
769 static idn_result_t
converter_none_close(idn_converter_t ctx,void * privdata)770 converter_none_close(idn_converter_t ctx, void *privdata) {
771 	assert(ctx != NULL);
772 
773 	return (idn_success);
774 }
775 
776 static idn_result_t
converter_none_convfromucs4(idn_converter_t ctx,void * privdata,const unsigned long * from,char * to,size_t tolen)777 converter_none_convfromucs4(idn_converter_t ctx, void *privdata,
778 		       const unsigned long *from, char *to, size_t tolen) {
779 	assert(ctx != NULL && from != NULL && to != NULL);
780 
781 	return idn_ucs4_ucs4toutf8(from, to, tolen);
782 }
783 
784 static idn_result_t
converter_none_convtoucs4(idn_converter_t ctx,void * privdata,const char * from,unsigned long * to,size_t tolen)785 converter_none_convtoucs4(idn_converter_t ctx, void *privdata,
786 		     const char *from, unsigned long *to, size_t tolen) {
787 	assert(ctx != NULL && from != NULL && to != NULL);
788 
789 	return idn_ucs4_utf8toucs4(from, to, tolen);
790 }
791 
792 #ifndef WITHOUT_ICONV
793 
794 /*
795  * Conversion using iconv() interface.
796  */
797 
798 static idn_result_t
converter_iconv_openfromucs4(idn_converter_t ctx,void ** privdata)799 converter_iconv_openfromucs4(idn_converter_t ctx, void **privdata) {
800 	iconv_t *ictxp;
801 	idn_result_t r;
802 
803 	assert(ctx != NULL);
804 
805 	r = iconv_initialize_privdata(privdata);
806 	if (r != idn_success)
807 		return (r);
808 
809 	ictxp = (iconv_t *)*privdata;
810 	*ictxp = iconv_open(ctx->local_encoding_name, IDN_UTF8_ENCODING_NAME);
811 	if (*ictxp == (iconv_t)(-1)) {
812 		free(*privdata);
813 		*privdata = NULL;
814 		switch (errno) {
815 		case ENOMEM:
816 			return (idn_nomemory);
817 		case EINVAL:
818 			return (idn_invalid_name);
819 		default:
820 			WARNING(("iconv_open failed with errno %d\n", errno));
821 			return (idn_failure);
822 		}
823 	}
824 
825 	return (idn_success);
826 }
827 
828 static idn_result_t
converter_iconv_opentoucs4(idn_converter_t ctx,void ** privdata)829 converter_iconv_opentoucs4(idn_converter_t ctx, void **privdata) {
830 	iconv_t *ictxp;
831 	idn_result_t r;
832 
833 	assert(ctx != NULL);
834 
835 	r = iconv_initialize_privdata(privdata);
836 	if (r != idn_success)
837 		return (r);
838 
839 	ictxp = (iconv_t *)*privdata + 1;
840 	*ictxp = iconv_open(IDN_UTF8_ENCODING_NAME, ctx->local_encoding_name);
841 	if (*ictxp == (iconv_t)(-1)) {
842 		free(*privdata);
843 		*privdata = NULL;
844 		switch (errno) {
845 		case ENOMEM:
846 			return (idn_nomemory);
847 		case EINVAL:
848 			return (idn_invalid_name);
849 		default:
850 			WARNING(("iconv_open failed with errno %d\n", errno));
851 			return (idn_failure);
852 		}
853 	}
854 
855 	return (idn_success);
856 }
857 
858 static idn_result_t
iconv_initialize_privdata(void ** privdata)859 iconv_initialize_privdata(void **privdata) {
860 	if (*privdata == NULL) {
861 		*privdata = malloc(sizeof(iconv_t) * 2);
862 		if (*privdata == NULL)
863 			return (idn_nomemory);
864 		*((iconv_t *)*privdata) = (iconv_t)(-1);
865 		*((iconv_t *)*privdata + 1) = (iconv_t)(-1);
866 	}
867 
868 	return (idn_success);
869 }
870 
/*
 * Close every opened descriptor in the two-slot array `privdata' and
 * free the array.  A NULL `privdata' is ignored.
 */
static void
iconv_finalize_privdata(void *privdata) {
	iconv_t *slots = (iconv_t *)privdata;
	int i;

	if (slots == NULL)
		return;

	for (i = 0; i < 2; i++) {
		/* (iconv_t)(-1) marks a slot that was never opened */
		if (slots[i] != (iconv_t)(-1))
			iconv_close(slots[i]);
	}
	free(privdata);
}
886 
887 static idn_result_t
converter_iconv_close(idn_converter_t ctx,void * privdata)888 converter_iconv_close(idn_converter_t ctx, void *privdata) {
889 	assert(ctx != NULL);
890 
891 	iconv_finalize_privdata(privdata);
892 
893 	return (idn_success);
894 }
895 
896 static idn_result_t
converter_iconv_convfromucs4(idn_converter_t ctx,void * privdata,const unsigned long * from,char * to,size_t tolen)897 converter_iconv_convfromucs4(idn_converter_t ctx, void *privdata,
898 			     const unsigned long *from, char *to,
899 			     size_t tolen) {
900 	iconv_t ictx;
901 	char *utf8 = NULL;
902 	size_t utf8size = 256;  /* large enough */
903 	idn_result_t r;
904 	size_t sz;
905 	size_t inleft;
906 	size_t outleft;
907 	char *inbuf, *outbuf;
908 
909 	assert(ctx != NULL && from != NULL && to != NULL);
910 
911 	if (tolen <= 0) {
912 		r = idn_buffer_overflow;	/* need space for NUL */
913 		goto ret;
914 	}
915 
916 	/*
917 	 * UCS4 -> UTF-8 conversion.
918 	 */
919 	utf8 = (char *)malloc(utf8size);
920 	if (utf8 == NULL) {
921 		r = idn_nomemory;
922 		goto ret;
923 	}
924 
925 try_again:
926 	r = idn_ucs4_ucs4toutf8(from, utf8, utf8size);
927 	if (r == idn_buffer_overflow) {
928 		char *new_utf8;
929 
930 		utf8size *= 2;
931 		new_utf8 = (char *)realloc(utf8, utf8size);
932 		if (new_utf8 == NULL) {
933 			r = idn_nomemory;
934 			goto ret;
935 		}
936 		utf8 = new_utf8;
937 		goto try_again;
938 	} else if (r != idn_success) {
939 		goto ret;
940 	}
941 
942 	ictx = ((iconv_t *)privdata)[0];
943 
944 	/*
945 	 * Reset internal state.
946 	 *
947 	 * The following code should work according to the SUSv2 spec,
948 	 * but causes segmentation fault with Solaris 2.6.
949 	 * So.. a work-around.
950 	 *
951 	 * (void)iconv(ictx, (const char **)NULL, (size_t *)NULL,
952 	 * 	    (char **)NULL, (size_t *)NULL);
953 	 */
954 	inleft = 0;
955 	outbuf = NULL;
956 	outleft = 0;
957 	(void)iconv(ictx, (const char **)NULL, &inleft, &outbuf, &outleft);
958 
959 	inleft = strlen(utf8);
960 	inbuf = utf8;
961 	outleft = tolen - 1;	/* reserve space for terminating NUL */
962 	sz = iconv(ictx, (const char **)&inbuf, &inleft, &to, &outleft);
963 
964 	if (sz == (size_t)(-1) || inleft > 0) {
965 		switch (errno) {
966 		case EILSEQ:
967 		case EINVAL:
968 			/*
969 			 * We already checked the validity of the input
970 			 * string.  So we assume a mapping error.
971 			 */
972 			r = idn_nomapping;
973 			goto ret;
974 		case E2BIG:
975 			r = idn_buffer_overflow;
976 			goto ret;
977 		default:
978 			WARNING(("iconv failed with errno %d\n", errno));
979 			r = idn_failure;
980 			goto ret;
981 		}
982 	}
983 
984 	/*
985 	 * For UTF-8 -> local conversion, append a sequence of
986 	 * state reset.
987 	 */
988 	inleft = 0;
989 	sz = iconv(ictx, (const char **)NULL, &inleft, &to, &outleft);
990 	if (sz == (size_t)(-1)) {
991 		switch (errno) {
992 		case EILSEQ:
993 		case EINVAL:
994 			r = idn_invalid_encoding;
995 			goto ret;
996 		case E2BIG:
997 			r = idn_buffer_overflow;
998 			goto ret;
999 		default:
1000 			WARNING(("iconv failed with errno %d\n", errno));
1001 			r = idn_failure;
1002 			goto ret;
1003 		}
1004 	}
1005 	*to = '\0';
1006 	r = idn_success;
1007 
1008 ret:
1009 	free(utf8);
1010 	return (r);
1011 
1012 }
1013 
1014 static idn_result_t
converter_iconv_convtoucs4(idn_converter_t ctx,void * privdata,const char * from,unsigned long * to,size_t tolen)1015 converter_iconv_convtoucs4(idn_converter_t ctx, void *privdata,
1016 			   const char *from, unsigned long *to, size_t tolen) {
1017 	iconv_t ictx;
1018 	char *utf8 = NULL;
1019 	size_t utf8size = 256;  /* large enough */
1020 	idn_result_t r;
1021 	size_t sz;
1022 	size_t inleft;
1023 	size_t outleft;
1024 	const char *from_ptr;
1025 	char *outbuf;
1026 
1027 	assert(ctx != NULL && from != NULL && to != NULL);
1028 
1029 	if (tolen <= 0) {
1030 		r = idn_buffer_overflow;	/* need space for NUL */
1031 		goto ret;
1032 	}
1033 	ictx = ((iconv_t *)privdata)[1];
1034 	utf8 = (char *)malloc(utf8size);
1035 	if (utf8 == NULL) {
1036 		r = idn_nomemory;
1037 		goto ret;
1038 	}
1039 
1040 try_again:
1041 	/*
1042 	 * Reset internal state.
1043 	 */
1044 	inleft = 0;
1045 	outbuf = NULL;
1046 	outleft = 0;
1047 	(void)iconv(ictx, (const char **)NULL, &inleft, &outbuf, &outleft);
1048 
1049 	from_ptr = from;
1050 	inleft = strlen(from);
1051 	outbuf = utf8;
1052 	outleft = utf8size - 1;    /* reserve space for terminating NUL */
1053 	sz = iconv(ictx, (const char **)&from_ptr, &inleft, &outbuf, &outleft);
1054 
1055 	if (sz == (size_t)(-1) || inleft > 0) {
1056 		char *new_utf8;
1057 
1058 		switch (errno) {
1059 		case EILSEQ:
1060 		case EINVAL:
1061 			/*
1062 			 * We assume all the characters in the local
1063 			 * codeset are included in UCS.  This means mapping
1064 			 * error is not possible, so the input string must
1065 			 * have some problem.
1066 			 */
1067 			r = idn_invalid_encoding;
1068 			goto ret;
1069 		case E2BIG:
1070 			utf8size *= 2;
1071 			new_utf8 = (char *)realloc(utf8, utf8size);
1072 			if (new_utf8 == NULL) {
1073 				r = idn_nomemory;
1074 				goto ret;
1075 			}
1076 			utf8 = new_utf8;
1077 			goto try_again;
1078 		default:
1079 			WARNING(("iconv failed with errno %d\n", errno));
1080 			r = idn_failure;
1081 			goto ret;
1082 		}
1083 	}
1084 	*outbuf = '\0';
1085 
1086 	/*
1087 	 * UTF-8 -> UCS4 conversion.
1088 	 */
1089 	r = idn_ucs4_utf8toucs4(utf8, to, tolen);
1090 
1091 ret:
1092 	free(utf8);
1093 	return (r);
1094 }
1095 
1096 #endif /* !WITHOUT_ICONV */
1097 
1098 #ifdef DEBUG
/*
 * Conversion to/from unicode escape string.
 * An arbitrary UCS-4 character can be specified by a special sequence
 *	\u{XXXXXX}
 * where XXXXXX denotes any hexadecimal string up to FFFFFFFF.
 * This is designed for debugging.
 */
1106 
1107 static idn_result_t
converter_uescape_convfromucs4(idn_converter_t ctx,void * privdata,const unsigned long * from,char * to,size_t tolen)1108 converter_uescape_convfromucs4(idn_converter_t ctx, void *privdata,
1109 			  const unsigned long *from, char *to,
1110 			  size_t tolen) {
1111 	idn_result_t r;
1112 	unsigned long v;
1113 
1114 	while (*from != '\0') {
1115 		v = *from++;
1116 
1117 		if (v <= 0x7f) {
1118 			if (tolen < 1) {
1119 				r = idn_buffer_overflow;
1120 				goto failure;
1121 			}
1122 			*to++ = v;
1123 			tolen--;
1124 		} else if (v <= 0xffffffff) {
1125 			char tmp[20];
1126 			int len;
1127 
1128 			(void)sprintf(tmp, "\\u{%lx}", v);
1129 			len = strlen(tmp);
1130 			if (tolen < len) {
1131 				r = idn_buffer_overflow;
1132 				goto failure;
1133 			}
1134 			(void)memcpy(to, tmp, len);
1135 			to += len;
1136 			tolen -= len;
1137 		} else {
1138 			r = idn_invalid_encoding;
1139 			goto failure;
1140 		}
1141 	}
1142 
1143 	if (tolen <= 0) {
1144 		r = idn_buffer_overflow;
1145 		goto failure;
1146 	}
1147 	*to = '\0';
1148 
1149 	return (idn_success);
1150 
1151 failure:
1152 	if (r != idn_buffer_overflow) {
1153 		WARNING(("idn_uescape_convfromucs4(): %s\n",
1154 			 idn_result_tostring(r)));
1155 	}
1156 	return (r);
1157 }
1158 
1159 static idn_result_t
converter_uescape_convtoucs4(idn_converter_t ctx,void * privdata,const char * from,unsigned long * to,size_t tolen)1160 converter_uescape_convtoucs4(idn_converter_t ctx, void *privdata,
1161 			const char *from, unsigned long *to, size_t tolen)
1162 {
1163 	idn_result_t r;
1164 	size_t fromlen = strlen(from);
1165 
1166 	while (*from != '\0') {
1167 		if (tolen <= 0) {
1168 			r = idn_buffer_overflow;
1169 			goto failure;
1170 		}
1171 		if (strncmp(from, "\\u{", 3) == 0 ||
1172 		    strncmp(from, "\\U{", 3) == 0) {
1173 			size_t ullen;
1174 			unsigned long v;
1175 			char *end;
1176 
1177 			v = strtoul(from + 3, &end, 16);
1178 			ullen = end - (from + 3);
1179 			if (*end == '}' && ullen > 1 && ullen < 8) {
1180 				*to = v;
1181 				from = end + 1;
1182 				fromlen -= ullen;
1183 			} else {
1184 				*to = '\\';
1185 				from++;
1186 				fromlen--;
1187 			}
1188 		} else {
1189 			int c = *(unsigned char *)from;
1190 			size_t width;
1191 			char buf[8];
1192 
1193 			if (c < 0x80)
1194 				width = 1;
1195 			else if (c < 0xc0)
1196 				width = 0;
1197 			else if (c < 0xe0)
1198 				width = 2;
1199 			else if (c < 0xf0)
1200 				width = 3;
1201 			else if (c < 0xf8)
1202 				width = 4;
1203 			else if (c < 0xfc)
1204 				width = 5;
1205 			else if (c < 0xfe)
1206 				width = 6;
1207 			else
1208 				width = 0;
1209 			if (width == 0 || width > fromlen) {
1210 				r = idn_invalid_encoding;
1211 				goto failure;
1212 			}
1213 
1214 			memcpy(buf, from, width);
1215 			buf[width] = '\0';
1216 			r = idn_ucs4_utf8toucs4(buf, to, tolen);
1217 			if (r != idn_success) {
1218 				r = idn_invalid_encoding;
1219 				goto failure;
1220 			}
1221 			from += width;
1222 			fromlen -= width;
1223 		}
1224 		to++;
1225 		tolen--;
1226 	}
1227 
1228 	if (tolen <= 0) {
1229 		r = idn_buffer_overflow;
1230 		goto failure;
1231 	}
1232 	*to = '\0';
1233 
1234 	return (idn_success);
1235 
1236 failure:
1237 	if (r != idn_buffer_overflow) {
1238 		WARNING(("idn_uescape_convtoucs4(): %s\n",
1239 			 idn_result_tostring(r)));
1240 	}
1241 	return (r);
1242 }
1243 
1244 #endif
1245