1 /*++
2 /* NAME
3 /*	midna_domain 3
4 /* SUMMARY
5 /*	ASCII/UTF-8 domain name conversion
6 /* SYNOPSIS
7 /*	#include <midna_domain.h>
8 /*
9 /*	int midna_domain_cache_size;
10 /*	int midna_domain_transitional;
11 /*
12 /*	const char *midna_domain_to_ascii(
13 /*	const char *name)
14 /*
15 /*	const char *midna_domain_to_utf8(
16 /*	const char *name)
17 /*
18 /*	const char *midna_domain_suffix_to_ascii(
19 /*	const char *name)
20 /*
21 /*	const char *midna_domain_suffix_to_utf8(
22 /*	const char *name)
23 /* AUXILIARY FUNCTIONS
24 /*	void midna_domain_pre_chroot(void)
25 /* DESCRIPTION
26 /*	The functions in this module transform domain names from/to
27 /*	ASCII and UTF-8 form. The result is cached to avoid repeated
28 /*	conversion.
29 /*
/*	This module builds on the ICU library implementation of the
/*	UTS #46 specification, using default ICU library options
/*	because those are likely best tested: with case mapping,
/*	with normalization, with limited IDNA2003 compatibility,
/*	without STD3 ASCII rules. Non-transitional processing is
/*	requested unless midna_domain_transitional is non-zero.
35 /*
/*	midna_domain_to_ascii() converts a UTF-8 or ASCII domain
37 /*	name to ASCII.  The result is a null pointer in case of
38 /*	error.  This function verifies that the result passes
39 /*	valid_hostname().
40 /*
/*	midna_domain_to_utf8() converts a UTF-8 or ASCII domain
42 /*	name to UTF-8.  The result is a null pointer in case of
43 /*	error.  This function verifies that the result, after
44 /*	conversion to ASCII, passes valid_hostname().
45 /*
46 /*	midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8()
47 /*	take a name that starts with '.' and otherwise perform the
48 /*	same operations as midna_domain_to_ascii() and
49 /*	midna_domain_to_utf8().
50 /*
51 /*	midna_domain_cache_size specifies the size of the conversion
52 /*	result cache.  This value is used only once, upon the first
53 /*	lookup request.
54 /*
55 /*	midna_domain_transitional enables transitional conversion
56 /*	between UTF8 and ASCII labels.
57 /*
58 /*	midna_domain_pre_chroot() does some pre-chroot initialization.
59 /* SEE ALSO
60 /*	http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing
61 /*	msg(3) diagnostics interface
62 /* DIAGNOSTICS
63 /*	Fatal errors: memory allocation problem.
64 /*	Warnings: conversion error or result validation error.
65 /* LICENSE
66 /* .ad
67 /* .fi
68 /*	The Secure Mailer license must be distributed with this software.
69 /* AUTHOR(S)
70 /*	Arnt Gulbrandsen
71 /*
72 /*	Wietse Venema
73 /*	IBM T.J. Watson Research
74 /*	P.O. Box 704
75 /*	Yorktown Heights, NY 10598, USA
76 /*
77 /*	Wietse Venema
78 /*	Google, Inc.
79 /*	111 8th Avenue
80 /*	New York, NY 10011, USA
81 /*--*/
82 
83  /*
84   * System library.
85   */
86 #include <sys_defs.h>
87 #include <string.h>
88 #include <ctype.h>
89 
90 #ifndef NO_EAI
91 #include <unicode/uidna.h>
92 
93  /*
94   * Utility library.
95   */
96 #include <mymalloc.h>
97 #include <msg.h>
98 #include <ctable.h>
99 #include <stringops.h>
100 #include <valid_hostname.h>
101 #include <name_mask.h>
102 #include <midna_domain.h>
103 
104  /*
105   * Application-specific.
106   */
/* Initial value of midna_domain_cache_size. */
#define DEF_MIDNA_CACHE_SIZE	256

/* Size argument passed to ctable_create() when a cache is first created. */
int     midna_domain_cache_size = DEF_MIDNA_CACHE_SIZE;
/* Non-zero selects UIDNA_DEFAULT instead of UIDNA_NONTRANSITIONAL_*. */
int     midna_domain_transitional = 0;
/* Scratch buffer shared by the midna_domain_suffix_to_*() functions. */
static VSTRING *midna_domain_buf;	/* x.suffix */

/* Shorthand for vstring_str(). */
#define STR(x)	vstring_str(x)
114 
115 /* midna_domain_strerror - pick one for error reporting */
116 
midna_domain_strerror(UErrorCode error,int info_errors)117 static const char *midna_domain_strerror(UErrorCode error, int info_errors)
118 {
119 
120     /*
121      * XXX The UIDNA_ERROR_EMPTY_LABEL etc. names are defined in an ENUM, so
122      * we can't use #ifdef to dynamically determine which names exist.
123      */
124     static LONG_NAME_MASK uidna_errors[] = {
125 	"UIDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL,
126 	"UIDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG,
127 	"UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG,
128 	"UIDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN,
129 	"UIDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN,
130 	"UIDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4,
131 	"UIDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK,
132 	"UIDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED,
133 	"UIDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE,
134 	"UIDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT,
135 	"UIDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL,
136 	"UIDNA_ERROR_BIDI", UIDNA_ERROR_BIDI,
137 	"UIDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ,
138 	/* The above errors are defined with ICU 46 and later. */
139 	0,
140     };
141 
142     if (info_errors) {
143 	return (str_long_name_mask_opt((VSTRING *) 0, "idna error",
144 				       uidna_errors, info_errors,
145 				       NAME_MASK_NUMBER | NAME_MASK_COMMA));
146     } else {
147 	return u_errorName(error);
148     }
149 }
150 
151 /* midna_domain_pre_chroot - pre-chroot initialization */
152 
midna_domain_pre_chroot(void)153 void    midna_domain_pre_chroot(void)
154 {
155     UErrorCode error = U_ZERO_ERROR;
156     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
157     UIDNA  *idna;
158 
159     idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
160 			   : UIDNA_NONTRANSITIONAL_TO_ASCII, &error);
161     if (U_FAILURE(error))
162 	msg_warn("ICU library initialization failed: %s",
163 		 midna_domain_strerror(error, info.errors));
164     uidna_close(idna);
165 }
166 
167 /* midna_domain_to_ascii_create - convert domain to ASCII */
168 
midna_domain_to_ascii_create(const char * name,void * unused_context)169 static void *midna_domain_to_ascii_create(const char *name, void *unused_context)
170 {
171     static const char myname[] = "midna_domain_to_ascii_create";
172     char    buf[1024];			/* XXX */
173     UErrorCode error = U_ZERO_ERROR;
174     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
175     UIDNA  *idna;
176     int     anl;
177 
178     /*
179      * Paranoia: do not expose uidna_*() to unfiltered network data.
180      */
181     if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
182 	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
183 		 myname, name, "malformed UTF-8");
184 	return (0);
185     }
186 
187     /*
188      * Perform the requested conversion.
189      */
190     idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
191 			   : UIDNA_NONTRANSITIONAL_TO_ASCII, &error);
192     anl = uidna_nameToASCII_UTF8(idna,
193 				 name, strlen(name),
194 				 buf, sizeof(buf) - 1,
195 				 &info,
196 				 &error);
197     uidna_close(idna);
198 
199     /*
200      * Paranoia: verify that the result passes valid_hostname(). A quick
201      * check shows that UTS46 ToASCII by default rejects inputs with labels
202      * that start or end in '-', with names or labels that are over-long, or
203      * "fake" A-labels, as required by UTS 46 section 4.1, but we rely on
204      * valid_hostname() on the output side just to be sure.
205      */
206     if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
207 	buf[anl] = 0;				/* XXX */
208 	if (!valid_hostname(buf, DONT_GRIPE)) {
209 	    msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
210 		     myname, name, "malformed ASCII label(s)");
211 	    return (0);
212 	}
213 	return (mystrndup(buf, anl));
214     } else {
215 	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
216 		 myname, name, midna_domain_strerror(error, info.errors));
217 	return (0);
218     }
219 }
220 
221 /* midna_domain_to_utf8_create - convert domain to UTF8 */
222 
midna_domain_to_utf8_create(const char * name,void * unused_context)223 static void *midna_domain_to_utf8_create(const char *name, void *unused_context)
224 {
225     static const char myname[] = "midna_domain_to_utf8_create";
226     char    buf[1024];			/* XXX */
227     UErrorCode error = U_ZERO_ERROR;
228     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
229     UIDNA  *idna;
230     int     anl;
231 
232     /*
233      * Paranoia: do not expose uidna_*() to unfiltered network data.
234      */
235     if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
236 	msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
237 		 myname, name, "malformed UTF-8");
238 	return (0);
239     }
240 
241     /*
242      * Perform the requested conversion.
243      */
244     idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
245 			   : UIDNA_NONTRANSITIONAL_TO_UNICODE, &error);
246     anl = uidna_nameToUnicodeUTF8(idna,
247 				  name, strlen(name),
248 				  buf, sizeof(buf) - 1,
249 				  &info,
250 				  &error);
251     uidna_close(idna);
252 
253     /*
254      * Paranoia: UTS46 toUTF8 by default accepts and produces an over-long
255      * name or a name that contains an over-long NR-LDH label (and perhaps
256      * other invalid forms that are not covered in UTS 46, section 4.1). We
257      * rely on midna_domain_to_ascii() to validate the output.
258      */
259     if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
260 	buf[anl] = 0;				/* XXX */
261 	if (midna_domain_to_ascii(buf) == 0)
262 	    return (0);
263 	return (mystrndup(buf, anl));
264     } else {
265 	msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s",
266 		 myname, name, midna_domain_strerror(error, info.errors));
267 	return (0);
268     }
269 }
270 
/* midna_domain_cache_free - cache element destructor */

static void midna_domain_cache_free(void *value, void *unused_context)
{

    /*
     * The create callbacks in this module return a null pointer when a
     * conversion fails, so there may be nothing to release.
     */
    if (value == 0)
	return;
    myfree(value);
}
278 
279 /* midna_domain_to_ascii - convert name to ASCII */
280 
midna_domain_to_ascii(const char * name)281 const char *midna_domain_to_ascii(const char *name)
282 {
283     static CTABLE *midna_domain_to_ascii_cache = 0;
284 
285     if (midna_domain_to_ascii_cache == 0)
286 	midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size,
287 					       midna_domain_to_ascii_create,
288 						    midna_domain_cache_free,
289 						    (void *) 0);
290     return (ctable_locate(midna_domain_to_ascii_cache, name));
291 }
292 
293 /* midna_domain_to_utf8 - convert name to UTF8 */
294 
midna_domain_to_utf8(const char * name)295 const char *midna_domain_to_utf8(const char *name)
296 {
297     static CTABLE *midna_domain_to_utf8_cache = 0;
298 
299     if (midna_domain_to_utf8_cache == 0)
300 	midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size,
301 						midna_domain_to_utf8_create,
302 						   midna_domain_cache_free,
303 						   (void *) 0);
304     return (ctable_locate(midna_domain_to_utf8_cache, name));
305 }
306 
307 /* midna_domain_suffix_to_ascii - convert .name to ASCII */
308 
midna_domain_suffix_to_ascii(const char * suffix)309 const char *midna_domain_suffix_to_ascii(const char *suffix)
310 {
311     const char *cache_res;
312 
313     /*
314      * If prepending x to .name causes the result to become too long, then
315      * the suffix is bad.
316      */
317     if (midna_domain_buf == 0)
318 	midna_domain_buf = vstring_alloc(100);
319     vstring_sprintf(midna_domain_buf, "x%s", suffix);
320     if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0)
321 	return (0);
322     else
323 	return (cache_res + 1);
324 }
325 
326 /* midna_domain_suffix_to_utf8 - convert .name to UTF8 */
327 
midna_domain_suffix_to_utf8(const char * name)328 const char *midna_domain_suffix_to_utf8(const char *name)
329 {
330     const char *cache_res;
331 
332     /*
333      * If prepending x to .name causes the result to become too long, then
334      * the suffix is bad.
335      */
336     if (midna_domain_buf == 0)
337 	midna_domain_buf = vstring_alloc(100);
338     vstring_sprintf(midna_domain_buf, "x%s", name);
339     if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0)
340 	return (0);
341     else
342 	return (cache_res + 1);
343 }
344 
345 #ifdef TEST
346 
347  /*
348   * Test program - reads names from stdin, reports invalid names to stderr.
349   */
350 #include <unistd.h>
351 #include <stdlib.h>
352 #include <locale.h>
353 
354 #include <stringops.h>			/* XXX util_utf8_enable */
355 #include <vstring.h>
356 #include <vstream.h>
357 #include <vstring_vstream.h>
358 #include <msg_vstream.h>
359 
main(int argc,char ** argv)360 int     main(int argc, char **argv)
361 {
362     VSTRING *buffer = vstring_alloc(1);
363     const char *bp;
364     const char *ascii;
365     const char *utf8;
366 
367     if (setlocale(LC_ALL, "C") == 0)
368 	msg_fatal("setlocale(LC_ALL, C) failed: %m");
369 
370     msg_vstream_init(argv[0], VSTREAM_ERR);
371     /* msg_verbose = 1; */
372     util_utf8_enable = 1;
373 
374     if (geteuid() == 0) {
375 	midna_domain_pre_chroot();
376 	if (chroot(".") != 0)
377 	    msg_fatal("chroot(\".\"): %m");
378     }
379     while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
380 	bp = STR(buffer);
381 	msg_info("> %s", bp);
382 	while (ISSPACE(*bp))
383 	    bp++;
384 	if (*bp == '#' || *bp == 0)
385 	    continue;
386 	msg_info("unconditional conversions:");
387 	utf8 = midna_domain_to_utf8(bp);
388 	msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)");
389 	ascii = midna_domain_to_ascii(bp);
390 	msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)");
391 	msg_info("conditional conversions:");
392 	if (!allascii(bp)) {
393 	    if (ascii != 0) {
394 		utf8 = midna_domain_to_utf8(ascii);
395 		msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"",
396 			 bp, ascii, utf8 ? utf8 : "(error)");
397 		if (utf8 != 0) {
398 		    if (strcmp(utf8, bp) != 0)
399 			msg_warn("\"%s\" != \"%s\"", bp, utf8);
400 		}
401 	    }
402 	} else {
403 	    if (utf8 != 0) {
404 		ascii = midna_domain_to_ascii(utf8);
405 		msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"",
406 			 bp, utf8, ascii ? ascii : "(error)");
407 		if (ascii != 0) {
408 		    if (strcmp(ascii, bp) != 0)
409 			msg_warn("\"%s\" != \"%s\"", bp, ascii);
410 		}
411 	    }
412 	}
413     }
414     exit(0);
415 }
416 
417 #endif					/* TEST */
418 
419 #endif					/* NO_EAI */
420