1 /*
2  * Copyright 2014 Chris Young <chris@unsatisfactorysoftware.co.uk>
3  *
4  * This file is part of NetSurf, http://www.netsurf-browser.org/
5  *
6  * NetSurf is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * NetSurf is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /**
20  * \file
21  * NetSurf international domain name handling implementation.
22  */
23 
24 #include <assert.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sys/types.h>
30 
31 #include "netsurf/inttypes.h"
32 
33 #include "utils/errors.h"
34 #include "utils/idna.h"
35 #include "utils/idna_props.h"
36 #include "utils/log.h"
37 #include "utils/punycode.h"
38 #include "utils/utf8.h"
39 #include "utils/utils.h"
40 
41 
42 /**
43  * Convert punycode status into nserror.
44  *
45  * \param status The punycode status to convert.
46  * \return The corresponding nserror code for the status.
47  */
punycode_status_to_nserror(enum punycode_status status)48 static nserror punycode_status_to_nserror(enum punycode_status status)
49 {
50 	nserror ret = NSERROR_NOMEM;
51 
52 	switch (status) {
53 	case punycode_success:
54 		ret = NSERROR_OK;
55 		break;
56 
57 	case punycode_bad_input:
58 		NSLOG(netsurf, INFO, "Bad input");
59 		ret = NSERROR_BAD_ENCODING;
60 		break;
61 
62 	case punycode_big_output:
63 		NSLOG(netsurf, INFO, "Output too big");
64 		ret = NSERROR_BAD_SIZE;
65 		break;
66 
67 	case punycode_overflow:
68 		NSLOG(netsurf, INFO, "Overflow");
69 		ret = NSERROR_NOSPACE;
70 		break;
71 
72 	default:
73 		break;
74 	}
75 	return ret;
76 }
77 
78 
79 /**
80  * Convert a host label in UCS-4 to an ACE version
81  *
82  * \param ucs4_label UCS-4 NFC string containing host label
83  * \param len Length of host label (in characters/codepoints)
84  * \param ace_label ASCII-compatible encoded version
85  * \param out_len Length of ace_label
86  * \return NSERROR_OK on success, appropriate error otherwise
87  *
88  * If return value != NSERROR_OK, output will be left untouched.
89  */
90 static nserror
idna__ucs4_to_ace(int32_t * ucs4_label,size_t len,char ** ace_label,size_t * out_len)91 idna__ucs4_to_ace(int32_t *ucs4_label,
92 		  size_t len,
93 		  char **ace_label,
94 		  size_t *out_len)
95 {
96 	char punycode[65]; /* max length of host label + NULL */
97 	size_t output_length = 60; /* punycode length - 4 - 1 */
98 	nserror ret;
99 
100 	punycode[0] = 'x';
101 	punycode[1] = 'n';
102 	punycode[2] = '-';
103 	punycode[3] = '-';
104 
105 	ret = punycode_status_to_nserror(punycode_encode(len,
106 			(const punycode_uint *)ucs4_label, NULL,
107 			&output_length, punycode + 4));
108 	if (ret != NSERROR_OK) {
109 		return ret;
110 	}
111 
112 	output_length += SLEN("xn--");
113 	punycode[output_length] = '\0';
114 
115 	*ace_label = strdup(punycode);
116 	*out_len = output_length;
117 
118 	return NSERROR_OK;
119 }
120 
121 
122 /**
123  * Convert a host label in ACE format to UCS-4
124  *
125  * \param ace_label ASCII string containing host label
126  * \param ace_len Length of host label
127  * \param ucs4_label Pointer to hold UCS4 decoded version
128  * \param ucs4_len Pointer to hold length of ucs4_label
129  * \return NSERROR_OK on success, appropriate error otherwise
130  *
131  * If return value != NSERROR_OK, output will be left untouched.
132  */
133 static nserror
idna__ace_to_ucs4(const char * ace_label,size_t ace_len,int32_t ** ucs4_label,size_t * ucs4_len)134 idna__ace_to_ucs4(const char *ace_label,
135 		  size_t ace_len,
136 		  int32_t **ucs4_label,
137 		  size_t *ucs4_len)
138 {
139 	int32_t *ucs4;
140 	nserror ret;
141 	size_t output_length = ace_len; /* never exceeds input length */
142 
143 	/* The header should always have been checked before calling */
144 	assert((ace_label[0] == 'x') && (ace_label[1] == 'n') &&
145 		(ace_label[2] == '-') && (ace_label[3] == '-'));
146 
147 	ucs4 = malloc(output_length * 4);
148 	if (ucs4 == NULL) {
149 		return NSERROR_NOMEM;
150 	}
151 
152 	ret = punycode_status_to_nserror(punycode_decode(ace_len - 4,
153 		ace_label + 4, &output_length, (punycode_uint *)ucs4, NULL));
154 	if (ret != NSERROR_OK) {
155 		free(ucs4);
156 		return ret;
157 	}
158 
159 	ucs4[output_length] = '\0';
160 
161 	*ucs4_label = ucs4;
162 	*ucs4_len = output_length;
163 
164 	return NSERROR_OK;
165 }
166 
167 
168 #ifdef WITH_UTF8PROC
169 
170 #include <utf8proc.h>
171 
/**
 * Table of CONTEXTO codepoints for which a contextual rule is defined.
 *
 * The final 0 entry is a sentinel marking the end of the table.
 */
int32_t idna_contexto[] = {
	/* CONTEXTO codepoints which have a rule defined */
	0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661,
	0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668,
	0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5,
	0x06f6, 0x06f7, 0x06f8, 0x06f9, 0
};
179 
180 /**
181  * Find the IDNA property of a UCS-4 codepoint
182  *
183  * \param cp	Unicode codepoint
184  * \return IDNA property
185  */
idna__cp_property(int32_t cp)186 static idna_property idna__cp_property(int32_t cp)
187 {
188 	const idna_table *t;
189 
190 	t = idna_derived;
191 	while (t->p.property) {
192 		if ((cp >= t->start) && (cp <= t->end)) {
193 			return t->p.property;
194 		}
195 		t++;
196 	};
197 
198 	return IDNA_P_DISALLOWED;
199 }
200 
201 
202 /**
203  * Find the Joining_Type property of a UCS-4 codepoint
204  *
205  * \param cp	Unicode codepoint
206  * \return JT property
207  */
idna__jt_property(int32_t cp)208 static idna_unicode_jt idna__jt_property(int32_t cp)
209 {
210 	const idna_table *t;
211 
212 	t = idna_joiningtype;
213 	while (t->p.jt) {
214 		if ((cp >= t->start) && (cp <= t->end)) {
215 			return t->p.jt;
216 		}
217 		t++;
218 	};
219 
220 	return IDNA_UNICODE_JT_U;
221 }
222 
223 
224 /**
225  * Check if a CONTEXTO codepoint has a rule defined
226  *
227  * \param cp	Unicode codepoint
228  * \return true if a rule is defined
229  */
idna__contexto_rule(int32_t cp)230 static bool idna__contexto_rule(int32_t cp)
231 {
232 	int32_t *t;
233 	for (t = idna_contexto; *t != 0; t++) {
234 		if (*t == cp) {
235 			return true;
236 		}
237 	}
238 
239 	return false;
240 }
241 
242 
243 /**
244  * Check if a CONTEXTJ codepoint has a rule defined,
245  * and conforms to that rule.
246  *
247  * \param label UCS-4 string
248  * \param index	character in the string which is CONTEXTJ
249  * \param len The length of the label
250  * \return true if conforming
251  */
idna__contextj_rule(int32_t * label,int index,size_t len)252 static bool idna__contextj_rule(int32_t *label, int index, size_t len)
253 {
254 	const utf8proc_property_t *unicode_props;
255 	idna_unicode_jt joining_type;
256 	int i;
257 	bool match;
258 
259 	/* These CONTEXTJ rules are defined at
260 	 * http://www.iana.org/assignments/idna-tables-5.2.0/idna-tables-5.2.0.xml
261 	 */
262 
263 	if (label[index] == 0x200c) {
264 		if (index == 0) {
265 			return false; /* No previous character */
266 		}
267 		unicode_props = utf8proc_get_property(label[index - 1]);
268 		if (unicode_props->combining_class == IDNA_UNICODE_CCC_VIRAMA) {
269 			return true;
270 		}
271 
272 		match = false;
273 		for (i = 0; i < (index - 1); i++) {
274 			joining_type = idna__jt_property(label[i]);
275 			if (((joining_type == IDNA_UNICODE_JT_L) ||
276 			     (joining_type == IDNA_UNICODE_JT_D)) &&
277 			    (idna__jt_property(label[i+1]) == IDNA_UNICODE_JT_T)) {
278 				match = true;
279 				break;
280 			}
281 		}
282 
283 		if (match == false) {
284 			return false;
285 		}
286 
287 		if (idna__jt_property(label[index+1]) != IDNA_UNICODE_JT_T) {
288 			return false;
289 		}
290 
291 		for (i = (index + 1); i < (int)len; i++) {
292 			joining_type = idna__jt_property(label[i]);
293 
294 			if ((joining_type == IDNA_UNICODE_JT_R) ||
295 			    (joining_type == IDNA_UNICODE_JT_D)) {
296 				return true;
297 			}
298 		}
299 
300 		return false;
301 
302 	} else if (label[index] == 0x200d) {
303 		if (index == 0) {
304 			return false; /* No previous character */
305 		}
306 		unicode_props = utf8proc_get_property(label[index - 1]);
307 		if (unicode_props->combining_class == IDNA_UNICODE_CCC_VIRAMA) {
308 			return true;
309 		}
310 		return false;
311 	}
312 
313 	/* No rule defined */
314 	return false;
315 }
316 
317 
318 /**
319  * Convert a UTF-8 string to UCS-4
320  *
321  * \param utf8_label	UTF-8 string containing host label
322  * \param len	Length of host label (in bytes)
323  * \param ucs4_label	Pointer to update with the output
324  * \param ucs4_len	Pointer to update with the length
325  * \return NSERROR_OK on success, appropriate error otherwise
326  *
327  * If return value != NSERROR_OK, output will be left untouched.
328  */
329 static nserror
idna__utf8_to_ucs4(const char * utf8_label,size_t len,int32_t ** ucs4_label,size_t * ucs4_len)330 idna__utf8_to_ucs4(const char *utf8_label,
331 		   size_t len,
332 		   int32_t **ucs4_label,
333 		   size_t *ucs4_len)
334 {
335 	int32_t *nfc_label;
336 	ssize_t nfc_size;
337 
338 	nfc_label = malloc(len * 4);
339 	if (nfc_label == NULL) {
340 		return NSERROR_NOMEM;
341 	}
342 
343 	nfc_size = utf8proc_decompose((const uint8_t *)utf8_label, len,
344 		nfc_label, len * 4, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
345 	if (nfc_size < 0) {
346 		return NSERROR_NOMEM;
347 	}
348 
349 	nfc_size = utf8proc_normalize_utf32(nfc_label, nfc_size,
350 		UTF8PROC_STABLE | UTF8PROC_COMPOSE);
351 	if (nfc_size < 0) {
352 		return NSERROR_NOMEM;
353 	}
354 
355 	*ucs4_label = nfc_label;
356 	*ucs4_len = nfc_size;
357 
358 	return NSERROR_OK;
359 }
360 
361 
362 /**
363  * Convert a UCS-4 string to UTF-8
364  *
365  * \param ucs4_label	UCS-4 string containing host label
366  * \param ucs4_len	Length of host label (in bytes)
367  * \param utf8_label	Pointer to update with the output
368  * \param utf8_len	Pointer to update with the length
369  * \return NSERROR_OK on success, appropriate error otherwise
370  *
371  * If return value != NSERROR_OK, output will be left untouched.
372  */
373 static nserror
idna__ucs4_to_utf8(const int32_t * ucs4_label,size_t ucs4_len,char ** utf8_label,size_t * utf8_len)374 idna__ucs4_to_utf8(const int32_t *ucs4_label,
375 		   size_t ucs4_len,
376 		   char **utf8_label,
377 		   size_t *utf8_len)
378 {
379 	int32_t *nfc_label;
380 	ssize_t nfc_size = ucs4_len;
381 
382 	nfc_label = malloc(1 + ucs4_len * 4);
383 	if (nfc_label == NULL) {
384 		return NSERROR_NOMEM;
385 	}
386 	memcpy(nfc_label, ucs4_label, ucs4_len * 4);
387 
388 	nfc_size = utf8proc_reencode(nfc_label, ucs4_len,
389 		UTF8PROC_STABLE | UTF8PROC_COMPOSE);
390 	if (nfc_size < 0) {
391 		return NSERROR_NOMEM;
392 	}
393 
394 	*utf8_label = (char *)nfc_label;
395 	*utf8_len = nfc_size;
396 
397 	return NSERROR_OK;
398 }
399 
400 
401 /**
402  * Check if a host label is valid for IDNA2008
403  *
404  * \param label	Host label to check (UCS-4)
405  * \param len	Length of host label (in characters/codepoints)
406  * \return true if compliant, false otherwise
407  */
idna__is_valid(int32_t * label,size_t len)408 static bool idna__is_valid(int32_t *label, size_t len)
409 {
410 	const utf8proc_property_t *unicode_props;
411 	idna_property idna_prop;
412 	size_t i = 0;
413 
414 	/* 1. Check that the string is NFC.
415 	 * This check is skipped as the conversion to Unicode
416 	 * does normalisation as part of the conversion.
417 	 */
418 
419 	/* 2. Check characters 3 and 4 are not '--'. */
420 	if ((len >= 4) && (label[2] == 0x002d) && (label[3] == 0x002d)) {
421 		NSLOG(netsurf, INFO,
422 		      "Check failed: characters 2 and 3 are '--'");
423 		return false;
424 	}
425 
426 	/* 3. Check the first character is not a combining mark */
427 	unicode_props = utf8proc_get_property(label[0]);
428 
429 	if ((unicode_props->category == UTF8PROC_CATEGORY_MN) ||
430 		(unicode_props->category == UTF8PROC_CATEGORY_MC) ||
431 		(unicode_props->category == UTF8PROC_CATEGORY_ME)) {
432 		NSLOG(netsurf, INFO,
433 		      "Check failed: character 0 is a combining mark");
434 		return false;
435 	}
436 
437 	for (i = 0; i < len; i++) {
438 		idna_prop = idna__cp_property(label[i]);
439 
440 		/* 4. Check characters not DISALLOWED by RFC5892 */
441 		if (idna_prop == IDNA_P_DISALLOWED) {
442 			NSLOG(netsurf, INFO,
443 			      "Check failed: character %"PRIsizet" (%x) is DISALLOWED",
444 			      i,
445 			      label[i]);
446 			return false;
447 		}
448 
449 		/* 5. Check CONTEXTJ characters conform to defined rules */
450 		if (idna_prop == IDNA_P_CONTEXTJ) {
451 			if (idna__contextj_rule(label, i, len) == false) {
452 				NSLOG(netsurf, INFO,
453 				      "Check failed: character %"PRIsizet" (%x) does not conform to CONTEXTJ rule",
454 				      i,
455 				      label[i]);
456 				return false;
457 			}
458 		}
459 
460 		/* 6. Check CONTEXTO characters have a rule defined */
461 		/** \todo optionally we can check conformance to this rule */
462 		if (idna_prop == IDNA_P_CONTEXTO) {
463 			if (idna__contexto_rule(label[i]) == false) {
464 				NSLOG(netsurf, INFO,
465 				      "Check failed: character %"PRIsizet" (%x) has no CONTEXTO rule defined",
466 				      i,
467 				      label[i]);
468 				return false;
469 			}
470 		}
471 
472 		/* 7. Check characters are not UNASSIGNED */
473 		if (idna_prop == IDNA_P_UNASSIGNED) {
474 			NSLOG(netsurf, INFO,
475 			      "Check failed: character %"PRIsizet" (%x) is UNASSIGNED",
476 			      i,
477 			      label[i]);
478 			return false;
479 		}
480 
481 		/** \todo 8. (optionally) check Bidi compliance */
482 	}
483 
484 	return true;
485 }
486 
487 
488 /**
489  * Verify an ACE label is valid
490  *
491  * \param label	Host label to check
492  * \param len	Length of label
493  * \return true if valid, false otherwise
494  */
idna__verify(const char * label,size_t len)495 static bool idna__verify(const char *label, size_t len)
496 {
497 	nserror error;
498 	int32_t *ucs4;
499 	char *ace;
500 	ssize_t ucs4_len;
501 	size_t u_ucs4_len, ace_len;
502 
503 	/* Convert our ACE label back to UCS-4 */
504 	error = idna__ace_to_ucs4(label, len, &ucs4, &u_ucs4_len);
505 	if (error != NSERROR_OK) {
506 		return false;
507 	}
508 
509 	/* Perform NFC normalisation */
510 	ucs4_len = utf8proc_normalize_utf32(ucs4, u_ucs4_len,
511 		UTF8PROC_STABLE | UTF8PROC_COMPOSE);
512 	if (ucs4_len < 0) {
513 		free(ucs4);
514 		return false;
515 	}
516 
517 	/* Convert the UCS-4 label back to ACE */
518 	error = idna__ucs4_to_ace(ucs4, (size_t)ucs4_len,
519 				&ace, &ace_len);
520 	free(ucs4);
521 	if (error != NSERROR_OK) {
522 		return false;
523 	}
524 
525 	/* Check if it matches the input */
526 	if ((len == ace_len) && (strncmp(label, ace, len) == 0)) {
527 		free(ace);
528 		return true;
529 	}
530 
531 	NSLOG(netsurf, INFO, "Re-encoded ACE label %s does not match input",
532 	      ace);
533 	free(ace);
534 
535 	return false;
536 }
537 
538 
539 #else /* WITH_UTF8PROC */
540 
541 
542 /**
543  * Convert a UTF-8 string to UCS-4
544  *
545  * \param utf8_label	UTF-8 string containing host label
546  * \param len	Length of host label (in bytes)
547  * \param ucs4_label	Pointer to update with the output
548  * \param ucs4_len	Pointer to update with the length
549  * \return NSERROR_OK on success, appropriate error otherwise
550  *
551  * If return value != NSERROR_OK, output will be left untouched.
552  */
553 static nserror
idna__utf8_to_ucs4(const char * utf8_label,size_t len,int32_t ** ucs4_label,size_t * ucs4_len)554 idna__utf8_to_ucs4(const char *utf8_label,
555 		   size_t len,
556 		   int32_t **ucs4_label,
557 		   size_t *ucs4_len)
558 {
559 	return NSERROR_NOT_IMPLEMENTED;
560 }
561 
562 
563 /**
564  * Convert a UCS-4 string to UTF-8
565  *
566  * \param ucs4_label	UCS-4 string containing host label
567  * \param ucs4_len	Length of host label (in bytes)
568  * \param utf8_label	Pointer to update with the output
569  * \param utf8_len	Pointer to update with the length
570  * \return NSERROR_OK on success, appropriate error otherwise
571  *
572  * If return value != NSERROR_OK, output will be left untouched.
573  */
574 static nserror
idna__ucs4_to_utf8(const int32_t * ucs4_label,size_t ucs4_len,char ** utf8_label,size_t * utf8_len)575 idna__ucs4_to_utf8(const int32_t *ucs4_label,
576 		   size_t ucs4_len,
577 		   char **utf8_label,
578 		   size_t *utf8_len)
579 {
580 	return NSERROR_NOT_IMPLEMENTED;
581 }
582 
583 
/**
 * Check if a host label is valid for IDNA2008 (variant built without
 * WITH_UTF8PROC)
 *
 * This variant performs no checking; every label is reported valid.
 *
 * \param label	Host label to check (UCS-4) (unused)
 * \param len	Length of host label in characters/codepoints (unused)
 * \return true always
 */
static bool idna__is_valid(int32_t *label, size_t len)
{
	(void)label;
	(void)len;

	return true;
}
595 
596 
/**
 * Verify an ACE label is valid (variant built without WITH_UTF8PROC)
 *
 * This variant performs no verification; every label is accepted.
 *
 * \param label	Host label to check (unused)
 * \param len	Length of label (unused)
 * \return true always
 */
static bool idna__verify(const char *label, size_t len)
{
	(void)label;
	(void)len;

	return true;
}
608 
609 
610 #endif /* WITH_UTF8PROC */
611 
612 
/**
 * Measure the leading label of a host string
 *
 * Bytes are counted from the start of the string until a '.', a ':',
 * a NUL byte, or the length limit is met.
 *
 * \param host	String containing a host or FQDN
 * \param max_length	Length of host string to search (in bytes)
 * \return Distance to next separator character or end of string
 */
static size_t idna__host_label_length(const char *host, size_t max_length)
{
	size_t n;

	for (n = 0; n < max_length; n++) {
		const char c = host[n];

		if ((c == '.') || (c == ':') || (c == '\0')) {
			break;
		}
	}

	return n;
}
635 
636 
/**
 * Check if a host label is LDH
 *
 * A label is LDH when it is not empty, consists solely of ASCII
 * letters, digits and hyphens, and neither starts nor ends with a
 * hyphen.
 *
 * \param label	Host label to check
 * \param len	Length of host label
 * \return true if LDH compliant, false otherwise
 */
static bool idna__is_ldh(const char *label, size_t len)
{
	size_t i;

	/* An empty label is never compliant; returning here also keeps the
	 * "len - 1" index below from wrapping round.
	 */
	if (len == 0) {
		return false;
	}

	/* Check for leading or trailing hyphens */
	if ((label[0] == '-') || (label[len - 1] == '-')) {
		return false;
	}

	/* Check for non-alphanumeric, non-hyphen characters */
	for (i = 0; i < len; i++) {
		const char c = label[i];
		const bool ldh = (c == '-') ||
				 ((c >= '0') && (c <= '9')) ||
				 ((c >= 'a') && (c <= 'z')) ||
				 ((c >= 'A') && (c <= 'Z'));

		if (!ldh) {
			return false;
		}
	}

	return true;
}
666 
667 
/**
 * Check if a host label appears to be ACE
 *
 * A label appears to be ACE when it is LDH compliant and begins with
 * the "xn--" prefix.
 *
 * \param label	Host label to check
 * \param len	Length of host label
 * \return true if ACE compliant, false otherwise
 */
static bool idna__is_ace(const char *label, size_t len)
{
	/* The prefix alone is four characters.  Rejecting anything shorter
	 * stops the prefix test below from examining bytes that lie beyond
	 * the label.
	 */
	if (len < 4) {
		return false;
	}

	/* Check it is a valid DNS string */
	if (idna__is_ldh(label, len) == false) {
		return false;
	}

	/* Check the ACE prefix is present */
	return ((label[0] == 'x') && (label[1] == 'n') &&
		(label[2] == '-') && (label[3] == '-'));
}
690 
691 
692 /* exported interface documented in idna.h */
693 nserror
idna_encode(const char * host,size_t len,char ** ace_host,size_t * ace_len)694 idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len)
695 {
696 	nserror error;
697 	int32_t *ucs4_host;
698 	size_t label_len, output_len, ucs4_len, fqdn_len = 0;
699 	char fqdn[256];
700 	char *output, *fqdn_p = fqdn;
701 
702 	label_len = idna__host_label_length(host, len);
703 	if (label_len == 0) {
704 		return NSERROR_BAD_URL;
705 	}
706 
707 	while (label_len != 0) {
708 		if (idna__is_ldh(host, label_len) == false) {
709 			/* This string is IDN or invalid */
710 
711 			/* Convert to Unicode */
712 			error = idna__utf8_to_ucs4(host, label_len,
713 						   &ucs4_host, &ucs4_len);
714 			if (error != NSERROR_OK) {
715 				return error;
716 			}
717 
718 			/* Check this is valid for conversion */
719 			if (idna__is_valid(ucs4_host, ucs4_len) == false) {
720 				free(ucs4_host);
721 				return NSERROR_BAD_URL;
722 			}
723 
724 			/* Convert to ACE */
725 			error = idna__ucs4_to_ace(ucs4_host, ucs4_len,
726 						&output, &output_len);
727 			free(ucs4_host);
728 			if (error != NSERROR_OK) {
729 				return error;
730 			}
731 			strncpy(fqdn_p, output, output_len);
732 			free(output);
733 			fqdn_p += output_len;
734 			fqdn_len += output_len;
735 		} else {
736 			/* This is already a DNS-valid ASCII string */
737 			if ((idna__is_ace(host, label_len) == true) &&
738 			    (idna__verify(host, label_len) == false)) {
739 				NSLOG(netsurf, INFO,
740 				      "Cannot verify ACE label %s", host);
741 				return NSERROR_BAD_URL;
742 			}
743 			strncpy(fqdn_p, host, label_len);
744 			fqdn_p += label_len;
745 			fqdn_len += label_len;
746 		}
747 
748 		*fqdn_p = '.';
749 		fqdn_p++;
750 		fqdn_len++;
751 
752 		host += label_len;
753 		if ((*host == '\0') || (*host == ':')) {
754 			break;
755 		}
756 		host++;
757 		len = len - label_len - 1;
758 
759 		label_len = idna__host_label_length(host, len);
760 	}
761 
762 	fqdn_p--;
763 	*fqdn_p = '\0';
764 	*ace_host = strdup(fqdn);
765 	*ace_len = fqdn_len - 1; /* last character is NULL */
766 
767 	return NSERROR_OK;
768 }
769 
770 
771 /* exported interface documented in idna.h */
772 nserror
idna_decode(const char * ace_host,size_t ace_len,char ** host,size_t * host_len)773 idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *host_len)
774 {
775 	nserror error;
776 	int32_t *ucs4_host;
777 	size_t label_len, output_len, ucs4_len, fqdn_len = 0;
778 	char fqdn[256];
779 	char *output, *fqdn_p = fqdn;
780 
781 	label_len = idna__host_label_length(ace_host, ace_len);
782 	if (label_len == 0) {
783 		return NSERROR_BAD_URL;
784 	}
785 
786 	while (label_len != 0) {
787 		if (idna__is_ace(ace_host, label_len) == true) {
788 			/* This string is DNS-valid and (probably) encoded */
789 
790 			/* Decode to Unicode */
791 			error = idna__ace_to_ucs4(ace_host, label_len,
792 						  &ucs4_host, &ucs4_len);
793 			if (error != NSERROR_OK) {
794 				return error;
795 			}
796 
797 			/* Convert to UTF-8 */
798 			error = idna__ucs4_to_utf8(ucs4_host, ucs4_len,
799 						   &output, &output_len);
800 			free(ucs4_host);
801 			if (error != NSERROR_OK) {
802 				return error;
803 			}
804 
805 			memcpy(fqdn_p, output, output_len);
806 			free(output);
807 			fqdn_p += output_len;
808 			fqdn_len += output_len;
809 		} else {
810 			/* Not ACE */
811 			memcpy(fqdn_p, ace_host, label_len);
812 			fqdn_p += label_len;
813 			fqdn_len += label_len;
814 		}
815 
816 		*fqdn_p = '.';
817 		fqdn_p++;
818 		fqdn_len++;
819 
820 		ace_host += label_len;
821 		if ((*ace_host == '\0') || (*ace_host == ':')) {
822 			break;
823 		}
824 		ace_host++;
825 		ace_len = ace_len - label_len - 1;
826 
827 		label_len = idna__host_label_length(ace_host, ace_len);
828 	}
829 
830 	fqdn_p--;
831 	*fqdn_p = '\0';
832 	*host = strdup(fqdn);
833 	*host_len = fqdn_len - 1; /* last character is NULL */
834 
835 	return NSERROR_OK;
836 }
837