1 /* lookup.c - implementation of IDNA2008 lookup functions
2    Copyright (C) 2011-2021 Simon Josefsson
3    Copyright (C) 2017-2021 Tim Ruehsen
4 
5    Libidn2 is free software: you can redistribute it and/or modify it
6    under the terms of either:
7 
8      * the GNU Lesser General Public License as published by the Free
9        Software Foundation; either version 3 of the License, or (at
10        your option) any later version.
11 
12    or
13 
14      * the GNU General Public License as published by the Free
15        Software Foundation; either version 2 of the License, or (at
16        your option) any later version.
17 
18    or both in parallel, as here.
19 
20    This program is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23    GNU General Public License for more details.
24 
25    You should have received copies of the GNU General Public License and
26    the GNU Lesser General Public License along with this program.  If
27    not, see <http://www.gnu.org/licenses/>.
28 */
29 
30 #include <config.h>
31 
32 #include "idn2.h"
33 
34 #include <errno.h>		/* errno */
35 #include <stdlib.h>		/* malloc, free */
36 
37 #include "punycode.h"
38 
39 #include <unitypes.h>
40 #include <uniconv.h>		/* u8_strconv_from_locale */
41 #include <uninorm.h>		/* u32_normalize */
42 #include <unistr.h>		/* u8_to_u32 */
43 
44 #include "idna.h"		/* _idn2_label_test */
45 #include "tr46map.h"		/* definition for tr46map.c */
46 
47 #ifdef HAVE_LIBUNISTRING
48 /* copied from gnulib */
49 #include <limits.h>
/* Expands to the 'case' labels of the 26 basic Latin letters, each
   shifted by N.  Every letter is listed on its own so that the code
   does not rely on the letters being contiguous in the execution
   character set.  The final label is left without its ':' so that the
   macro is used as "_C_CTYPE_LOWER_N (n):".  */
#define _C_CTYPE_LOWER_N(N) \
   case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
   case 'e' + (N): case 'f' + (N): \
   case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
   case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \
   case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \
   case 's' + (N): case 't' + (N): case 'u' + (N): case 'v' + (N): \
   case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N)
/* The same list, shifted onto the upper-case letters.  */
#define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')

/* Locale-independent lower-casing: map 'A'..'Z' to 'a'..'z' and hand
   every other value back unchanged.  */
static inline int
c_tolower (int c)
{
  int result = c;

  switch (c)
    {
    _C_CTYPE_UPPER:
      result = c - 'A' + 'a';
      break;
    default:
      break;
    }

  return result;
}
70 
/* Compare at most N bytes of S1 and S2, ignoring the case of the basic
   Latin letters only (via c_tolower), independent of the locale.
   Returns a negative, zero or positive value, like strncmp.  */
static int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *left = (const unsigned char *) s1;
  const unsigned char *right = (const unsigned char *) s2;
  unsigned char a, b;

  /* Identical pointers or an empty window compare equal.  */
  if (left == right || n == 0)
    return 0;

  for (;;)
    {
      a = c_tolower (*left);
      b = c_tolower (*right);
      n--;

      /* Stop when the window is used up, S1 has ended, or the folded
         bytes differ.  */
      if (n == 0 || a == '\0' || a != b)
        break;

      left++;
      right++;
    }

  if (UCHAR_MAX <= INT_MAX)
    return a - b;

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int'.  */
  if (a > b)
    return 1;
  if (a < b)
    return -1;
  return 0;
}
102 #else
103 #include <c-strcase.h>
104 #endif
105 
106 static int
set_default_flags(int * flags)107 set_default_flags (int *flags)
108 {
109   if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL))
110     return IDN2_INVALID_FLAGS;
111 
112   if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL))
113       && ((*flags) & IDN2_NO_TR46))
114     return IDN2_INVALID_FLAGS;
115 
116   if (((*flags) & IDN2_ALABEL_ROUNDTRIP)
117       && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP))
118     return IDN2_INVALID_FLAGS;
119 
120   if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL)))
121     *flags |= IDN2_NONTRANSITIONAL;
122 
123   return IDN2_OK;
124 }
125 
/* Convert the single label SRC (SRCLEN bytes) and store the result in
   DST.  On entry *DSTLEN is the number of bytes available in DST; on
   success it is updated to the number of bytes produced.  FLAGS are
   the idn2 flags of the lookup call.  Returns IDN2_OK on success or
   an IDN2 error code.  */
static int
label (const uint8_t * src, size_t srclen, uint8_t * dst, size_t *dstlen,
       int flags)
{
  size_t plen;
  uint32_t *p = NULL;		/* UTF-32 form of the label, freed at 'out' */
  const uint8_t *src_org = NULL;	/* caller's A-label, kept for the roundtrip check */
  uint8_t *src_allocated = NULL;	/* UTF-8 form of a decoded A-label, freed at 'out' */
  int rc, check_roundtrip = 0;
  size_t tmpl, srclen_org = 0;
  uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
  size_t label32_len = IDN2_LABEL_MAX_LENGTH;

  /* NOTE(review): _idn2_ascii_p is defined elsewhere; presumably it is
     true when all SRCLEN bytes are ASCII -- confirm.  */
  if (_idn2_ascii_p (src, srclen))
    {
      /* The prefix comparison is a plain memcmp, so only a lower-case
         "xn--" selects the A-label branch.  */
      if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4
	  && memcmp (src, "xn--", 4) == 0)
	{
	  /*
	     If the input to this procedure appears to be an A-label
	     (i.e., it starts in "xn--", interpreted
	     case-insensitively), the lookup application MAY attempt to
	     convert it to a U-label, first ensuring that the A-label is
	     entirely in lowercase (converting it to lowercase if
	     necessary), and apply the tests of Section 5.4 and the
	     conversion of Section 5.5 to that form. */
	  rc =
	    _idn2_punycode_decode_internal (srclen - 4, (char *) src + 4,
					    &label32_len, label_u32);
	  /* Nothing has been allocated yet, so returning directly does
	     not leak.  */
	  if (rc)
	    return rc;

	  /* Remember the caller's A-label: after re-encoding below the
	     result must match it.  */
	  check_roundtrip = 1;
	  src_org = src;
	  srclen_org = srclen;

	  /* Continue with the decoded label, converted to UTF-8, as if
	     it had been the input.  */
	  srclen = IDN2_LABEL_MAX_LENGTH;
	  src = src_allocated =
	    u32_to_u8 (label_u32, label32_len, NULL, &srclen);
	  if (!src)
	    {
	      if (errno == ENOMEM)
		return IDN2_MALLOC;
	      return IDN2_ENCODING_ERROR;
	    }
	}
      else
	{
	  /* Any other ASCII label is copied through unchanged once it
	     is known to fit.  */
	  if (srclen > IDN2_LABEL_MAX_LENGTH)
	    return IDN2_TOO_BIG_LABEL;
	  if (srclen > *dstlen)
	    return IDN2_TOO_BIG_DOMAIN;

	  memcpy (dst, src, srclen);
	  *dstlen = srclen;
	  return IDN2_OK;
	}
    }

  /* From here on all exits go through 'out' so that P and
     SRC_ALLOCATED are released.  */
  rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT);
  if (rc != IDN2_OK)
    goto out;

  /* The label tests below are skipped for transitional processing.  */
  if (!(flags & IDN2_TRANSITIONAL))
    {
      rc = _idn2_label_test (TEST_NFC |
			     TEST_2HYPHEN |
			     TEST_LEADING_COMBINING |
			     TEST_DISALLOWED |
			     TEST_CONTEXTJ_RULE |
			     TEST_CONTEXTO_WITH_RULE |
			     TEST_UNASSIGNED | TEST_BIDI |
			     ((flags & IDN2_NONTRANSITIONAL) ?
			      TEST_NONTRANSITIONAL : 0) | ((flags &
							    IDN2_USE_STD3_ASCII_RULES)
							   ? 0 :
							   TEST_ALLOW_STD3_DISALLOWED),
			     p, plen);

      if (rc != IDN2_OK)
	goto out;
    }

  /* Emit the ACE prefix; the punycode form follows it.  */
  dst[0] = 'x';
  dst[1] = 'n';
  dst[2] = '-';
  dst[3] = '-';

  /* The encoder gets the space that remains after the four prefix
     bytes.  */
  tmpl = *dstlen - 4;
  rc = _idn2_punycode_encode_internal (plen, p, &tmpl, (char *) dst + 4);
  if (rc != IDN2_OK)
    goto out;


  *dstlen = 4 + tmpl;

  if (check_roundtrip)
    {
      /* The input was an A-label: the re-encoded result has to equal
         it, ignoring ASCII case.  */
      if (srclen_org != *dstlen
	  || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org))
	{
	  rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
	  goto out;
	}
    }
  else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP))
    {
      /* The input was a U-label: decode what was just produced and
         require it to equal the code points that were encoded.  */
      rc =
	_idn2_punycode_decode_internal (*dstlen - 4, (char *) dst + 4,
					&label32_len, label_u32);
      if (rc)
	{
	  rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
	  goto out;
	}

      if (plen != label32_len || u32_cmp (p, label_u32, label32_len))
	{
	  rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
	  goto out;
	}
    }

  rc = IDN2_OK;

out:
  /* Either pointer may still be NULL here; free accepts that.  */
  free (p);
  free (src_allocated);
  return rc;
}
256 
/* Label test sets used by _tr46; the two differ only in the
   TEST_TRANSITIONAL / TEST_NONTRANSITIONAL member.  */
#define TR46_TRANSITIONAL_CHECK \
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL)
#define TR46_NONTRANSITIONAL_CHECK \
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL)

/* Apply the TR46 mapping step to the zero-terminated UTF-8 string
   DOMAIN_U8: map every code point according to its IDNA map entry,
   normalize the result to NFC and test each dot-separated label.
   When everything passes and OUT is not NULL, *OUT receives a newly
   allocated UTF-8 string holding the mapped domain.  FLAGS selects
   transitional processing (IDN2_TRANSITIONAL) and STD3 handling
   (IDN2_USE_STD3_ASCII_RULES).  Returns IDN2_OK or an error code; on
   error *OUT is not written.  */
static int
_tr46 (const uint8_t * domain_u8, uint8_t ** out, int flags)
{
  size_t len, it;
  uint32_t *domain_u32;
  int err = IDN2_OK, rc;
  int transitional = 0;
  int test_flags;

  if (flags & IDN2_TRANSITIONAL)
    transitional = 1;

  /* convert UTF-8 to UTF-32 */
  /* The "+ 1" converts the terminating zero as well, so LEN counts it
     and the loops below stop at LEN - 1.  */
  if (!(domain_u32 =
	u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len)))
    {
      if (errno == ENOMEM)
	return IDN2_MALLOC;
      return IDN2_ENCODING_ERROR;
    }

  /* First pass: reject disallowed code points and count how many code
     points the mapped string will have.  */
  size_t len2 = 0;
  for (it = 0; it < len - 1; it++)
    {
      IDNAMap map;

      get_idna_map (domain_u32[it], &map);

      if (map_is (&map, TR46_FLG_DISALLOWED))
	{
	  if (domain_u32[it])
	    {
	      free (domain_u32);
	      return IDN2_DISALLOWED;
	    }
	  len2++;
	}
      else if (map_is (&map, TR46_FLG_MAPPED))
	{
	  len2 += map.nmappings;
	}
      else if (map_is (&map, TR46_FLG_VALID))
	{
	  len2++;
	}
      else if (map_is (&map, TR46_FLG_IGNORED))
	{
	  continue;
	}
      else if (map_is (&map, TR46_FLG_DEVIATION))
	{
	  /* Deviation characters are replaced by their mapping only in
	     transitional mode.  */
	  if (transitional)
	    {
	      len2 += map.nmappings;
	    }
	  else
	    len2++;
	}
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
	{
	  if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
	    {
	      /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */
	      len2++;
	    }
	  else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
	    {
	      /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */
	      len2 += map.nmappings;
	    }
	}
      /* A code point that matches none of the branches above adds
         nothing to the count.  */
    }

  /* Exit early if result is too long.
   * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */
  if (len2 >= IDN2_DOMAIN_MAX_LENGTH)
    {
      free (domain_u32);
      return IDN2_TOO_BIG_DOMAIN;
    }

  /* One extra element for the terminating zero stored further down.  */
  uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t));
  if (!tmp)
    {
      free (domain_u32);
      return IDN2_MALLOC;
    }

  /* Second pass: same case analysis as the first one, now storing the
     code points into TMP.  Both passes have to stay in step, otherwise
     the size computed above is wrong.  */
  len2 = 0;
  for (it = 0; it < len - 1; it++)
    {
      uint32_t c = domain_u32[it];
      IDNAMap map;

      get_idna_map (c, &map);

      if (map_is (&map, TR46_FLG_DISALLOWED))
	{
	  tmp[len2++] = c;
	}
      else if (map_is (&map, TR46_FLG_MAPPED))
	{
	  len2 += get_map_data (tmp + len2, &map);
	}
      else if (map_is (&map, TR46_FLG_VALID))
	{
	  tmp[len2++] = c;
	}
      else if (map_is (&map, TR46_FLG_IGNORED))
	{
	  continue;
	}
      else if (map_is (&map, TR46_FLG_DEVIATION))
	{
	  if (transitional)
	    {
	      len2 += get_map_data (tmp + len2, &map);
	    }
	  else
	    tmp[len2++] = c;
	}
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
	{
	  if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
	    {
	      tmp[len2++] = c;
	    }
	  else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
	    {
	      len2 += get_map_data (tmp + len2, &map);
	    }
	}
    }
  free (domain_u32);

  /* Normalize to NFC */
  /* The terminating zero is normalized along with the text, so the
     result is zero-terminated and LEN counts the terminator.  */
  tmp[len2] = 0;
  domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len);
  free (tmp);
  tmp = NULL;

  if (!domain_u32)
    {
      if (errno == ENOMEM)
	return IDN2_MALLOC;
      return IDN2_ENCODING_ERROR;
    }

  /* split into labels and check */
  /* S is the start of the current label, E runs to its end (a dot or
     the terminating zero).  */
  uint32_t *e, *s;
  for (e = s = domain_u32; *e; s = e)
    {
      while (*e && *e != '.')
	e++;

      if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-'
	  && s[3] == '-')
	{
	  /* decode punycode and check result non-transitional */
	  size_t ace_len;
	  uint32_t name_u32[IDN2_LABEL_MAX_LENGTH];
	  size_t name_len = IDN2_LABEL_MAX_LENGTH;
	  uint8_t *ace;

	  /* The decoder takes bytes, so the part after "xn--" is
	     converted to UTF-8 first.  */
	  ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len);
	  if (!ace)
	    {
	      free (domain_u32);
	      if (errno == ENOMEM)
		return IDN2_MALLOC;
	      return IDN2_ENCODING_ERROR;
	    }

	  rc = _idn2_punycode_decode_internal (ace_len, (char *) ace,
					       &name_len, name_u32);

	  free (ace);

	  /* A label that does not decode ends processing at once,
	     unlike a failed label test below.  */
	  if (rc)
	    {
	      free (domain_u32);
	      return rc;
	    }

	  test_flags = TR46_NONTRANSITIONAL_CHECK;

	  if (!(flags & IDN2_USE_STD3_ASCII_RULES))
	    test_flags |= TEST_ALLOW_STD3_DISALLOWED;

	  /* A failed test is only recorded; the remaining labels are
	     still visited and ERR ends up holding the last failure.  */
	  if ((rc = _idn2_label_test (test_flags, name_u32, name_len)))
	    err = rc;
	}
      else
	{
	  test_flags =
	    transitional ? TR46_TRANSITIONAL_CHECK :
	    TR46_NONTRANSITIONAL_CHECK;

	  if (!(flags & IDN2_USE_STD3_ASCII_RULES))
	    test_flags |= TEST_ALLOW_STD3_DISALLOWED;

	  if ((rc = _idn2_label_test (test_flags, s, e - s)))
	    err = rc;
	}

      /* Step over the dot that ended this label.  */
      if (*e)
	e++;
    }

  if (err == IDN2_OK && out)
    {
      /* LEN still includes the terminating zero, so the UTF-8 result
         is zero-terminated too.  */
      uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len);
      free (domain_u32);

      if (!_out)
	{
	  if (errno == ENOMEM)
	    return IDN2_MALLOC;
	  return IDN2_ENCODING_ERROR;
	}

      *out = _out;
    }
  else
    free (domain_u32);

  return err;
}
490 
491 /**
492  * idn2_lookup_u8:
493  * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form.
494  * @lookupname: newly allocated output variable with name to lookup in DNS.
495  * @flags: optional #idn2_flags to modify behaviour.
496  *
497  * Perform IDNA2008 lookup string conversion on domain name @src, as
498  * described in section 5 of RFC 5891.  Note that the input string
499  * must be encoded in UTF-8 and be in Unicode NFC form.
500  *
501  * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before
502  * further processing.  %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL
503  * do already imply %IDN2_NFC_INPUT.
504  *
505  * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
506  * convert any input A-labels to U-labels and perform additional
507  * testing. This is default since version 2.2.
 * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
509  *
510  * Pass %IDN2_TRANSITIONAL to enable Unicode TR46
511  * transitional processing, and %IDN2_NONTRANSITIONAL to enable
512  * Unicode TR46 non-transitional processing.
513  *
514  * Multiple flags may be specified by binary or:ing them together.
515  *
516  * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default.
517  * Previously we were eliminating non-STD3 characters from domain strings
518  * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2
519  * functions. That was an unexpected regression for applications switching
520  * from libidn and thus it is no longer applied by default.
521  * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again.
522  *
523  * After version 0.11: @lookupname may be NULL to test lookup of @src
524  * without allocating memory.
525  *
526  * Returns: On successful conversion %IDN2_OK is returned, if the
527  *   output domain or any label would have been too long
528  *   %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
529  *   another error code is returned.
530  *
531  * Since: 0.1
532  **/
533 int
idn2_lookup_u8(const uint8_t * src,uint8_t ** lookupname,int flags)534 idn2_lookup_u8 (const uint8_t * src, uint8_t ** lookupname, int flags)
535 {
536   size_t lookupnamelen = 0;
537   uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1];
538   uint8_t *src_allocated = NULL;
539   int rc;
540 
541   if (src == NULL)
542     {
543       if (lookupname)
544 	*lookupname = NULL;
545       return IDN2_OK;
546     }
547 
548   rc = set_default_flags (&flags);
549   if (rc != IDN2_OK)
550     return rc;
551 
552   if (!(flags & IDN2_NO_TR46))
553     {
554       uint8_t *out;
555 
556       rc = _tr46 (src, &out, flags);
557       if (rc != IDN2_OK)
558 	return rc;
559 
560       src = src_allocated = out;
561     }
562 
563   do
564     {
565       const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.');
566       /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E
567          and U+FF61 here?  Perhaps when IDN2_NFC_INPUT? */
568       size_t labellen = end - src;
569       uint8_t tmp[IDN2_LABEL_MAX_LENGTH];
570       size_t tmplen = IDN2_LABEL_MAX_LENGTH;
571 
572       rc = label (src, labellen, tmp, &tmplen, flags);
573       if (rc != IDN2_OK)
574 	{
575 	  free (src_allocated);
576 	  return rc;
577 	}
578 
579       if (lookupnamelen + tmplen
580 	  > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2))
581 	{
582 	  free (src_allocated);
583 	  return IDN2_TOO_BIG_DOMAIN;
584 	}
585 
586       memcpy (_lookupname + lookupnamelen, tmp, tmplen);
587       lookupnamelen += tmplen;
588 
589       if (*end == '.')
590 	{
591 	  if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH)
592 	    {
593 	      free (src_allocated);
594 	      return IDN2_TOO_BIG_DOMAIN;
595 	    }
596 
597 	  _lookupname[lookupnamelen] = '.';
598 	  lookupnamelen++;
599 	}
600       _lookupname[lookupnamelen] = '\0';
601 
602       src = end;
603     }
604   while (*src++);
605 
606   free (src_allocated);
607 
608   if (lookupname)
609     {
610       uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1);
611 
612       if (tmp == NULL)
613 	return IDN2_MALLOC;
614 
615       memcpy (tmp, _lookupname, lookupnamelen + 1);
616       *lookupname = tmp;
617     }
618 
619   return IDN2_OK;
620 }
621 
622 /**
623  * idn2_lookup_ul:
624  * @src: input zero-terminated locale encoded string.
625  * @lookupname: newly allocated output variable with name to lookup in DNS.
626  * @flags: optional #idn2_flags to modify behaviour.
627  *
628  * Perform IDNA2008 lookup string conversion on domain name @src, as
629  * described in section 5 of RFC 5891.  Note that the input is assumed
630  * to be encoded in the locale's default coding system, and will be
631  * transcoded to UTF-8 and NFC normalized by this function.
632  *
633  * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
634  * convert any input A-labels to U-labels and perform additional
635  * testing. This is default since version 2.2.
 * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
637  *
638  * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing,
639  * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional
640  * processing.
641  *
642  * Multiple flags may be specified by binary or:ing them together, for
643  * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL.
644  *
645  * The %IDN2_NFC_INPUT in @flags is always enabled in this function.
646  *
647  * After version 0.11: @lookupname may be NULL to test lookup of @src
648  * without allocating memory.
649  *
650  * Returns: On successful conversion %IDN2_OK is returned, if
651  *   conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL is
652  *   returned, if the output domain or any label would have been too
653  *   long %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
654  *   another error code is returned.
655  *
656  * Since: 0.1
657  **/
658 int
idn2_lookup_ul(const char * src,char ** lookupname,int flags)659 idn2_lookup_ul (const char *src, char **lookupname, int flags)
660 {
661   uint8_t *utf8src = NULL;
662   int rc;
663 
664   if (src)
665     {
666       const char *encoding = locale_charset ();
667 
668       utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
669 
670       if (!utf8src)
671 	{
672 	  if (errno == ENOMEM)
673 	    return IDN2_MALLOC;
674 	  return IDN2_ICONV_FAIL;
675 	}
676     }
677 
678   rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname,
679 		       flags | IDN2_NFC_INPUT);
680 
681   free (utf8src);
682 
683   return rc;
684 }
685 
686 /**
687  * idn2_to_ascii_4i:
688  * @input: zero terminated input Unicode (UCS-4) string.
689  * @inlen: number of elements in @input.
690  * @output: output zero terminated string that must have room for at least 63 characters plus the terminating zero.
691  * @flags: optional #idn2_flags to modify behaviour.
692  *
693  * THIS FUNCTION HAS BEEN DEPRECATED DUE TO A DESIGN FLAW. USE idn2_to_ascii_4i2() INSTEAD !
694  *
695  * The ToASCII operation takes a sequence of Unicode code points that make
696  * up one domain label and transforms it into a sequence of code points in
697  * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
698  * the resulting sequence are equivalent labels.
699  *
700  * It is important to note that the ToASCII operation can fail.
701  * ToASCII fails if any step of it fails. If any step of the
702  * ToASCII operation fails on any label in a domain name, that domain
703  * name MUST NOT be used as an internationalized domain name.
704  * The method for dealing with this failure is application-specific.
705  *
706  * The inputs to ToASCII are a sequence of code points.
707  *
708  * ToASCII never alters a sequence of code points that are all in the ASCII
709  * range to begin with (although it could fail). Applying the ToASCII operation multiple
710  * effect as applying it just once.
711  *
712  * The default behavior of this function (when flags are zero) is to apply
713  * the IDNA2008 rules without the TR46 amendments. As the TR46
714  * non-transitional processing is nowadays ubiquitous, when unsure, it is
715  * recommended to call this function with the %IDN2_NONTRANSITIONAL
716  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
717  *
718  * Return value: Returns %IDN2_OK on success, or error code.
719  *
720  * Since: 2.0.0
721  *
722  * Deprecated: 2.1.1: Use idn2_to_ascii_4i2().
723  **/
724 int
idn2_to_ascii_4i(const uint32_t * input,size_t inlen,char * output,int flags)725 idn2_to_ascii_4i (const uint32_t * input, size_t inlen, char *output,
726 		  int flags)
727 {
728   char *out;
729   int rc;
730 
731   if (!input)
732     {
733       if (output)
734 	*output = 0;
735       return IDN2_OK;
736     }
737 
738   rc = idn2_to_ascii_4i2 (input, inlen, &out, flags);
739   if (rc == IDN2_OK)
740     {
741       size_t len = strlen (out);
742 
743       if (len > 63)
744 	rc = IDN2_TOO_BIG_DOMAIN;
745       else if (output)
746 	memcpy (output, out, len);
747 
748       free (out);
749     }
750 
751   return rc;
752 }
753 
754 /**
755  * idn2_to_ascii_4i2:
756  * @input: zero terminated input Unicode (UCS-4) string.
757  * @inlen: number of elements in @input.
758  * @output: pointer to newly allocated zero-terminated output string.
759  * @flags: optional #idn2_flags to modify behaviour.
760  *
761  * The ToASCII operation takes a sequence of Unicode code points that make
762  * up one domain label and transforms it into a sequence of code points in
763  * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
764  * the resulting sequence are equivalent labels.
765  *
766  * It is important to note that the ToASCII operation can fail.
767  * ToASCII fails if any step of it fails. If any step of the
768  * ToASCII operation fails on any label in a domain name, that domain
769  * name MUST NOT be used as an internationalized domain name.
770  * The method for dealing with this failure is application-specific.
771  *
772  * The inputs to ToASCII are a sequence of code points.
773  *
774  * ToASCII never alters a sequence of code points that are all in the ASCII
775  * range to begin with (although it could fail). Applying the ToASCII operation multiple
776  * effect as applying it just once.
777  *
778  * The default behavior of this function (when flags are zero) is to apply
779  * the IDNA2008 rules without the TR46 amendments. As the TR46
780  * non-transitional processing is nowadays ubiquitous, when unsure, it is
781  * recommended to call this function with the %IDN2_NONTRANSITIONAL
782  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
783  *
784  * Return value: Returns %IDN2_OK on success, or error code.
785  *
786  * Since: 2.1.1
787  **/
788 int
idn2_to_ascii_4i2(const uint32_t * input,size_t inlen,char ** output,int flags)789 idn2_to_ascii_4i2 (const uint32_t * input, size_t inlen, char **output,
790 		   int flags)
791 {
792   uint32_t *input_u32;
793   uint8_t *input_u8, *output_u8;
794   size_t length;
795   int rc;
796 
797   if (!input)
798     {
799       if (output)
800 	*output = NULL;
801       return IDN2_OK;
802     }
803 
804   input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
805   if (!input_u32)
806     return IDN2_MALLOC;
807 
808   u32_cpy (input_u32, input, inlen);
809   input_u32[inlen] = 0;
810 
811   input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length);
812   free (input_u32);
813   if (!input_u8)
814     {
815       if (errno == ENOMEM)
816 	return IDN2_MALLOC;
817       return IDN2_ENCODING_ERROR;
818     }
819 
820   rc = idn2_lookup_u8 (input_u8, &output_u8, flags);
821   free (input_u8);
822 
823   if (rc == IDN2_OK)
824     {
825       if (output)
826 	*output = (char *) output_u8;
827       else
828 	free (output_u8);
829     }
830 
831   return rc;
832 }
833 
834 /**
835  * idn2_to_ascii_4z:
836  * @input: zero terminated input Unicode (UCS-4) string.
837  * @output: pointer to newly allocated zero-terminated output string.
838  * @flags: optional #idn2_flags to modify behaviour.
839  *
840  * Convert UCS-4 domain name to ASCII string using the IDNA2008
841  * rules.  The domain name may contain several labels, separated by dots.
842  * The output buffer must be deallocated by the caller.
843  *
844  * The default behavior of this function (when flags are zero) is to apply
845  * the IDNA2008 rules without the TR46 amendments. As the TR46
846  * non-transitional processing is nowadays ubiquitous, when unsure, it is
847  * recommended to call this function with the %IDN2_NONTRANSITIONAL
848  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
849  *
850  * Return value: Returns %IDN2_OK on success, or error code.
851  *
852  * Since: 2.0.0
853  **/
854 int
idn2_to_ascii_4z(const uint32_t * input,char ** output,int flags)855 idn2_to_ascii_4z (const uint32_t * input, char **output, int flags)
856 {
857   uint8_t *input_u8;
858   size_t length;
859   int rc;
860 
861   if (!input)
862     {
863       if (output)
864 	*output = NULL;
865       return IDN2_OK;
866     }
867 
868   input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
869   if (!input_u8)
870     {
871       if (errno == ENOMEM)
872 	return IDN2_MALLOC;
873       return IDN2_ENCODING_ERROR;
874     }
875 
876   rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags);
877   free (input_u8);
878 
879   return rc;
880 }
881 
882 /**
883  * idn2_to_ascii_8z:
884  * @input: zero terminated input UTF-8 string.
885  * @output: pointer to newly allocated output string.
886  * @flags: optional #idn2_flags to modify behaviour.
887  *
888  * Convert UTF-8 domain name to ASCII string using the IDNA2008
889  * rules.  The domain name may contain several labels, separated by dots.
890  * The output buffer must be deallocated by the caller.
891  *
892  * The default behavior of this function (when flags are zero) is to apply
893  * the IDNA2008 rules without the TR46 amendments. As the TR46
894  * non-transitional processing is nowadays ubiquitous, when unsure, it is
895  * recommended to call this function with the %IDN2_NONTRANSITIONAL
896  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
897  *
898  * Return value: Returns %IDN2_OK on success, or error code.
899  *
900  * Since: 2.0.0
901  **/
int
idn2_to_ascii_8z (const char *input, char **output, int flags)
{
  /* Same operation as idn2_lookup_u8, with plain char pointers.  */
  const uint8_t *utf8_input = (const uint8_t *) input;
  uint8_t **ascii_output = (uint8_t **) output;

  return idn2_lookup_u8 (utf8_input, ascii_output, flags);
}
907 
908 /**
909  * idn2_to_ascii_lz:
 * @input: zero terminated input string in the locale's default encoding.
911  * @output: pointer to newly allocated output string.
912  * @flags: optional #idn2_flags to modify behaviour.
913  *
914  * Convert a domain name in locale's encoding to ASCII string using the IDNA2008
915  * rules.  The domain name may contain several labels, separated by dots.
916  * The output buffer must be deallocated by the caller.
917  *
918  * The default behavior of this function (when flags are zero) is to apply
919  * the IDNA2008 rules without the TR46 amendments. As the TR46
920  * non-transitional processing is nowadays ubiquitous, when unsure, it is
921  * recommended to call this function with the %IDN2_NONTRANSITIONAL
922  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
923  *
924  * Returns: %IDN2_OK on success, or error code.
925  * Same as described in idn2_lookup_ul() documentation.
926  *
927  * Since: 2.0.0
928  **/
int
idn2_to_ascii_lz (const char *input, char **output, int flags)
{
  /* Forwards to idn2_lookup_ul, which does the locale handling.  */
  int status = idn2_lookup_ul (input, output, flags);

  return status;
}
934