1 /* lookup.c - implementation of IDNA2008 lookup functions
2 Copyright (C) 2011-2021 Simon Josefsson
3 Copyright (C) 2017-2021 Tim Ruehsen
4
5 Libidn2 is free software: you can redistribute it and/or modify it
6 under the terms of either:
7
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11
12 or
13
14 * the GNU General Public License as published by the Free
15 Software Foundation; either version 2 of the License, or (at
16 your option) any later version.
17
18 or both in parallel, as here.
19
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received copies of the GNU General Public License and
26 the GNU Lesser General Public License along with this program. If
27 not, see <http://www.gnu.org/licenses/>.
28 */
29
30 #include <config.h>
31
32 #include "idn2.h"
33
34 #include <errno.h> /* errno */
35 #include <stdlib.h> /* malloc, free */
36
37 #include "punycode.h"
38
39 #include <unitypes.h>
40 #include <uniconv.h> /* u8_strconv_from_locale */
41 #include <uninorm.h> /* u32_normalize */
42 #include <unistr.h> /* u8_to_u32 */
43
44 #include "idna.h" /* _idn2_label_test */
45 #include "tr46map.h" /* definition for tr46map.c */
46
47 #ifdef HAVE_LIBUNISTRING
48 /* copied from gnulib */
49 #include <limits.h>
/* Expand to the switch case labels for the 26 letters 'a' + N .. 'z' + N.
   The colon after the final label is supplied by the user of the macro.  */
#define _C_CTYPE_LOWER_N(N) \
   case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
   case 'e' + (N): case 'f' + (N): \
   case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
   case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \
   case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \
   case 's' + (N): case 't' + (N): case 'u' + (N): case 'v' + (N): \
   case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N)
/* Case labels for the upper-case letters 'A' .. 'Z'.  */
#define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')

/* Return the lower-case counterpart of C when C is one of the letters
   'A' .. 'Z'; any other value is returned unchanged.  Only character
   literals are consulted, no locale function is called.  */
static inline int
c_tolower (int c)
{
  int lowered = c;

  switch (c)
    {
    _C_CTYPE_UPPER:
      lowered = c + ('a' - 'A');
      break;
    default:
      break;
    }

  return lowered;
}
70
/* Compare at most N bytes of the strings S1 and S2, treating the letters
   'A' .. 'Z' as equal to 'a' .. 'z' (via c_tolower).  Comparison stops at
   the first differing byte, at a zero byte in S1, or after N bytes.

   Returns zero when the compared prefixes are equal (also when N is 0 or
   both pointers are identical), a negative value when S1 sorts before
   S2, and a positive value otherwise.  */
static int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *lhs = (const unsigned char *) s1;
  const unsigned char *rhs = (const unsigned char *) s2;
  unsigned char a, b;

  if (lhs == rhs || n == 0)
    return 0;

  for (;;)
    {
      a = c_tolower (*lhs);
      b = c_tolower (*rhs);

      /* Stop once the budget is used up, S1 ended, or a mismatch showed.  */
      if (--n == 0 || a == '\0' || a != b)
        break;

      ++lhs;
      ++rhs;
    }

  if (UCHAR_MAX <= INT_MAX)
    return a - b;

  /* On machines where 'char' and 'int' have the same size the difference
     of two 'unsigned char' values does not fit in an 'int', so only the
     sign is reported.  */
  return (a > b) - (a < b);
}
102 #else
103 #include <c-strcase.h>
104 #endif
105
106 static int
set_default_flags(int * flags)107 set_default_flags (int *flags)
108 {
109 if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL))
110 return IDN2_INVALID_FLAGS;
111
112 if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL))
113 && ((*flags) & IDN2_NO_TR46))
114 return IDN2_INVALID_FLAGS;
115
116 if (((*flags) & IDN2_ALABEL_ROUNDTRIP)
117 && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP))
118 return IDN2_INVALID_FLAGS;
119
120 if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL)))
121 *flags |= IDN2_NONTRANSITIONAL;
122
123 return IDN2_OK;
124 }
125
126 static int
label(const uint8_t * src,size_t srclen,uint8_t * dst,size_t * dstlen,int flags)127 label (const uint8_t * src, size_t srclen, uint8_t * dst, size_t *dstlen,
128 int flags)
129 {
130 size_t plen;
131 uint32_t *p = NULL;
132 const uint8_t *src_org = NULL;
133 uint8_t *src_allocated = NULL;
134 int rc, check_roundtrip = 0;
135 size_t tmpl, srclen_org = 0;
136 uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
137 size_t label32_len = IDN2_LABEL_MAX_LENGTH;
138
139 if (_idn2_ascii_p (src, srclen))
140 {
141 if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4
142 && memcmp (src, "xn--", 4) == 0)
143 {
144 /*
145 If the input to this procedure appears to be an A-label
146 (i.e., it starts in "xn--", interpreted
147 case-insensitively), the lookup application MAY attempt to
148 convert it to a U-label, first ensuring that the A-label is
149 entirely in lowercase (converting it to lowercase if
150 necessary), and apply the tests of Section 5.4 and the
151 conversion of Section 5.5 to that form. */
152 rc =
153 _idn2_punycode_decode_internal (srclen - 4, (char *) src + 4,
154 &label32_len, label_u32);
155 if (rc)
156 return rc;
157
158 check_roundtrip = 1;
159 src_org = src;
160 srclen_org = srclen;
161
162 srclen = IDN2_LABEL_MAX_LENGTH;
163 src = src_allocated =
164 u32_to_u8 (label_u32, label32_len, NULL, &srclen);
165 if (!src)
166 {
167 if (errno == ENOMEM)
168 return IDN2_MALLOC;
169 return IDN2_ENCODING_ERROR;
170 }
171 }
172 else
173 {
174 if (srclen > IDN2_LABEL_MAX_LENGTH)
175 return IDN2_TOO_BIG_LABEL;
176 if (srclen > *dstlen)
177 return IDN2_TOO_BIG_DOMAIN;
178
179 memcpy (dst, src, srclen);
180 *dstlen = srclen;
181 return IDN2_OK;
182 }
183 }
184
185 rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT);
186 if (rc != IDN2_OK)
187 goto out;
188
189 if (!(flags & IDN2_TRANSITIONAL))
190 {
191 rc = _idn2_label_test (TEST_NFC |
192 TEST_2HYPHEN |
193 TEST_LEADING_COMBINING |
194 TEST_DISALLOWED |
195 TEST_CONTEXTJ_RULE |
196 TEST_CONTEXTO_WITH_RULE |
197 TEST_UNASSIGNED | TEST_BIDI |
198 ((flags & IDN2_NONTRANSITIONAL) ?
199 TEST_NONTRANSITIONAL : 0) | ((flags &
200 IDN2_USE_STD3_ASCII_RULES)
201 ? 0 :
202 TEST_ALLOW_STD3_DISALLOWED),
203 p, plen);
204
205 if (rc != IDN2_OK)
206 goto out;
207 }
208
209 dst[0] = 'x';
210 dst[1] = 'n';
211 dst[2] = '-';
212 dst[3] = '-';
213
214 tmpl = *dstlen - 4;
215 rc = _idn2_punycode_encode_internal (plen, p, &tmpl, (char *) dst + 4);
216 if (rc != IDN2_OK)
217 goto out;
218
219
220 *dstlen = 4 + tmpl;
221
222 if (check_roundtrip)
223 {
224 if (srclen_org != *dstlen
225 || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org))
226 {
227 rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
228 goto out;
229 }
230 }
231 else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP))
232 {
233 rc =
234 _idn2_punycode_decode_internal (*dstlen - 4, (char *) dst + 4,
235 &label32_len, label_u32);
236 if (rc)
237 {
238 rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
239 goto out;
240 }
241
242 if (plen != label32_len || u32_cmp (p, label_u32, label32_len))
243 {
244 rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
245 goto out;
246 }
247 }
248
249 rc = IDN2_OK;
250
251 out:
252 free (p);
253 free (src_allocated);
254 return rc;
255 }
256
257 #define TR46_TRANSITIONAL_CHECK \
258 (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL)
259 #define TR46_NONTRANSITIONAL_CHECK \
260 (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL)
261
262 static int
_tr46(const uint8_t * domain_u8,uint8_t ** out,int flags)263 _tr46 (const uint8_t * domain_u8, uint8_t ** out, int flags)
264 {
265 size_t len, it;
266 uint32_t *domain_u32;
267 int err = IDN2_OK, rc;
268 int transitional = 0;
269 int test_flags;
270
271 if (flags & IDN2_TRANSITIONAL)
272 transitional = 1;
273
274 /* convert UTF-8 to UTF-32 */
275 if (!(domain_u32 =
276 u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len)))
277 {
278 if (errno == ENOMEM)
279 return IDN2_MALLOC;
280 return IDN2_ENCODING_ERROR;
281 }
282
283 size_t len2 = 0;
284 for (it = 0; it < len - 1; it++)
285 {
286 IDNAMap map;
287
288 get_idna_map (domain_u32[it], &map);
289
290 if (map_is (&map, TR46_FLG_DISALLOWED))
291 {
292 if (domain_u32[it])
293 {
294 free (domain_u32);
295 return IDN2_DISALLOWED;
296 }
297 len2++;
298 }
299 else if (map_is (&map, TR46_FLG_MAPPED))
300 {
301 len2 += map.nmappings;
302 }
303 else if (map_is (&map, TR46_FLG_VALID))
304 {
305 len2++;
306 }
307 else if (map_is (&map, TR46_FLG_IGNORED))
308 {
309 continue;
310 }
311 else if (map_is (&map, TR46_FLG_DEVIATION))
312 {
313 if (transitional)
314 {
315 len2 += map.nmappings;
316 }
317 else
318 len2++;
319 }
320 else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
321 {
322 if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
323 {
324 /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */
325 len2++;
326 }
327 else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
328 {
329 /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */
330 len2 += map.nmappings;
331 }
332 }
333 }
334
335 /* Exit early if result is too long.
336 * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */
337 if (len2 >= IDN2_DOMAIN_MAX_LENGTH)
338 {
339 free (domain_u32);
340 return IDN2_TOO_BIG_DOMAIN;
341 }
342
343 uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t));
344 if (!tmp)
345 {
346 free (domain_u32);
347 return IDN2_MALLOC;
348 }
349
350 len2 = 0;
351 for (it = 0; it < len - 1; it++)
352 {
353 uint32_t c = domain_u32[it];
354 IDNAMap map;
355
356 get_idna_map (c, &map);
357
358 if (map_is (&map, TR46_FLG_DISALLOWED))
359 {
360 tmp[len2++] = c;
361 }
362 else if (map_is (&map, TR46_FLG_MAPPED))
363 {
364 len2 += get_map_data (tmp + len2, &map);
365 }
366 else if (map_is (&map, TR46_FLG_VALID))
367 {
368 tmp[len2++] = c;
369 }
370 else if (map_is (&map, TR46_FLG_IGNORED))
371 {
372 continue;
373 }
374 else if (map_is (&map, TR46_FLG_DEVIATION))
375 {
376 if (transitional)
377 {
378 len2 += get_map_data (tmp + len2, &map);
379 }
380 else
381 tmp[len2++] = c;
382 }
383 else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
384 {
385 if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
386 {
387 tmp[len2++] = c;
388 }
389 else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
390 {
391 len2 += get_map_data (tmp + len2, &map);
392 }
393 }
394 }
395 free (domain_u32);
396
397 /* Normalize to NFC */
398 tmp[len2] = 0;
399 domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len);
400 free (tmp);
401 tmp = NULL;
402
403 if (!domain_u32)
404 {
405 if (errno == ENOMEM)
406 return IDN2_MALLOC;
407 return IDN2_ENCODING_ERROR;
408 }
409
410 /* split into labels and check */
411 uint32_t *e, *s;
412 for (e = s = domain_u32; *e; s = e)
413 {
414 while (*e && *e != '.')
415 e++;
416
417 if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-'
418 && s[3] == '-')
419 {
420 /* decode punycode and check result non-transitional */
421 size_t ace_len;
422 uint32_t name_u32[IDN2_LABEL_MAX_LENGTH];
423 size_t name_len = IDN2_LABEL_MAX_LENGTH;
424 uint8_t *ace;
425
426 ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len);
427 if (!ace)
428 {
429 free (domain_u32);
430 if (errno == ENOMEM)
431 return IDN2_MALLOC;
432 return IDN2_ENCODING_ERROR;
433 }
434
435 rc = _idn2_punycode_decode_internal (ace_len, (char *) ace,
436 &name_len, name_u32);
437
438 free (ace);
439
440 if (rc)
441 {
442 free (domain_u32);
443 return rc;
444 }
445
446 test_flags = TR46_NONTRANSITIONAL_CHECK;
447
448 if (!(flags & IDN2_USE_STD3_ASCII_RULES))
449 test_flags |= TEST_ALLOW_STD3_DISALLOWED;
450
451 if ((rc = _idn2_label_test (test_flags, name_u32, name_len)))
452 err = rc;
453 }
454 else
455 {
456 test_flags =
457 transitional ? TR46_TRANSITIONAL_CHECK :
458 TR46_NONTRANSITIONAL_CHECK;
459
460 if (!(flags & IDN2_USE_STD3_ASCII_RULES))
461 test_flags |= TEST_ALLOW_STD3_DISALLOWED;
462
463 if ((rc = _idn2_label_test (test_flags, s, e - s)))
464 err = rc;
465 }
466
467 if (*e)
468 e++;
469 }
470
471 if (err == IDN2_OK && out)
472 {
473 uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len);
474 free (domain_u32);
475
476 if (!_out)
477 {
478 if (errno == ENOMEM)
479 return IDN2_MALLOC;
480 return IDN2_ENCODING_ERROR;
481 }
482
483 *out = _out;
484 }
485 else
486 free (domain_u32);
487
488 return err;
489 }
490
491 /**
492 * idn2_lookup_u8:
493 * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form.
494 * @lookupname: newly allocated output variable with name to lookup in DNS.
495 * @flags: optional #idn2_flags to modify behaviour.
496 *
497 * Perform IDNA2008 lookup string conversion on domain name @src, as
498 * described in section 5 of RFC 5891. Note that the input string
499 * must be encoded in UTF-8 and be in Unicode NFC form.
500 *
501 * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before
502 * further processing. %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL
503 * do already imply %IDN2_NFC_INPUT.
504 *
505 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
506 * convert any input A-labels to U-labels and perform additional
507 * testing. This is default since version 2.2.
508 * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP
509 *
510 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46
511 * transitional processing, and %IDN2_NONTRANSITIONAL to enable
512 * Unicode TR46 non-transitional processing.
513 *
514 * Multiple flags may be specified by binary or:ing them together.
515 *
516 * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default.
517 * Previously we were eliminating non-STD3 characters from domain strings
518 * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2
519 * functions. That was an unexpected regression for applications switching
520 * from libidn and thus it is no longer applied by default.
521 * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again.
522 *
523 * After version 0.11: @lookupname may be NULL to test lookup of @src
524 * without allocating memory.
525 *
526 * Returns: On successful conversion %IDN2_OK is returned, if the
527 * output domain or any label would have been too long
528 * %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
529 * another error code is returned.
530 *
531 * Since: 0.1
532 **/
533 int
idn2_lookup_u8(const uint8_t * src,uint8_t ** lookupname,int flags)534 idn2_lookup_u8 (const uint8_t * src, uint8_t ** lookupname, int flags)
535 {
536 size_t lookupnamelen = 0;
537 uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1];
538 uint8_t *src_allocated = NULL;
539 int rc;
540
541 if (src == NULL)
542 {
543 if (lookupname)
544 *lookupname = NULL;
545 return IDN2_OK;
546 }
547
548 rc = set_default_flags (&flags);
549 if (rc != IDN2_OK)
550 return rc;
551
552 if (!(flags & IDN2_NO_TR46))
553 {
554 uint8_t *out;
555
556 rc = _tr46 (src, &out, flags);
557 if (rc != IDN2_OK)
558 return rc;
559
560 src = src_allocated = out;
561 }
562
563 do
564 {
565 const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.');
566 /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E
567 and U+FF61 here? Perhaps when IDN2_NFC_INPUT? */
568 size_t labellen = end - src;
569 uint8_t tmp[IDN2_LABEL_MAX_LENGTH];
570 size_t tmplen = IDN2_LABEL_MAX_LENGTH;
571
572 rc = label (src, labellen, tmp, &tmplen, flags);
573 if (rc != IDN2_OK)
574 {
575 free (src_allocated);
576 return rc;
577 }
578
579 if (lookupnamelen + tmplen
580 > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2))
581 {
582 free (src_allocated);
583 return IDN2_TOO_BIG_DOMAIN;
584 }
585
586 memcpy (_lookupname + lookupnamelen, tmp, tmplen);
587 lookupnamelen += tmplen;
588
589 if (*end == '.')
590 {
591 if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH)
592 {
593 free (src_allocated);
594 return IDN2_TOO_BIG_DOMAIN;
595 }
596
597 _lookupname[lookupnamelen] = '.';
598 lookupnamelen++;
599 }
600 _lookupname[lookupnamelen] = '\0';
601
602 src = end;
603 }
604 while (*src++);
605
606 free (src_allocated);
607
608 if (lookupname)
609 {
610 uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1);
611
612 if (tmp == NULL)
613 return IDN2_MALLOC;
614
615 memcpy (tmp, _lookupname, lookupnamelen + 1);
616 *lookupname = tmp;
617 }
618
619 return IDN2_OK;
620 }
621
622 /**
623 * idn2_lookup_ul:
624 * @src: input zero-terminated locale encoded string.
625 * @lookupname: newly allocated output variable with name to lookup in DNS.
626 * @flags: optional #idn2_flags to modify behaviour.
627 *
628 * Perform IDNA2008 lookup string conversion on domain name @src, as
629 * described in section 5 of RFC 5891. Note that the input is assumed
630 * to be encoded in the locale's default coding system, and will be
631 * transcoded to UTF-8 and NFC normalized by this function.
632 *
633 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
634 * convert any input A-labels to U-labels and perform additional
635 * testing. This is default since version 2.2.
636 * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP
637 *
638 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing,
639 * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional
640 * processing.
641 *
642 * Multiple flags may be specified by binary or:ing them together, for
643 * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL.
644 *
645 * The %IDN2_NFC_INPUT in @flags is always enabled in this function.
646 *
647 * After version 0.11: @lookupname may be NULL to test lookup of @src
648 * without allocating memory.
649 *
650 * Returns: On successful conversion %IDN2_OK is returned, if
651 * conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL is
652 * returned, if the output domain or any label would have been too
653 * long %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
654 * another error code is returned.
655 *
656 * Since: 0.1
657 **/
658 int
idn2_lookup_ul(const char * src,char ** lookupname,int flags)659 idn2_lookup_ul (const char *src, char **lookupname, int flags)
660 {
661 uint8_t *utf8src = NULL;
662 int rc;
663
664 if (src)
665 {
666 const char *encoding = locale_charset ();
667
668 utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
669
670 if (!utf8src)
671 {
672 if (errno == ENOMEM)
673 return IDN2_MALLOC;
674 return IDN2_ICONV_FAIL;
675 }
676 }
677
678 rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname,
679 flags | IDN2_NFC_INPUT);
680
681 free (utf8src);
682
683 return rc;
684 }
685
686 /**
687 * idn2_to_ascii_4i:
688 * @input: zero terminated input Unicode (UCS-4) string.
689 * @inlen: number of elements in @input.
690 * @output: output zero terminated string that must have room for at least 63 characters plus the terminating zero.
691 * @flags: optional #idn2_flags to modify behaviour.
692 *
693 * THIS FUNCTION HAS BEEN DEPRECATED DUE TO A DESIGN FLAW. USE idn2_to_ascii_4i2() INSTEAD !
694 *
695 * The ToASCII operation takes a sequence of Unicode code points that make
696 * up one domain label and transforms it into a sequence of code points in
697 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
698 * the resulting sequence are equivalent labels.
699 *
700 * It is important to note that the ToASCII operation can fail.
701 * ToASCII fails if any step of it fails. If any step of the
702 * ToASCII operation fails on any label in a domain name, that domain
703 * name MUST NOT be used as an internationalized domain name.
704 * The method for dealing with this failure is application-specific.
705 *
706 * The inputs to ToASCII are a sequence of code points.
707 *
708 * ToASCII never alters a sequence of code points that are all in the ASCII
709 * range to begin with (although it could fail). Applying the ToASCII operation multiple
710 * effect as applying it just once.
711 *
712 * The default behavior of this function (when flags are zero) is to apply
713 * the IDNA2008 rules without the TR46 amendments. As the TR46
714 * non-transitional processing is nowadays ubiquitous, when unsure, it is
715 * recommended to call this function with the %IDN2_NONTRANSITIONAL
716 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
717 *
718 * Return value: Returns %IDN2_OK on success, or error code.
719 *
720 * Since: 2.0.0
721 *
722 * Deprecated: 2.1.1: Use idn2_to_ascii_4i2().
723 **/
724 int
idn2_to_ascii_4i(const uint32_t * input,size_t inlen,char * output,int flags)725 idn2_to_ascii_4i (const uint32_t * input, size_t inlen, char *output,
726 int flags)
727 {
728 char *out;
729 int rc;
730
731 if (!input)
732 {
733 if (output)
734 *output = 0;
735 return IDN2_OK;
736 }
737
738 rc = idn2_to_ascii_4i2 (input, inlen, &out, flags);
739 if (rc == IDN2_OK)
740 {
741 size_t len = strlen (out);
742
743 if (len > 63)
744 rc = IDN2_TOO_BIG_DOMAIN;
745 else if (output)
746 memcpy (output, out, len);
747
748 free (out);
749 }
750
751 return rc;
752 }
753
754 /**
755 * idn2_to_ascii_4i2:
756 * @input: zero terminated input Unicode (UCS-4) string.
757 * @inlen: number of elements in @input.
758 * @output: pointer to newly allocated zero-terminated output string.
759 * @flags: optional #idn2_flags to modify behaviour.
760 *
761 * The ToASCII operation takes a sequence of Unicode code points that make
762 * up one domain label and transforms it into a sequence of code points in
763 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
764 * the resulting sequence are equivalent labels.
765 *
766 * It is important to note that the ToASCII operation can fail.
767 * ToASCII fails if any step of it fails. If any step of the
768 * ToASCII operation fails on any label in a domain name, that domain
769 * name MUST NOT be used as an internationalized domain name.
770 * The method for dealing with this failure is application-specific.
771 *
772 * The inputs to ToASCII are a sequence of code points.
773 *
774 * ToASCII never alters a sequence of code points that are all in the ASCII
775 * range to begin with (although it could fail). Applying the ToASCII operation multiple
776 * effect as applying it just once.
777 *
778 * The default behavior of this function (when flags are zero) is to apply
779 * the IDNA2008 rules without the TR46 amendments. As the TR46
780 * non-transitional processing is nowadays ubiquitous, when unsure, it is
781 * recommended to call this function with the %IDN2_NONTRANSITIONAL
782 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
783 *
784 * Return value: Returns %IDN2_OK on success, or error code.
785 *
786 * Since: 2.1.1
787 **/
788 int
idn2_to_ascii_4i2(const uint32_t * input,size_t inlen,char ** output,int flags)789 idn2_to_ascii_4i2 (const uint32_t * input, size_t inlen, char **output,
790 int flags)
791 {
792 uint32_t *input_u32;
793 uint8_t *input_u8, *output_u8;
794 size_t length;
795 int rc;
796
797 if (!input)
798 {
799 if (output)
800 *output = NULL;
801 return IDN2_OK;
802 }
803
804 input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
805 if (!input_u32)
806 return IDN2_MALLOC;
807
808 u32_cpy (input_u32, input, inlen);
809 input_u32[inlen] = 0;
810
811 input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length);
812 free (input_u32);
813 if (!input_u8)
814 {
815 if (errno == ENOMEM)
816 return IDN2_MALLOC;
817 return IDN2_ENCODING_ERROR;
818 }
819
820 rc = idn2_lookup_u8 (input_u8, &output_u8, flags);
821 free (input_u8);
822
823 if (rc == IDN2_OK)
824 {
825 if (output)
826 *output = (char *) output_u8;
827 else
828 free (output_u8);
829 }
830
831 return rc;
832 }
833
834 /**
835 * idn2_to_ascii_4z:
836 * @input: zero terminated input Unicode (UCS-4) string.
837 * @output: pointer to newly allocated zero-terminated output string.
838 * @flags: optional #idn2_flags to modify behaviour.
839 *
840 * Convert UCS-4 domain name to ASCII string using the IDNA2008
841 * rules. The domain name may contain several labels, separated by dots.
842 * The output buffer must be deallocated by the caller.
843 *
844 * The default behavior of this function (when flags are zero) is to apply
845 * the IDNA2008 rules without the TR46 amendments. As the TR46
846 * non-transitional processing is nowadays ubiquitous, when unsure, it is
847 * recommended to call this function with the %IDN2_NONTRANSITIONAL
848 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
849 *
850 * Return value: Returns %IDN2_OK on success, or error code.
851 *
852 * Since: 2.0.0
853 **/
854 int
idn2_to_ascii_4z(const uint32_t * input,char ** output,int flags)855 idn2_to_ascii_4z (const uint32_t * input, char **output, int flags)
856 {
857 uint8_t *input_u8;
858 size_t length;
859 int rc;
860
861 if (!input)
862 {
863 if (output)
864 *output = NULL;
865 return IDN2_OK;
866 }
867
868 input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
869 if (!input_u8)
870 {
871 if (errno == ENOMEM)
872 return IDN2_MALLOC;
873 return IDN2_ENCODING_ERROR;
874 }
875
876 rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags);
877 free (input_u8);
878
879 return rc;
880 }
881
/**
 * idn2_to_ascii_8z:
 * @input: zero terminated input UTF-8 string.
 * @output: pointer to newly allocated output string.
 * @flags: optional #idn2_flags to modify behaviour.
 *
 * Convert UTF-8 domain name to ASCII string using the IDNA2008
 * rules.  The domain name may contain several labels, separated by dots.
 * The output buffer must be deallocated by the caller.
 *
 * This is idn2_lookup_u8() with char based argument types.
 *
 * The default behavior of this function (when flags are zero) is to apply
 * the IDNA2008 rules without the TR46 amendments. As the TR46
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
 *
 * Return value: Returns %IDN2_OK on success, or error code.
 *
 * Since: 2.0.0
 **/
int
idn2_to_ascii_8z (const char *input, char **output, int flags)
{
  const uint8_t *utf8_input = (const uint8_t *) input;
  uint8_t **utf8_output = (uint8_t **) output;

  return idn2_lookup_u8 (utf8_input, utf8_output, flags);
}
907
/**
 * idn2_to_ascii_lz:
 * @input: zero terminated input string in the locale's encoding.
 * @output: pointer to newly allocated output string.
 * @flags: optional #idn2_flags to modify behaviour.
 *
 * Convert a domain name in locale's encoding to ASCII string using the
 * IDNA2008 rules.  The domain name may contain several labels, separated
 * by dots.  The output buffer must be deallocated by the caller.
 *
 * This function forwards all arguments to idn2_lookup_ul().
 *
 * The default behavior of this function (when flags are zero) is to apply
 * the IDNA2008 rules without the TR46 amendments. As the TR46
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
 *
 * Returns: %IDN2_OK on success, or error code.
 *   Same as described in idn2_lookup_ul() documentation.
 *
 * Since: 2.0.0
 **/
int
idn2_to_ascii_lz (const char *input, char **output, int flags)
{
  int result = idn2_lookup_ul (input, output, flags);

  return result;
}
934