1 /*************************************************
2 *     Exim - an Internet mail transport agent    *
3 *************************************************/
4 
5 /* Copyright (c) Jeremy Harris 2015 - 2018 */
6 /* See the file NOTICE for conditions of use and distribution. */
7 
8 
9 #include "exim.h"
10 
11 #ifdef SUPPORT_I18N
12 
13 #ifdef SUPPORT_I18N_2008
14 # include <idn2.h>
15 #else
16 # include <idna.h>
17 #endif
18 
19 #include <punycode.h>
20 #include <stringprep.h>
21 
22 static uschar *
23 string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err);
24 
25 /**************************************************/
26 
27 BOOL
string_is_utf8(const uschar * s)28 string_is_utf8(const uschar * s)
29 {
30 uschar c;
31 if (s) while ((c = *s++)) if (c & 0x80) return TRUE;
32 return FALSE;
33 }
34 
35 static BOOL
string_is_alabel(const uschar * s)36 string_is_alabel(const uschar * s)
37 {
38 return s[0] == 'x' && s[1] == 'n' && s[2] == '-' && s[3] == '-';
39 }
40 
41 /**************************************************/
/* Domain conversions.
The *err string pointer should be null before the call.

Return NULL on error, with the (optional) err string pointer filled in.
*/
47 
48 uschar *
string_domain_utf8_to_alabel(const uschar * utf8,uschar ** err)49 string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
50 {
51 uschar * s1, * s;
52 int rc;
53 
54 #ifdef SUPPORT_I18N_2008
55 /* Avoid lowercasing plain-ascii domains */
56 if (!string_is_utf8(utf8))
57   return string_copy(utf8);
58 
59 /* Only lowercase is accepted by the library call.  A pity since we lose
60 any mixed-case annotation.  This does not really matter for a domain. */
61   {
62   uschar c;
63   for (s1 = s = US utf8; (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
64     {
65     s = string_copy(utf8);
66     for (s1 = s + (s1 - utf8); (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
67       *s1 = tolower(c);
68     break;
69     }
70   }
71 if ((rc = idn2_lookup_u8((const uint8_t *) s, &s1, IDN2_NFC_INPUT)) != IDN2_OK)
72   {
73   if (err) *err = US idn2_strerror(rc);
74   return NULL;
75   }
76 #else
77 s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
78 if (  (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
79    != IDNA_SUCCESS)
80   {
81   free(s);
82   if (err) *err = US idna_strerror(rc);
83   return NULL;
84   }
85 free(s);
86 #endif
87 s = string_copy(s1);
88 free(s1);
89 return s;
90 }
91 
92 
93 
94 uschar *
string_domain_alabel_to_utf8(const uschar * alabel,uschar ** err)95 string_domain_alabel_to_utf8(const uschar * alabel, uschar ** err)
96 {
97 #ifdef SUPPORT_I18N_2008
98 const uschar * label;
99 int sep = '.';
100 gstring * g = NULL;
101 
102 while (label = string_nextinlist(&alabel, &sep, NULL, 0))
103   if (  string_is_alabel(label)
104      && !(label = string_localpart_alabel_to_utf8_(label, err))
105      )
106     return NULL;
107   else
108     g = string_append_listele(g, '.', label);
109 return string_from_gstring(g);
110 
111 #else
112 
113 uschar * s1, * s;
114 int rc;
115 
116 if (  (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES))
117    != IDNA_SUCCESS)
118   {
119   if (err) *err = US idna_strerror(rc);
120   return NULL;
121   }
122 s = string_copy(s1);
123 free(s1);
124 return s;
125 #endif
126 }
127 
128 /**************************************************/
/* Localpart conversions. */
/* The *err string pointer should be null before the call. */
131 
132 
133 uschar *
string_localpart_utf8_to_alabel(const uschar * utf8,uschar ** err)134 string_localpart_utf8_to_alabel(const uschar * utf8, uschar ** err)
135 {
136 size_t ucs4_len;
137 punycode_uint * p;
138 size_t p_len;
139 uschar * res;
140 int rc;
141 
142 if (!string_is_utf8(utf8)) return string_copy(utf8);
143 
144 p = (punycode_uint *) stringprep_utf8_to_ucs4(CCS utf8, -1, &ucs4_len);
145 p_len = ucs4_len*4;	/* this multiplier is pure guesswork */
146 res = store_get(p_len+5, is_tainted(utf8));
147 
148 res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';
149 
150 if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, CS res+4)) != PUNYCODE_SUCCESS)
151   {
152   DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
153   free(p);
154   if (err) *err = US punycode_strerror(rc);
155   return NULL;
156   }
157 p_len += 4;
158 free(p);
159 res[p_len] = '\0';
160 return res;
161 }
162 
163 
164 static uschar *
string_localpart_alabel_to_utf8_(const uschar * alabel,uschar ** err)165 string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err)
166 {
167 size_t p_len;
168 punycode_uint * p;
169 int rc;
170 uschar * s, * res;
171 
172 DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel);
173 alabel += 4;
174 p_len = Ustrlen(alabel);
175 p = store_get((p_len+1) * sizeof(*p), is_tainted(alabel));
176 
177 if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
178   {
179   if (err) *err = US punycode_strerror(rc);
180   return NULL;
181   }
182 
183 s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
184 res = string_copyn(s, p_len);
185 free(s);
186 return res;
187 }
188 
189 
190 uschar *
string_localpart_alabel_to_utf8(const uschar * alabel,uschar ** err)191 string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
192 {
193 if (string_is_alabel(alabel))
194   return string_localpart_alabel_to_utf8_(alabel, err);
195 
196 if (err) *err = US"bad alabel prefix";
197 return NULL;
198 }
199 
200 
201 /**************************************************/
202 /* Whole address conversion.
203 The *err string pointer should be null before the call.
204 
205 Return NULL on error, with (optional) errstring pointer filled in
206 */
207 
208 uschar *
string_address_utf8_to_alabel(const uschar * utf8,uschar ** err)209 string_address_utf8_to_alabel(const uschar * utf8, uschar ** err)
210 {
211 uschar * l, * d;
212 
213 if (!*utf8) return string_copy(utf8);
214 
215 DEBUG(D_expand) debug_printf("addr from utf8 <%s>", utf8);
216 
217 for (const uschar * s = utf8; *s; s++)
218   if (*s == '@')
219     {
220     l = string_copyn(utf8, s - utf8);
221     if (  !(l = string_localpart_utf8_to_alabel(l, err))
222        || !(d = string_domain_utf8_to_alabel(++s, err))
223        )
224       return NULL;
225     l = string_sprintf("%s@%s", l, d);
226     DEBUG(D_expand) debug_printf(" -> <%s>\n", l);
227     return l;
228     }
229 
230 l =  string_localpart_utf8_to_alabel(utf8, err);
231 DEBUG(D_expand) debug_printf(" -> <%s>\n", l);
232 return l;
233 }
234 
235 
236 
237 /*************************************************
238 *         Report the library versions.           *
239 *************************************************/
240 
241 /* See a description in tls-openssl.c for an explanation of why this exists.
242 
243 Arguments:   a FILE* to print the results to
244 Returns:     nothing
245 */
246 
247 void
utf8_version_report(FILE * f)248 utf8_version_report(FILE *f)
249 {
250 #ifdef SUPPORT_I18N_2008
251 fprintf(f, "Library version: IDN2: Compile: %s\n"
252            "                       Runtime: %s\n",
253 	IDN2_VERSION,
254 	idn2_check_version(NULL));
255 fprintf(f, "Library version: Stringprep: Compile: %s\n"
256            "                             Runtime: %s\n",
257 	STRINGPREP_VERSION,
258 	stringprep_check_version(NULL));
259 #else
260 fprintf(f, "Library version: IDN: Compile: %s\n"
261            "                      Runtime: %s\n",
262 	STRINGPREP_VERSION,
263 	stringprep_check_version(NULL));
264 #endif
265 }
266 
267 #endif	/* whole file */
268 
269 /* vi: aw ai sw=2
270 */
271 /* End of utf8.c */
272