1 /*
2 * Copyright (c) 2010, 2011, 2012, 2013
3 * Inferno Nettverk A/S, Norway. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. The above copyright notice, this list of conditions and the following
9 * disclaimer must appear in all copies of the software, derivative works
10 * or modified versions, and any portions thereof, aswell as in all
11 * supporting documentation.
12 * 2. All advertising materials mentioning features or use of this software
13 * must display the following acknowledgement:
14 * This product includes software developed by
15 * Inferno Nettverk A/S, Norway.
16 * 3. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * Inferno Nettverk A/S requests users of this software to return to
31 *
32 * Software Distribution Coordinator or sdc@inet.no
33 * Inferno Nettverk A/S
34 * Oslo Research Park
35 * Gaustadalléen 21
36 * NO-0349 Oslo
37 * Norway
38 *
39 * any improvements or extensions that they make and grant Inferno Nettverk A/S
40 * the rights to redistribute these changes.
41 *
42 */
43
44 /*
45 * This code was contributed by
46 * Markus Moeller (markus_moeller at compuserve.com).
47 */
48
49 #include "common.h"
50
/*
 * Revision identifier for this file in RCS "$Id$" keyword format.
 * Nothing in the visible part of this file reads it.
 */
static const char rcsid[] =
"$Id: ldap_util.c,v 1.14 2013/10/27 15:24:42 karls Exp $";
53
54 #if HAVE_LDAP
55 char
asciitoutf8(input)56 *asciitoutf8(input)
57 char *input;
58 {
59 const char *function = "asciitoutf8()";
60 size_t n, a;
61 unsigned char *p, *utf8;
62 char *at;
63 int c, s;
64
65 SASSERTX(input != NULL);
66
67 for (n = 0, c = 0; n < strlen(input); ++n)
68 if ((unsigned char)input[n] > 127)
69 c++;
70
71 at = strrchr(input, '@');
72 if (at)
73 a = at - input;
74 else
75 a = strlen(input) - 1;
76
77 if (c != 0) {
78 if ((p = malloc(strlen(input) + 1 + c)) == NULL)
79 serrx("%s: %s", function, NOMEM);
80
81 utf8 = p;
82 for (n = 0; n < strlen(input); ++n) {
83 if (n == a) {
84 /* Do not change domain name */
85 break;
86 }
87 s = (unsigned char)input[n];
88 /* Change ASCII > 127 to UTF-8
89 0xC2 0x80-0xBF
90 0xC3 0x80-0xBF
91 */
92 if (s > 127 && s < 192) {
93 *p = 194;
94 p++;
95 *p = s;
96 } else if (s > 191 && s < 256) {
97 *p = 195;
98 p++;
99 *p = s - 64;
100 } else
101 *p = s;
102 p++;
103 }
104
105 *p = NUL;
106
107 slog(LOG_DEBUG, "%s: Group %s as UTF-8: %s", function, input, utf8);
108
109 return (char *)utf8;
110 }
111 else
112 return input;
113 }
114
115 char
hextoutf8(input,flag)116 *hextoutf8(input, flag)
117 const char *input;
118 int flag;
119 {
120 /*
121 UTF8 = UTF1 / UTFMB
122 UTFMB = UTF2 / UTF3 / UTF4
123
124 UTF0 = %x80-BF
125 UTF1 = %x00-7F
126 UTF2 = %xC2-DF UTF0
127 UTF3 = %xE0 %xA0-BF UTF0 / %xE1-EC 2(UTF0) /
128 %xED %x80-9F UTF0 / %xEE-EF 2(UTF0)
129 UTF4 = %xF0 %x90-BF 2(UTF0) / %xF1-F3 3(UTF0) /
130 %xF4 %x80-8F 2(UTF0)
131
132 http://www.utf8-chartable.de/unicode-utf8-table.pl
133 */
134 const char *function = "hextoutf8()";
135 size_t i, n, a;
136 unsigned char *utf8;
137 char *p, *at;
138 int ival, ichar, iUTF2, iUTF3, iUTF4;
139
140 SASSERTX(input != NULL);
141
142 at = strrchr(input, '@');
143 if (at)
144 a = at - input;
145 else
146 a = strlen(input) ;
147
148 if ((utf8 = malloc(strlen(input) + 1)) == NULL)
149 serrx("%s: %s", function, NOMEM);
150
151 i = 0;
152 iUTF2 = 0;
153 iUTF3 = 0;
154 iUTF4 = 0;
155
156 n = 0;
157 while (n < strlen(input)) {
158 if (!flag && n == a) {
159 /* Do not change domain name */
160 break;
161 }
162 if (flag < 2 && input[n] == '@') {
163 /* Do not change @ separator */
164 utf8[i] = '@';
165 i++;
166 n++;
167 }
168
169 ival = input[n];
170 if (ival > 64 && ival < 71)
171 ichar = (ival - 55) * 16;
172 else if (ival > 96 && ival < 103)
173 ichar = (ival - 87)*16;
174 else if (ival > 47 && ival < 58)
175 ichar = (ival - 48)*16;
176 else {
177 slog(LOG_DEBUG, "%s: invalid Hex value 0x%x", function, ival);
178
179 SASSERTX((ival > 64 && ival < 71) ||
180 (ival > 96 && ival < 103) ||
181 (ival > 47 && ival < 58));
182 }
183
184 if (n == a - 1) {
185 slog(LOG_DEBUG, "%s: invalid Hex UTF-8 string \"%s\"", function, input);
186 SASSERTX(n < a - 1);
187 }
188
189 n++;
190 ival = input[n];
191 if (ival > 64 && ival < 71) ichar = ichar + ival - 55;
192 else if (ival > 96 && ival < 103) ichar = ichar + ival - 87;
193 else if (ival > 47 && ival < 58) ichar = ichar + ival - 48;
194 else {
195 slog(LOG_DEBUG, "%s: invalid Hex value 0x%x", function, ival);
196 SASSERTX((ival > 64 && ival < 71) ||
197 (ival > 96 && ival < 103) ||
198 (ival > 47 && ival < 58));
199 }
200
201 if (iUTF2) {
202 if (iUTF2 == 0xC2 && ichar > 0x7F && ichar < 0xC0) {
203 iUTF2 = 0;
204 utf8[i-1] = ichar;
205 }
206 else if (iUTF2 == 0xC3 && ichar > 0x7F && ichar < 0xC0) {
207 iUTF2 = 0;
208 utf8[i-1] = ichar + 64;
209 }
210 else if (iUTF2 > 0xC3 && iUTF2 < 0xE0 && ichar > 0x7F
211 && ichar < 0xC0) {
212 iUTF2 = 0;
213 utf8[i] = ichar;
214 i++;
215 }
216 else {
217 iUTF2 = 0;
218 utf8[i] = ichar;
219 utf8[i + 1] = NUL;
220
221 slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for Unicode \"%s\"",
222 function, utf8);
223
224 SASSERTX((iUTF2 == 0xC2 && ichar > 0x7F && ichar < 0xC0)
225 || (iUTF2 == 0xC3 && ichar > 0x7F && ichar < 0xC0)
226 || (iUTF2 > 0xC3 && iUTF2 < 0xE0 && ichar > 0x7F
227 && ichar < 0xC0));
228 }
229 }
230 else if (iUTF3) {
231 if (iUTF3 == 0xE0 && ichar > 0x9F && ichar < 0xC0) {
232 iUTF3 = 1;
233 utf8[i] = ichar;
234 i++;
235 }
236 else if (iUTF3 > 0xE0 && iUTF3 < 0xED && ichar > 0x7F
237 && ichar < 0xC0) {
238 iUTF3 = 2;
239 utf8[i] = ichar;
240 i++;
241 }
242 else if (iUTF3 == 0xED && ichar > 0x7F && ichar < 0xA0) {
243 iUTF3 = 3;
244 utf8[i] = ichar;
245 i++;
246 }
247 else if (iUTF3 > 0xED && iUTF3 < 0xF0 && ichar > 0x7F && ichar < 0xC0){
248 iUTF3 = 4;
249 utf8[i] = ichar;
250 i++;
251 }
252 else if (iUTF3 > 0 && iUTF3 < 5 && ichar > 0x7F && ichar < 0xC0) {
253 iUTF3 = 0;
254 utf8[i] = ichar;
255 i++;
256 }
257 else {
258 iUTF3 = 0;
259 utf8[i] = ichar;
260 utf8[i + 1] = NUL;
261
262 slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for unicode \"%s\"",
263 function, utf8);
264
265 SASSERTX((iUTF3 == 0xE0 && ichar > 0x9F && ichar < 0xC0)
266 || (iUTF3 > 0xE0 && iUTF3 < 0xED && ichar > 0x7F
267 && ichar < 0xC0)
268 || (iUTF3 == 0xED && ichar > 0x7F && ichar < 0xA0)
269 || (iUTF3 > 0xED && iUTF3 < 0xF0 && ichar > 0x7F
270 && ichar < 0xC0)
271 || (iUTF3 > 0 && iUTF3 < 5 && ichar > 0x7F && ichar < 0xC0));
272 }
273 }
274 else if (iUTF4) {
275 if (iUTF4 == 0xF0 && ichar > 0x8F && ichar < 0xC0) {
276 iUTF4 = 1;
277 utf8[i] = ichar;
278 i++;
279 }
280 else if (iUTF4 > 0xF0 && iUTF3 < 0xF4 && ichar > 0x7F && ichar < 0xC0){
281 iUTF4 = 2;
282 utf8[i] = ichar;
283 i++;
284 }
285 else if (iUTF4 == 0xF4 && ichar > 0x7F && ichar < 0x90) {
286 iUTF4 = 3;
287 utf8[i] = ichar;
288 i++;
289 }
290 else if (iUTF4 > 0 && iUTF4 < 5 && ichar > 0x7F && ichar < 0xC0) {
291 if (iUTF4 == 4)
292 iUTF4 = 0;
293 else
294 iUTF4 = 4;
295
296 utf8[i] = ichar;
297 i++;
298 }
299 else {
300 iUTF4 = 0;
301 utf8[i] = ichar;
302 utf8[i + 1] = NUL;
303
304 slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for Unicode \"%s\"",
305 function, utf8);
306
307 SASSERTX((iUTF4 == 0xF0 && ichar > 0x8F && ichar < 0xC0)
308 || (iUTF4 > 0xF0 && iUTF3 < 0xF4 && ichar > 0x7F
309 && ichar < 0xC0)
310 || (iUTF4 == 0xF4 && ichar > 0x7F && ichar < 0x90)
311 || (iUTF4 > 0 && iUTF4 < 5 && ichar > 0x7F && ichar < 0xC0));
312 }
313 }
314 else if (ichar < 0x80) {
315 /* UTF1 */
316 utf8[i] = ichar;
317 i++;
318 }
319 else if (ichar > 0xC1 && ichar < 0xE0) {
320 /* UTF2 (Latin) */
321 iUTF2 = ichar;
322 utf8[i] = ichar;
323 i++;
324 }
325 else if (ichar > 0xDF && ichar < 0xF0) {
326 /* UTF3 */
327 iUTF3 = ichar;
328 utf8[i] = ichar;
329 i++;
330 }
331 else if (ichar > 0xEF && ichar < 0xF5) {
332 /* UTF4 */
333 iUTF4 = ichar;
334 utf8[i] = ichar;
335 i++;
336 } else {
337 utf8[i] = ichar;
338 utf8[i+1] = NUL;
339
340 slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for Unicode \"%s\"",
341 function, utf8);
342
343 SASSERTX(iUTF2 || iUTF3 || iUTF4 || (ichar < 0x80)
344 || (ichar > 0xC1 && ichar < 0xE0)
345 || (ichar > 0xDF && ichar < 0xF0)
346 || (ichar > 0xEF && ichar < 0xF5));
347 }
348
349 n++;
350 }
351
352 utf8[i] = NUL;
353 if (iUTF2 || iUTF3 || iUTF4) {
354 slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for Unicode \"%s\"",
355 function, utf8);
356
357 SASSERTX(!iUTF2 && !iUTF3 && !iUTF4);
358 }
359
360 p = (char *)utf8;
361 if (!flag && at)
362 strcat(p, at);
363
364 return p;
365 }
366
367 #endif /* HAVE_LDAP */
368