1 /*
2  * Copyright (c) 2010, 2011, 2012, 2013
3  *      Inferno Nettverk A/S, Norway.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. The above copyright notice, this list of conditions and the following
9  *    disclaimer must appear in all copies of the software, derivative works
10  *    or modified versions, and any portions thereof, aswell as in all
11  *    supporting documentation.
12  * 2. All advertising materials mentioning features or use of this software
13  *    must display the following acknowledgement:
14  *      This product includes software developed by
15  *      Inferno Nettverk A/S, Norway.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * Inferno Nettverk A/S requests users of this software to return to
31  *
32  *  Software Distribution Coordinator  or  sdc@inet.no
33  *  Inferno Nettverk A/S
34  *  Oslo Research Park
35  *  Gaustadalléen 21
36  *  NO-0349 Oslo
37  *  Norway
38  *
39  * any improvements or extensions that they make and grant Inferno Nettverk A/S
40  * the rights to redistribute these changes.
41  *
42  */
43 
44  /*
45   * This code was contributed by
46   * Markus Moeller (markus_moeller at compuserve.com).
47   */
48 
49 #include "common.h"
50 
/* Revision-control ($Id$ keyword) identification string for this file. */
static const char rcsid[] =
"$Id: ldap_util.c,v 1.14 2013/10/27 15:24:42 karls Exp $";
53 
54 #if HAVE_LDAP
55 char
asciitoutf8(input)56 *asciitoutf8(input)
57    char *input;
58 {
59    const char *function = "asciitoutf8()";
60    size_t n, a;
61    unsigned char *p, *utf8;
62    char *at;
63    int c, s;
64 
65    SASSERTX(input != NULL);
66 
67    for (n = 0, c = 0; n < strlen(input); ++n)
68       if ((unsigned char)input[n] > 127)
69          c++;
70 
71    at = strrchr(input, '@');
72    if (at)
73       a = at - input;
74    else
75       a = strlen(input) - 1;
76 
77    if (c != 0) {
78       if ((p = malloc(strlen(input) + 1 + c)) == NULL)
79          serrx("%s: %s", function, NOMEM);
80 
81       utf8 = p;
82       for (n = 0; n < strlen(input); ++n) {
83          if (n == a) {
84             /* Do not change domain name */
85             break;
86          }
87          s = (unsigned char)input[n];
88          /* Change ASCII > 127 to UTF-8
89             0xC2 0x80-0xBF
90             0xC3 0x80-0xBF
91          */
92          if (s > 127 && s < 192) {
93             *p = 194;
94             p++;
95             *p = s;
96          } else if (s > 191 && s < 256) {
97             *p = 195;
98             p++;
99             *p = s - 64;
100          } else
101             *p = s;
102          p++;
103       }
104 
105       *p = NUL;
106 
107       slog(LOG_DEBUG, "%s: Group %s as UTF-8: %s", function, input, utf8);
108 
109       return (char *)utf8;
110    }
111    else
112       return input;
113 }
114 
115 char
hextoutf8(input,flag)116 *hextoutf8(input, flag)
117    const char *input;
118    int flag;
119 {
120 /*
121    UTF8    = UTF1 / UTFMB
122    UTFMB   = UTF2 / UTF3 / UTF4
123 
124    UTF0    = %x80-BF
125    UTF1    = %x00-7F
126    UTF2    = %xC2-DF UTF0
127    UTF3    = %xE0 %xA0-BF UTF0 / %xE1-EC 2(UTF0) /
128              %xED %x80-9F UTF0 / %xEE-EF 2(UTF0)
129    UTF4    = %xF0 %x90-BF 2(UTF0) / %xF1-F3 3(UTF0) /
130              %xF4 %x80-8F 2(UTF0)
131 
132    http://www.utf8-chartable.de/unicode-utf8-table.pl
133 */
134    const char *function = "hextoutf8()";
135    size_t i, n, a;
136    unsigned char *utf8;
137    char *p, *at;
138    int ival, ichar, iUTF2, iUTF3, iUTF4;
139 
140    SASSERTX(input != NULL);
141 
142    at = strrchr(input, '@');
143    if (at)
144       a = at - input;
145    else
146       a = strlen(input) ;
147 
148    if ((utf8 = malloc(strlen(input) + 1)) == NULL)
149       serrx("%s: %s", function, NOMEM);
150 
151    i     = 0;
152    iUTF2 = 0;
153    iUTF3 = 0;
154    iUTF4 = 0;
155 
156    n = 0;
157    while (n < strlen(input)) {
158       if (!flag && n == a) {
159          /* Do not change domain name */
160          break;
161       }
162       if (flag < 2 && input[n] == '@') {
163          /* Do not change @ separator */
164          utf8[i] = '@';
165          i++;
166          n++;
167       }
168 
169       ival = input[n];
170       if (ival > 64 && ival < 71)
171          ichar = (ival - 55) * 16;
172       else if (ival > 96 && ival < 103)
173          ichar = (ival - 87)*16;
174       else if (ival > 47 && ival < 58)
175          ichar = (ival - 48)*16;
176       else {
177          slog(LOG_DEBUG, "%s: invalid Hex value 0x%x", function, ival);
178 
179          SASSERTX((ival > 64 && ival < 71) ||
180                   (ival > 96 && ival < 103) ||
181                   (ival > 47 && ival < 58));
182       }
183 
184       if (n == a - 1) {
185         slog(LOG_DEBUG, "%s: invalid Hex UTF-8 string \"%s\"", function, input);
186         SASSERTX(n < a - 1);
187       }
188 
189       n++;
190       ival = input[n];
191       if (ival > 64 && ival < 71) ichar = ichar + ival - 55;
192       else if (ival > 96 && ival < 103) ichar = ichar + ival - 87;
193       else if (ival > 47 && ival < 58) ichar = ichar + ival - 48;
194       else {
195          slog(LOG_DEBUG, "%s: invalid Hex value 0x%x", function, ival);
196          SASSERTX((ival > 64 && ival < 71) ||
197                   (ival > 96 && ival < 103) ||
198                   (ival > 47 && ival < 58));
199       }
200 
201       if (iUTF2) {
202          if (iUTF2 == 0xC2 && ichar > 0x7F && ichar < 0xC0) {
203             iUTF2 = 0;
204             utf8[i-1] = ichar;
205          }
206          else if (iUTF2 == 0xC3 && ichar > 0x7F && ichar < 0xC0) {
207             iUTF2 = 0;
208             utf8[i-1] = ichar + 64;
209          }
210          else if (iUTF2 > 0xC3 && iUTF2 < 0xE0 && ichar > 0x7F
211          && ichar < 0xC0) {
212             iUTF2 = 0;
213             utf8[i] = ichar;
214             i++;
215          }
216          else {
217             iUTF2 = 0;
218             utf8[i] = ichar;
219             utf8[i + 1] = NUL;
220 
221             slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for Unicode \"%s\"",
222             function, utf8);
223 
224             SASSERTX((iUTF2 == 0xC2 && ichar > 0x7F && ichar < 0xC0)
225             ||       (iUTF2 == 0xC3 && ichar > 0x7F && ichar < 0xC0)
226             ||       (iUTF2 > 0xC3 && iUTF2 < 0xE0 && ichar > 0x7F
227                    && ichar < 0xC0));
228          }
229       }
230       else if (iUTF3) {
231          if (iUTF3 == 0xE0 && ichar > 0x9F && ichar < 0xC0) {
232             iUTF3 = 1;
233             utf8[i] = ichar;
234             i++;
235          }
236          else if (iUTF3 > 0xE0 && iUTF3 < 0xED && ichar > 0x7F
237          && ichar < 0xC0) {
238             iUTF3 = 2;
239             utf8[i] = ichar;
240             i++;
241          }
242          else if (iUTF3 == 0xED && ichar > 0x7F && ichar < 0xA0) {
243             iUTF3 = 3;
244             utf8[i] = ichar;
245             i++;
246          }
247          else if (iUTF3 > 0xED && iUTF3 < 0xF0 && ichar > 0x7F && ichar < 0xC0){
248             iUTF3 = 4;
249             utf8[i] = ichar;
250             i++;
251          }
252          else if (iUTF3 > 0 && iUTF3 < 5 && ichar > 0x7F && ichar < 0xC0) {
253             iUTF3 = 0;
254             utf8[i] = ichar;
255             i++;
256          }
257          else {
258             iUTF3 = 0;
259             utf8[i] = ichar;
260             utf8[i + 1] = NUL;
261 
262             slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for unicode \"%s\"",
263             function, utf8);
264 
265             SASSERTX((iUTF3 == 0xE0 && ichar > 0x9F && ichar < 0xC0)
266             ||       (iUTF3 > 0xE0 && iUTF3 < 0xED && ichar > 0x7F
267                    && ichar < 0xC0)
268             ||       (iUTF3 == 0xED && ichar > 0x7F && ichar < 0xA0)
269             ||       (iUTF3 > 0xED && iUTF3 < 0xF0 && ichar > 0x7F
270                    && ichar < 0xC0)
271             || (iUTF3 > 0 && iUTF3 < 5 && ichar > 0x7F && ichar < 0xC0));
272          }
273       }
274       else if (iUTF4) {
275          if (iUTF4 == 0xF0 && ichar > 0x8F && ichar < 0xC0) {
276             iUTF4 = 1;
277             utf8[i] = ichar;
278             i++;
279          }
280          else if (iUTF4 > 0xF0 && iUTF3 < 0xF4 && ichar > 0x7F && ichar < 0xC0){
281             iUTF4 = 2;
282             utf8[i] = ichar;
283             i++;
284          }
285          else if (iUTF4 == 0xF4 && ichar > 0x7F && ichar < 0x90) {
286             iUTF4 = 3;
287             utf8[i] = ichar;
288             i++;
289          }
290          else if (iUTF4 > 0 && iUTF4 < 5 && ichar > 0x7F && ichar < 0xC0) {
291             if (iUTF4 == 4)
292                iUTF4 = 0;
293             else
294                iUTF4 = 4;
295 
296             utf8[i] = ichar;
297             i++;
298          }
299          else {
300             iUTF4 = 0;
301             utf8[i]     = ichar;
302             utf8[i + 1] = NUL;
303 
304             slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for Unicode \"%s\"",
305             function, utf8);
306 
307             SASSERTX((iUTF4 == 0xF0 && ichar > 0x8F && ichar < 0xC0)
308             ||       (iUTF4 > 0xF0 && iUTF3 < 0xF4 && ichar > 0x7F
309                    && ichar < 0xC0)
310             ||       (iUTF4 == 0xF4 && ichar > 0x7F && ichar < 0x90)
311             ||       (iUTF4 > 0 && iUTF4 < 5 && ichar > 0x7F && ichar < 0xC0));
312          }
313       }
314       else if (ichar < 0x80) {
315           /* UTF1 */
316           utf8[i] = ichar;
317           i++;
318       }
319       else if (ichar > 0xC1 && ichar < 0xE0) {
320           /* UTF2 (Latin) */
321           iUTF2 = ichar;
322           utf8[i] = ichar;
323           i++;
324       }
325       else if (ichar > 0xDF && ichar < 0xF0) {
326           /* UTF3 */
327           iUTF3 = ichar;
328           utf8[i] = ichar;
329           i++;
330       }
331       else if (ichar > 0xEF && ichar < 0xF5) {
332           /* UTF4 */
333           iUTF4 = ichar;
334           utf8[i] = ichar;
335           i++;
336       } else {
337           utf8[i]   = ichar;
338           utf8[i+1] = NUL;
339 
340           slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for Unicode \"%s\"",
341           function, utf8);
342 
343           SASSERTX(iUTF2 || iUTF3 || iUTF4 || (ichar < 0x80)
344           ||      (ichar > 0xC1 && ichar < 0xE0)
345           ||      (ichar > 0xDF && ichar < 0xF0)
346           ||      (ichar > 0xEF && ichar < 0xF5));
347       }
348 
349       n++;
350    }
351 
352    utf8[i] = NUL;
353    if (iUTF2 || iUTF3 || iUTF4) {
354       slog(LOG_DEBUG, "%s: invalid UTF-8 sequence for Unicode \"%s\"",
355            function, utf8);
356 
357       SASSERTX(!iUTF2 && !iUTF3 && !iUTF4);
358    }
359 
360    p = (char *)utf8;
361    if (!flag && at)
362       strcat(p, at);
363 
364    return p;
365 }
366 
367 #endif /* HAVE_LDAP */
368