xref: /freebsd/crypto/heimdal/lib/wind/test-utf8.c (revision ae771770)
/*
 * Copyright (c) 2004 Kungliga Tekniska Högskolan
 * (Royal Institute of Technology, Stockholm, Sweden).
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Institute nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
33*ae771770SStanislav Sedov 
34*ae771770SStanislav Sedov #ifdef HAVE_CONFIG_H
35*ae771770SStanislav Sedov #include <config.h>
36*ae771770SStanislav Sedov #endif
37*ae771770SStanislav Sedov #include <stdio.h>
38*ae771770SStanislav Sedov #include <string.h>
39*ae771770SStanislav Sedov #include <err.h>
40*ae771770SStanislav Sedov #include "windlocl.h"
41*ae771770SStanislav Sedov 
/*
 * Byte strings for which main() expects wind_utf8ucs4() to return a
 * non-zero (error) status.  Entries are grouped by their first byte;
 * the list is terminated by a NULL pointer.
 */
static const char *failing_testcases[] = {
    /* single bytes 0x80 and 0xFF */
    "\x80", "\xFF",

    /* a single byte in the 0xC0..0xF7 range with nothing after it */
    "\xC0", "\xDF", "\xE0", "\xEF", "\xF0", "\xF7",

    /* sequences starting with 0xC0 */
    "\xC0\x01", "\xC0\x7F", "\xC0\xFF", "\xC0\x80\x80",

    /* sequences starting with 0xE0 */
    "\xE0\x01", "\xE0\x7F", "\xE0\x80", "\xE0\xFF",
    "\xE0\x80\x20", "\xE0\x80\xFF", "\xE0\x80\x80\x80",

    /* sequences starting with 0xF0 */
    "\xF0\x01", "\xF0\x80", "\xF0\x80\x01", "\xF0\x80\x80",
    "\xF0\x80\x80\x01", "\xF0\x80\x80\xFF",

    NULL
};
70*ae771770SStanislav Sedov 
/* Capacity, in code points, of the expected-output array in a testcase. */
#define MAX_LENGTH 10

/*
 * One input string together with the result main() expects from decoding it.
 */
struct testcase {
    /* NUL-terminated input handed to the decoder */
    const char *utf8_str;
    /* expected number of decoded code points */
    size_t len;
    /* expected code points; only the first `len` entries are compared */
    uint32_t u[MAX_LENGTH];
    /* when non-zero, main() skips the wind_ucs4utf8() step for this entry */
    int invalid_ucs2;
};
79*ae771770SStanislav Sedov 
80*ae771770SStanislav Sedov static const struct testcase testcases[] = {
81*ae771770SStanislav Sedov     {"", 0, {0}},
82*ae771770SStanislav Sedov     {"\x01", 1, {1}},
83*ae771770SStanislav Sedov     {"\x7F", 1, {0x7F}},
84*ae771770SStanislav Sedov     {"\x01\x7F", 2, {0x01, 0x7F}},
85*ae771770SStanislav Sedov     {"\xC0\x80", 1, {0}},
86*ae771770SStanislav Sedov     {"\xC0\x81", 1, {1}},
87*ae771770SStanislav Sedov     {"\xC1\x80", 1, {0x40}},
88*ae771770SStanislav Sedov     {"\xDF\xBF", 1, {0x7FF}},
89*ae771770SStanislav Sedov     {"\xE0\x80\x80", 1, {0}},
90*ae771770SStanislav Sedov     {"\xE0\x80\x81", 1, {1}},
91*ae771770SStanislav Sedov     {"\xE0\x81\x80", 1, {0x40}},
92*ae771770SStanislav Sedov     {"\xE1\x80\x80", 1, {0x1000}},
93*ae771770SStanislav Sedov     {"\xEF\xBF\xBF", 1, {0xFFFF}},
94*ae771770SStanislav Sedov     {"\xF0\x80\x80\x80", 1, {0}},
95*ae771770SStanislav Sedov     {"\xF0\x80\x80\x81", 1, {1}},
96*ae771770SStanislav Sedov     {"\xF0\x80\x81\x80", 1, {0x40}},
97*ae771770SStanislav Sedov     {"\xF0\x81\x80\x80", 1, {0x1000}},
98*ae771770SStanislav Sedov     {"\xF1\x80\x80\x80", 1, {0x40000}},
99*ae771770SStanislav Sedov     {"\xF7\xBF\xBF\xBF", 1, {0X1FFFFF}, 1},
100*ae771770SStanislav Sedov };
101*ae771770SStanislav Sedov 
102*ae771770SStanislav Sedov int
main(void)103*ae771770SStanislav Sedov main(void)
104*ae771770SStanislav Sedov {
105*ae771770SStanislav Sedov     unsigned failures = 0;
106*ae771770SStanislav Sedov     unsigned i;
107*ae771770SStanislav Sedov     const char **s;
108*ae771770SStanislav Sedov     int ret;
109*ae771770SStanislav Sedov     size_t len, len2;
110*ae771770SStanislav Sedov     uint32_t u[MAX_LENGTH];
111*ae771770SStanislav Sedov     char str[MAX_LENGTH * 4];
112*ae771770SStanislav Sedov 
113*ae771770SStanislav Sedov     for (s = failing_testcases; *s != NULL; ++s) {
114*ae771770SStanislav Sedov 	len = MAX_LENGTH;
115*ae771770SStanislav Sedov 	ret = wind_utf8ucs4(*s, u, &len);
116*ae771770SStanislav Sedov 	if (ret == 0) {
117*ae771770SStanislav Sedov 	    printf("utf8 decode of \"%s\" should have failed\n", *s);
118*ae771770SStanislav Sedov 	    ++failures;
119*ae771770SStanislav Sedov 	}
120*ae771770SStanislav Sedov     }
121*ae771770SStanislav Sedov 
122*ae771770SStanislav Sedov     for (i = 0; i < sizeof(testcases)/sizeof(testcases[0]); ++i) {
123*ae771770SStanislav Sedov 	const struct testcase *t = &testcases[i];
124*ae771770SStanislav Sedov 
125*ae771770SStanislav Sedov 	ret = wind_utf8ucs4_length(t->utf8_str, &len);
126*ae771770SStanislav Sedov 	if (ret) {
127*ae771770SStanislav Sedov 	    printf("utf8ucs4 length of \"%s\" should have succeeded\n",
128*ae771770SStanislav Sedov 		   t->utf8_str);
129*ae771770SStanislav Sedov 	    ++failures;
130*ae771770SStanislav Sedov 	    continue;
131*ae771770SStanislav Sedov 	}
132*ae771770SStanislav Sedov 	if (len != t->len) {
133*ae771770SStanislav Sedov 	    printf("utf8ucs4_length of \"%s\" has wrong length: "
134*ae771770SStanislav Sedov 		   "expected: %u, actual: %u\n",
135*ae771770SStanislav Sedov 		   t->utf8_str, (unsigned int)t->len, (unsigned int)len);
136*ae771770SStanislav Sedov 	    ++failures;
137*ae771770SStanislav Sedov 	    continue;
138*ae771770SStanislav Sedov 	}
139*ae771770SStanislav Sedov 
140*ae771770SStanislav Sedov 	len = MAX_LENGTH;
141*ae771770SStanislav Sedov 	ret = wind_utf8ucs4(t->utf8_str, u, &len);
142*ae771770SStanislav Sedov 	if (ret) {
143*ae771770SStanislav Sedov 	    printf("utf8 decode of \"%s\" should have succeeded\n",
144*ae771770SStanislav Sedov 		   t->utf8_str);
145*ae771770SStanislav Sedov 	    ++failures;
146*ae771770SStanislav Sedov 	    continue;
147*ae771770SStanislav Sedov 	}
148*ae771770SStanislav Sedov 	if (len != t->len) {
149*ae771770SStanislav Sedov 	    printf("utf8 decode of \"%s\" has wrong length: "
150*ae771770SStanislav Sedov 		   "expected: %u, actual: %u\n",
151*ae771770SStanislav Sedov 		   t->utf8_str, (unsigned int)t->len, (unsigned int)len);
152*ae771770SStanislav Sedov 	    ++failures;
153*ae771770SStanislav Sedov 	    continue;
154*ae771770SStanislav Sedov 	}
155*ae771770SStanislav Sedov 	if (memcmp(t->u, u, len * sizeof(uint32_t)) != 0) {
156*ae771770SStanislav Sedov 	    printf("utf8 decode of \"%s\" has wrong data\n",
157*ae771770SStanislav Sedov 		   t->utf8_str);
158*ae771770SStanislav Sedov 	    ++failures;
159*ae771770SStanislav Sedov 	    continue;
160*ae771770SStanislav Sedov 	}
161*ae771770SStanislav Sedov 	if (t->invalid_ucs2 == 0) {
162*ae771770SStanislav Sedov 	    len2 = sizeof(str);
163*ae771770SStanislav Sedov 	    ret = wind_ucs4utf8(u, len, str, &len2);
164*ae771770SStanislav Sedov 	    if (ret) {
165*ae771770SStanislav Sedov 		printf("ucs4 decode of \"%s\" should have succeeded\n",
166*ae771770SStanislav Sedov 		       t->utf8_str);
167*ae771770SStanislav Sedov 		++failures;
168*ae771770SStanislav Sedov 		continue;
169*ae771770SStanislav Sedov 	    }
170*ae771770SStanislav Sedov 	}
171*ae771770SStanislav Sedov     }
172*ae771770SStanislav Sedov 
173*ae771770SStanislav Sedov     return failures != 0;
174*ae771770SStanislav Sedov }
175