1 /*
2 Unix SMB/CIFS implementation.
3
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
6
7 Copyright (C) Andrew Tridgell 2004
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24 #include "includes.h"
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
30
31 #if HAVE_NATIVE_ICONV
/*
  Encode a single Unicode codepoint as UTF-16LE using the system iconv.

  The codepoint is serialised as four little-endian bytes (UCS-4LE) and
  pushed through a UCS-4LE -> UTF-16LE converter that is opened on first
  use and then kept in a function-local static.

  codepoint: value to encode
  buf:       destination; the converter is allowed to write up to 8 bytes
  size:      receives the number of bytes written to buf

  Returns the result of iconv() narrowed to int, or -1 when the converter
  cannot be opened (in which case *size is not written).
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	static iconv_t cd;
	uint8_t ucs4[4];
	char *src = (char *)ucs4;
	size_t src_left = 4;
	size_t dst_left = 8;
	size_t rc;
	unsigned int i;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* forget the failed handle so the next call retries */
			cd = NULL;
			return -1;
		}
	}

	/* least significant byte first */
	for (i = 0; i < 4; i++) {
		ucs4[i] = (codepoint >> (8 * i)) & 0xFF;
	}

	rc = iconv(cd, &src, &src_left, &buf, &dst_left);

	*size = 8 - dst_left;

	return rc;
}
65
66
/*
  Work out the Unicode codepoint of the first character in the buffer.

  buf:     input bytes encoded in "charset"
  size:    number of bytes available in buf
  charset: iconv name of the encoding used by buf

  The input is converted to UCS-4LE with the system iconv and the first
  four output bytes are assembled into the codepoint.  Returns 0 when the
  converter cannot be opened or when no output was produced.
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4] = { 0, 0, 0, 0 };
	char *ptr_out = (char *)out;
	size_t size_in = size;
	size_t size_out = sizeof(out);

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	/*
	  The result is intentionally ignored: only the first character is
	  wanted, so iconv() may legitimately stop early (for example with
	  E2BIG once the 4 byte output is full).  On a hard failure "out"
	  simply stays zeroed.
	*/
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so a top byte >= 0x80 cannot overflow int */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
90
/*
  Print "name", a space, then every byte of buf as two lowercase hex
  digits followed by a space, and finish the line with a newline.

  name: prefix printed before the dump
  buf:  bytes to dump (not modified)
  size: number of bytes to print
*/
static void show_buf(const char *name, const uint8_t *buf, size_t size)
{
	size_t i;

	printf("%s ", name);
	/* size_t index: avoids a signed/unsigned comparison against size */
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
103
104 /*
105 given a UTF-16LE buffer, test the system and built-in iconv code to
106 make sure they do exactly the same thing in converting the buffer to
107 "charset", then convert it back again and ensure we get the same
108 buffer back
109 */
test_buffer(struct torture_context * test,uint8_t * inbuf,size_t size,const char * charset)110 static bool test_buffer(struct torture_context *test,
111 uint8_t *inbuf, size_t size, const char *charset)
112 {
113 uint8_t buf1[1000], buf2[1000], buf3[1000];
114 size_t outsize1, outsize2, outsize3;
115 const char *ptr_in;
116 char *ptr_out;
117 size_t size_in1, size_in2, size_in3;
118 size_t ret1, ret2, ret3, len1, len2;
119 int errno1, errno2;
120 static iconv_t cd;
121 static smb_iconv_t cd2, cd3;
122 static const char *last_charset;
123
124 if (cd && last_charset) {
125 iconv_close(cd);
126 smb_iconv_close(cd2);
127 smb_iconv_close(cd3);
128 cd = NULL;
129 }
130
131 if (!cd) {
132 cd = iconv_open(charset, "UTF-16LE");
133 if (cd == (iconv_t)-1) {
134 torture_fail(test,
135 talloc_asprintf(test,
136 "failed to open %s to UTF-16LE\n",
137 charset));
138 cd = NULL;
139 return false;
140 }
141 cd2 = smb_iconv_open(charset, "UTF-16LE");
142 cd3 = smb_iconv_open("UTF-16LE", charset);
143 last_charset = charset;
144 }
145
146 /* internal convert to charset - placing result in buf1 */
147 ptr_in = (const char *)inbuf;
148 ptr_out = (char *)buf1;
149 size_in1 = size;
150 outsize1 = sizeof(buf1);
151
152 memset(ptr_out, 0, outsize1);
153 errno = 0;
154 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
155 errno1 = errno;
156
157 /* system convert to charset - placing result in buf2 */
158 ptr_in = (const char *)inbuf;
159 ptr_out = (char *)buf2;
160 size_in2 = size;
161 outsize2 = sizeof(buf2);
162
163 memset(ptr_out, 0, outsize2);
164 errno = 0;
165 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
166 errno2 = errno;
167
168 len1 = sizeof(buf1) - outsize1;
169 len2 = sizeof(buf2) - outsize2;
170
171 /* codepoints above 1M are not interesting for now */
172 if (len2 > len1 &&
173 memcmp(buf1, buf2, len1) == 0 &&
174 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
175 return true;
176 }
177 if (len1 > len2 &&
178 memcmp(buf1, buf2, len2) == 0 &&
179 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
180 return true;
181 }
182
183 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
184
185 if (errno1 != errno2) {
186 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
187 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
188 torture_fail(test, talloc_asprintf(test,
189 "e1=%s e2=%s", strerror(errno1), strerror(errno2)));
190 }
191
192 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
193
194 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
195
196 if (len1 != len2 ||
197 memcmp(buf1, buf2, len1) != 0) {
198 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
199 show_buf(" IN1:", inbuf, size-size_in1);
200 show_buf(" IN2:", inbuf, size-size_in2);
201 show_buf("OUT1:", buf1, len1);
202 show_buf("OUT2:", buf2, len2);
203 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
204 torture_comment(test, "next codepoint is %u",
205 get_codepoint((char *)(buf2+len1), len2-len1, charset));
206 }
207 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
208 torture_comment(test, "next codepoint is %u",
209 get_codepoint((char *)(buf1+len2),len1-len2, charset));
210 }
211
212 torture_fail(test, "failed");
213 }
214
215 /* convert back to UTF-16, putting result in buf3 */
216 size = size - size_in1;
217 ptr_in = (const char *)buf1;
218 ptr_out = (char *)buf3;
219 size_in3 = len1;
220 outsize3 = sizeof(buf3);
221
222 memset(ptr_out, 0, outsize3);
223 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
224
225 /* we only internally support the first 1M codepoints */
226 if (outsize3 != sizeof(buf3) - size &&
227 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
228 size - (sizeof(buf3) - outsize3),
229 "UTF-16LE") >= (1<<20)) {
230 return true;
231 }
232
233 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
234 "pull failed - %s", strerror(errno)));
235
236 if (strncmp(charset, "UTF", 3) != 0) {
237 /* don't expect perfect mappings for non UTF charsets */
238 return true;
239 }
240
241
242 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
243 "wrong outsize3");
244
245 if (memcmp(buf3, inbuf, size) != 0) {
246 torture_comment(test, "pull bytes mismatch:");
247 show_buf("inbuf", inbuf, size);
248 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
249 torture_fail(test, "");
250 torture_comment(test, "next codepoint is %u\n",
251 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
252 size - (sizeof(buf3) - outsize3),
253 "UTF-16LE"));
254 }
255
256 return true;
257 }
258
259
260 /*
261 test the push_codepoint() and next_codepoint() functions for a given
262 codepoint
263 */
test_codepoint(struct torture_context * tctx,unsigned int codepoint)264 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
265 {
266 uint8_t buf[10];
267 size_t size, size2;
268 codepoint_t c;
269
270 size = push_codepoint((char *)buf, codepoint);
271 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
272 "Invalid Codepoint range");
273
274 if (size == -1) return true;
275
276 buf[size] = random();
277 buf[size+1] = random();
278 buf[size+2] = random();
279 buf[size+3] = random();
280
281 c = next_codepoint((char *)buf, &size2);
282
283 torture_assert(tctx, c == codepoint,
284 talloc_asprintf(tctx,
285 "next_codepoint(%u) failed - gave %u", codepoint, c));
286
287 torture_assert(tctx, size2 == size,
288 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
289 codepoint, (int)size2, (int)size));
290
291 return true;
292 }
293
/*
  Run test_codepoint() for every codepoint below 1<<20, giving up at the
  first one that fails.
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	const unsigned int limit = (1<<20);
	unsigned int codepoint = 0;

	while (codepoint < limit) {
		if (!test_codepoint(tctx, codepoint)) {
			return false;
		}
		codepoint++;
	}

	return true;
}
303
test_first_1m(struct torture_context * tctx)304 static bool test_first_1m(struct torture_context *tctx)
305 {
306 unsigned int codepoint;
307 size_t size;
308 unsigned char inbuf[1000];
309
310 for (codepoint=0;codepoint<(1<<20);codepoint++) {
311 if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
312 continue;
313 }
314
315 if (codepoint % 1000 == 0) {
316 if (torture_setting_bool(tctx, "progress", True)) {
317 torture_comment(tctx, "codepoint=%u \r", codepoint);
318 }
319 }
320
321 if (!test_buffer(tctx, inbuf, size, "UTF-8"))
322 return false;
323 }
324 return true;
325 }
326
/*
  Run test_buffer() on randomly generated input, converting every buffer
  to both "UTF-8" and "CP850".

  Each iteration fills between 0 and 99 bytes: roughly 80% of the bytes
  are drawn from 0..127, the rest are arbitrary, and 0xd8 / 0xdc are
  OR-ed into about one byte in ten each (presumably to provoke UTF-16
  surrogate code units).  Stops and returns false at the first buffer
  that test_buffer() rejects.

  NOTE(review): the loop runs 500000 iterations although the function
  name says "5m" - confirm which one is intended.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	for (i = 0; i < 500000; i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
			}
		}

		size = random() % 100;
		for (c = 0; c < size; c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}

		/* i is unsigned, so it is printed with %u */
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}

	return true;
}
367
/*
  Build the "ICONV" torture suite.

  mem_ctx: talloc context the suite is created under

  Returns NULL (after printing the reason) when the "iconv:native"
  parameter is off or when the system iconv lacks a conversion the tests
  depend on; otherwise returns a suite holding the three tests in this
  file.  Also seeds random() from the current time.
*/
struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
{
	iconv_t cd;
	struct torture_suite *suite;

	if (!lp_parm_bool(-1, "iconv", "native", True)) {
		printf("system iconv disabled - skipping test\n");
		return NULL;
	}

	/* iconv_open(to, from): gen_codepoint_utf16() converts UCS-4LE to UTF-16LE */
	cd = iconv_open("UTF-16LE", "UCS-4LE");
	if (cd == (iconv_t)-1) {
		printf("unable to test - system iconv library does not support UCS-4LE -> UTF-16LE\n");
		return NULL;
	}
	iconv_close(cd);

	/* probe the direction test_buffer() actually opens: UTF-16LE to CP850 */
	cd = iconv_open("CP850", "UTF-16LE");
	if (cd == (iconv_t)-1) {
		printf("unable to test - system iconv library does not support UTF-16LE -> CP850\n");
		return NULL;
	}
	iconv_close(cd);

	/* create the suite only once we know it will be returned */
	suite = torture_suite_create(mem_ctx, "ICONV");
	if (suite == NULL) {
		return NULL;
	}

	srandom(time(NULL));

	torture_suite_add_simple_test(suite, "next_codepoint()",
				      test_next_codepoint);

	torture_suite_add_simple_test(suite, "first 1M codepoints",
				      test_first_1m);

	torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
				      test_random_5m);
	return suite;
}
404
405 #else
406
/*
  Fallback used when HAVE_NATIVE_ICONV is not set: there is no system
  iconv to compare against, so say so and provide no suite.
*/
struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
{
	fputs("No native iconv library - can't run iconv test\n", stdout);

	return NULL;
}
412
413 #endif
414