1 /*
2    Unix SMB/CIFS implementation.
3 
4    local testing of iconv routines. This tests the system iconv code against
5    the built-in iconv code
6 
7    Copyright (C) Andrew Tridgell 2004
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2 of the License, or
12    (at your option) any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23 
24 #include "includes.h"
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
30 
31 #if HAVE_NATIVE_ICONV
/*
  encode one unicode codepoint as UTF-16LE using the system iconv

  codepoint is serialised as 4 little-endian bytes and converted from
  UCS-4LE. buf must have room for 8 bytes; *size receives the number
  of bytes written to it.

  returns -1 if the converter cannot be opened (*size is then left
  untouched), otherwise the result of iconv() converted to int
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	/* opened on the first call and then reused by every later call */
	static iconv_t conv;
	uint8_t ucs4[4];
	char *src = (char *)ucs4;
	size_t src_left = sizeof(ucs4);
	size_t dst_left = 8;
	size_t rc;
	unsigned int i;

	if (!conv) {
		conv = iconv_open("UTF-16LE", "UCS-4LE");
		if (conv == (iconv_t)-1) {
			/* forget the failed handle so the next call retries */
			conv = NULL;
			return -1;
		}
	}

	/* least significant byte first */
	for (i = 0; i < sizeof(ucs4); i++) {
		ucs4[i] = (codepoint >> (8 * i)) & 0xFF;
	}

	rc = iconv(conv, &src, &src_left, &buf, &dst_left);

	*size = 8 - dst_left;

	return rc;
}
65 
66 
/*
  work out the unicode codepoint of the first character in the buffer

  buf/size describe text encoded in "charset". The text is converted
  to UCS-4LE into a 4 byte output buffer, so at most one character is
  decoded.

  returns the decoded codepoint. If the converter cannot be opened, or
  iconv() writes nothing, the result is 0 (the output buffer starts
  out zeroed). Errors from iconv() itself are not reported.
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	/*
	  the result is deliberately ignored: with only 4 bytes of output
	  space iconv() reports an error whenever the input holds more
	  than one character, which is the normal case for callers that
	  pass the tail of a longer buffer
	*/
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so out[3] never shifts into an int sign bit */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
90 
/*
  display a buffer with name prefix

  prints name, a space, then each of the size bytes of buf as two hex
  digits followed by a space, and finally a newline, all on stdout
*/
static void show_buf(const char *name, const uint8_t *buf, size_t size)
{
	/* same type as size, so the loop bound is compared without conversion */
	size_t i;

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
103 
104 /*
105   given a UTF-16LE buffer, test the system and built-in iconv code to
106   make sure they do exactly the same thing in converting the buffer to
107   "charset", then convert it back again and ensure we get the same
108   buffer back
109 */
test_buffer(struct torture_context * test,uint8_t * inbuf,size_t size,const char * charset)110 static bool test_buffer(struct torture_context *test,
111 			uint8_t *inbuf, size_t size, const char *charset)
112 {
113 	uint8_t buf1[1000], buf2[1000], buf3[1000];
114 	size_t outsize1, outsize2, outsize3;
115 	const char *ptr_in;
116 	char *ptr_out;
117 	size_t size_in1, size_in2, size_in3;
118 	size_t ret1, ret2, ret3, len1, len2;
119 	int errno1, errno2;
120 	static iconv_t cd;
121 	static smb_iconv_t cd2, cd3;
122 	static const char *last_charset;
123 
124 	if (cd && last_charset) {
125 		iconv_close(cd);
126 		smb_iconv_close(cd2);
127 		smb_iconv_close(cd3);
128 		cd = NULL;
129 	}
130 
131 	if (!cd) {
132 		cd = iconv_open(charset, "UTF-16LE");
133 		if (cd == (iconv_t)-1) {
134 			torture_fail(test,
135 				     talloc_asprintf(test,
136 						     "failed to open %s to UTF-16LE\n",
137 						     charset));
138 			cd = NULL;
139 			return false;
140 		}
141 		cd2 = smb_iconv_open(charset, "UTF-16LE");
142 		cd3 = smb_iconv_open("UTF-16LE", charset);
143 		last_charset = charset;
144 	}
145 
146 	/* internal convert to charset - placing result in buf1 */
147 	ptr_in = (const char *)inbuf;
148 	ptr_out = (char *)buf1;
149 	size_in1 = size;
150 	outsize1 = sizeof(buf1);
151 
152 	memset(ptr_out, 0, outsize1);
153 	errno = 0;
154 	ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
155 	errno1 = errno;
156 
157 	/* system convert to charset - placing result in buf2 */
158 	ptr_in = (const char *)inbuf;
159 	ptr_out = (char *)buf2;
160 	size_in2 = size;
161 	outsize2 = sizeof(buf2);
162 
163 	memset(ptr_out, 0, outsize2);
164 	errno = 0;
165 	ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
166 	errno2 = errno;
167 
168 	len1 = sizeof(buf1) - outsize1;
169 	len2 = sizeof(buf2) - outsize2;
170 
171 	/* codepoints above 1M are not interesting for now */
172 	if (len2 > len1 &&
173 	    memcmp(buf1, buf2, len1) == 0 &&
174 	    get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
175 		return true;
176 	}
177 	if (len1 > len2 &&
178 	    memcmp(buf1, buf2, len2) == 0 &&
179 	    get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
180 		return true;
181 	}
182 
183 	torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
184 
185 	if (errno1 != errno2) {
186 		show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
187 		show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
188 		torture_fail(test, talloc_asprintf(test,
189 					"e1=%s e2=%s", strerror(errno1), strerror(errno2)));
190 	}
191 
192 	torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
193 
194 	torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
195 
196 	if (len1 != len2 ||
197 	    memcmp(buf1, buf2, len1) != 0) {
198 		torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
199 		show_buf(" IN1:", inbuf, size-size_in1);
200 		show_buf(" IN2:", inbuf, size-size_in2);
201 		show_buf("OUT1:", buf1, len1);
202 		show_buf("OUT2:", buf2, len2);
203 		if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
204 			torture_comment(test, "next codepoint is %u",
205 			       get_codepoint((char *)(buf2+len1), len2-len1, charset));
206 		}
207 		if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
208 			torture_comment(test, "next codepoint is %u",
209 			       get_codepoint((char *)(buf1+len2),len1-len2, charset));
210 		}
211 
212 		torture_fail(test, "failed");
213 	}
214 
215 	/* convert back to UTF-16, putting result in buf3 */
216 	size = size - size_in1;
217 	ptr_in = (const char *)buf1;
218 	ptr_out = (char *)buf3;
219 	size_in3 = len1;
220 	outsize3 = sizeof(buf3);
221 
222 	memset(ptr_out, 0, outsize3);
223 	ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
224 
225 	/* we only internally support the first 1M codepoints */
226 	if (outsize3 != sizeof(buf3) - size &&
227 	    get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
228 			  size - (sizeof(buf3) - outsize3),
229 			  "UTF-16LE") >= (1<<20)) {
230 		return true;
231 	}
232 
233 	torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
234 								"pull failed - %s", strerror(errno)));
235 
236 	if (strncmp(charset, "UTF", 3) != 0) {
237 		/* don't expect perfect mappings for non UTF charsets */
238 		return true;
239 	}
240 
241 
242 	torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
243 		"wrong outsize3");
244 
245 	if (memcmp(buf3, inbuf, size) != 0) {
246 		torture_comment(test, "pull bytes mismatch:");
247 		show_buf("inbuf", inbuf, size);
248 		show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
249 		torture_fail(test, "");
250 		torture_comment(test, "next codepoint is %u\n",
251 		       get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
252 				     size - (sizeof(buf3) - outsize3),
253 				     "UTF-16LE"));
254 	}
255 
256 	return true;
257 }
258 
259 
260 /*
261   test the push_codepoint() and next_codepoint() functions for a given
262   codepoint
263 */
test_codepoint(struct torture_context * tctx,unsigned int codepoint)264 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
265 {
266 	uint8_t buf[10];
267 	size_t size, size2;
268 	codepoint_t c;
269 
270 	size = push_codepoint((char *)buf, codepoint);
271 	torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
272 		       "Invalid Codepoint range");
273 
274 	if (size == -1) return true;
275 
276 	buf[size] = random();
277 	buf[size+1] = random();
278 	buf[size+2] = random();
279 	buf[size+3] = random();
280 
281 	c = next_codepoint((char *)buf, &size2);
282 
283 	torture_assert(tctx, c == codepoint,
284 		       talloc_asprintf(tctx,
285 				       "next_codepoint(%u) failed - gave %u", codepoint, c));
286 
287 	torture_assert(tctx, size2 == size,
288 			talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
289 		       codepoint, (int)size2, (int)size));
290 
291 	return true;
292 }
293 
/*
  run test_codepoint() for every codepoint below 1<<20, stopping at
  the first one that fails
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	const unsigned int limit = 1 << 20;
	unsigned int cp = 0;

	while (cp < limit) {
		if (!test_codepoint(tctx, cp)) {
			return false;
		}
		cp++;
	}

	return true;
}
303 
/*
  run test_buffer() against UTF-8 for every codepoint below 1<<20
  that gen_codepoint_utf16() can encode

  codepoints for which gen_codepoint_utf16() returns non-zero are
  skipped. returns false as soon as one buffer fails.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned int codepoint;
	size_t size = 0;
	unsigned char inbuf[1000];

	for (codepoint=0;codepoint<(1<<20);codepoint++) {
		if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
			continue;
		}

		/* progress line every 1000 codepoints, unless switched off */
		if (codepoint % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "codepoint=%u   \r", codepoint);
			}
		}

		if (!test_buffer(tctx, inbuf, size, "UTF-8"))
			return false;
	}
	return true;
}
326 
/*
  run test_buffer() against UTF-8 and CP850 on randomly generated
  buffers

  each buffer is 0 to 99 bytes long. A byte is drawn from 0..127 in
  roughly 80% of the cases and from the full byte range otherwise;
  on top of that 0xd8 and 0xdc are each or-ed in with a probability
  of one in ten.

  NOTE(review): the loop runs 500000 times although the function name
  says 5M - confirm which one is intended.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;
	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		/* progress line every 1000 buffers, unless switched off */
		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u              \r", i);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			/* i is unsigned, so it is printed with %u */
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}
	return true;
}
367 
/*
  build the "ICONV" torture suite

  returns NULL without creating a suite when the "iconv:native"
  parameter is false or when the system iconv library cannot open the
  conversions probed below. Otherwise the random number generator is
  seeded from the current time and a suite holding the three tests of
  this file is returned.
*/
struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
{
	iconv_t cd;
	struct torture_suite *suite;

	if (!lp_parm_bool(-1, "iconv", "native", true)) {
		printf("system iconv disabled - skipping test\n");
		return NULL;
	}

	/* the conversion gen_codepoint_utf16() opens: from UCS-4LE to UTF-16LE */
	cd = iconv_open("UTF-16LE", "UCS-4LE");
	if (cd == (iconv_t)-1) {
		printf("unable to test - system iconv library does not support UCS-4LE -> UTF-16LE\n");
		return NULL;
	}
	iconv_close(cd);

	/* the conversion test_buffer() opens for CP850: from UTF-16LE to CP850 */
	cd = iconv_open("CP850", "UTF-16LE");
	if (cd == (iconv_t)-1) {
		printf("unable to test - system iconv library does not support UTF-16LE -> CP850\n");
		return NULL;
	}
	iconv_close(cd);

	srandom(time(NULL));

	/* only allocate the suite once it is known that it will be returned */
	suite = torture_suite_create(mem_ctx, "ICONV");
	if (suite == NULL) {
		return NULL;
	}

	torture_suite_add_simple_test(suite, "next_codepoint()",
								   test_next_codepoint);

	torture_suite_add_simple_test(suite, "first 1M codepoints",
								   test_first_1m);

	torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
								   test_random_5m);
	return suite;
}
404 
405 #else
406 
/*
  fallback used when HAVE_NATIVE_ICONV is not set: there is no system
  iconv to compare against, so say so and provide no suite
*/
struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
{
	struct torture_suite *no_suite = NULL;

	printf("No native iconv library - can't run iconv test\n");

	return no_suite;
}
412 
413 #endif
414