xref: /dragonfly/test/debug/utf8bin.c (revision 8a84c799)
1*8a84c799SMatthew Dillon 
/*
 * UTF8BIN.C
 *
 * Build and run:
 *
 *	cc utf8bin.c -o ~/bin/utf8bin
 *	rehash
 *	setenv LANG en_US.UTF-8
 *	dd if=/dev/urandom bs=32k count=1024 | utf8bin
 *
 * Tests the round-trip of the UTF8-B binary escaping functions.
 */
12*8a84c799SMatthew Dillon #include <sys/types.h>
13*8a84c799SMatthew Dillon #include <sys/file.h>
14*8a84c799SMatthew Dillon #include <locale.h>
15*8a84c799SMatthew Dillon #include <stdio.h>
16*8a84c799SMatthew Dillon #include <unistd.h>
17*8a84c799SMatthew Dillon #include <string.h>
18*8a84c799SMatthew Dillon #include <wchar.h>
19*8a84c799SMatthew Dillon 
20*8a84c799SMatthew Dillon int
main(int ac,char ** av)21*8a84c799SMatthew Dillon main(int ac, char **av)
22*8a84c799SMatthew Dillon {
23*8a84c799SMatthew Dillon 	char ibuf[1024];
24*8a84c799SMatthew Dillon 	char obuf[1024];
25*8a84c799SMatthew Dillon 	wchar_t warray[1024];
26*8a84c799SMatthew Dillon 	ssize_t r1;
27*8a84c799SMatthew Dillon 	ssize_t r2;
28*8a84c799SMatthew Dillon 	ssize_t x;
29*8a84c799SMatthew Dillon 	ssize_t w1;
30*8a84c799SMatthew Dillon 	ssize_t w2;
31*8a84c799SMatthew Dillon 	ssize_t i;
32*8a84c799SMatthew Dillon 	ssize_t o;
33*8a84c799SMatthew Dillon 	int failed;
34*8a84c799SMatthew Dillon 	int flags;
35*8a84c799SMatthew Dillon 
36*8a84c799SMatthew Dillon 	/*
37*8a84c799SMatthew Dillon 	 * NOTE: If we use WCSBIN_SURRO the round-trip will not be 8-bit
38*8a84c799SMatthew Dillon 	 *	 clean.
39*8a84c799SMatthew Dillon 	 *
40*8a84c799SMatthew Dillon 	 *	 Typically use either 0 or WCSBIN_LONGCODE, both of which
41*8a84c799SMatthew Dillon 	 *	 are 8-bit clean, will escape, and cannot error on input
42*8a84c799SMatthew Dillon 	 *	 (or generate wchars that might error on output) given the
43*8a84c799SMatthew Dillon 	 *	 same flags).
44*8a84c799SMatthew Dillon 	 */
45*8a84c799SMatthew Dillon 	flags = 0;
46*8a84c799SMatthew Dillon 	flags = WCSBIN_LONGCODES;
47*8a84c799SMatthew Dillon 
48*8a84c799SMatthew Dillon 	setlocale(LC_ALL, "");
49*8a84c799SMatthew Dillon 	x = 0;
50*8a84c799SMatthew Dillon 	while ((flags & WCSBIN_EOF) == 0 &&
51*8a84c799SMatthew Dillon 	       (r1 = read(0, ibuf + x, sizeof(ibuf) - x)) >= 0) {
52*8a84c799SMatthew Dillon 		/* allow final loop for loose ends */
53*8a84c799SMatthew Dillon 		if (r1 == 0)
54*8a84c799SMatthew Dillon 			flags |= WCSBIN_EOF;
55*8a84c799SMatthew Dillon 		r1 += x;
56*8a84c799SMatthew Dillon 		r2 = r1;
57*8a84c799SMatthew Dillon 		w1 = mbintowcr(warray, ibuf, 1024, &r2, flags);
58*8a84c799SMatthew Dillon 
59*8a84c799SMatthew Dillon 		/* round-trip, output buffer can be same size as input buffer */
60*8a84c799SMatthew Dillon 		w2 = w1;
61*8a84c799SMatthew Dillon 		o = wcrtombin(obuf, warray, sizeof(obuf), &w2, flags);
62*8a84c799SMatthew Dillon 		fflush(stdout);
63*8a84c799SMatthew Dillon 
64*8a84c799SMatthew Dillon 		printf("read %4d/%-4d wc=%4d/%-4d write=%-4d\t",
65*8a84c799SMatthew Dillon 		       r2, r1, w2, w1, o);
66*8a84c799SMatthew Dillon 		if (r2 == o) {
67*8a84c799SMatthew Dillon 			if (bcmp(ibuf, obuf, o) == 0) {
68*8a84c799SMatthew Dillon 				printf("ok%c",
69*8a84c799SMatthew Dillon 					(flags & WCSBIN_EOF) ? '\n' : '\r');
70*8a84c799SMatthew Dillon 				fflush(stdout);
71*8a84c799SMatthew Dillon 			} else {
72*8a84c799SMatthew Dillon 				printf("compare-fail\n");
73*8a84c799SMatthew Dillon 			}
74*8a84c799SMatthew Dillon 		} else if (r2 < o) {
75*8a84c799SMatthew Dillon 			if (bcmp(ibuf, obuf, r2) == 0)
76*8a84c799SMatthew Dillon 				printf("len-fail, rest-ok\n");
77*8a84c799SMatthew Dillon 			else
78*8a84c799SMatthew Dillon 				printf("len-fail, compare-fail\n");
79*8a84c799SMatthew Dillon 		} else if (o < r2) {
80*8a84c799SMatthew Dillon 			if (bcmp(ibuf, obuf, o) == 0)
81*8a84c799SMatthew Dillon 				printf("len-fail, rest-ok\n");
82*8a84c799SMatthew Dillon 			else
83*8a84c799SMatthew Dillon 				printf("len-fail, compare-fail\n");
84*8a84c799SMatthew Dillon 		}
85*8a84c799SMatthew Dillon 		for (i = failed = 0; i < r2 && i < o; ++i) {
86*8a84c799SMatthew Dillon 			if (ibuf[i] != obuf[i]) {
87*8a84c799SMatthew Dillon 				printf("    @%04x %02x %02x\n",
88*8a84c799SMatthew Dillon 				       i, (uint8_t)ibuf[i], (uint8_t)obuf[i]);
89*8a84c799SMatthew Dillon 				failed = 16;
90*8a84c799SMatthew Dillon 			} else if (failed) {
91*8a84c799SMatthew Dillon 				--failed;
92*8a84c799SMatthew Dillon 				printf("    @%04x %02x %02x\n",
93*8a84c799SMatthew Dillon 				       i, (uint8_t)ibuf[i], (uint8_t)obuf[i]);
94*8a84c799SMatthew Dillon 			}
95*8a84c799SMatthew Dillon 		}
96*8a84c799SMatthew Dillon 
97*8a84c799SMatthew Dillon 		x = r1 - r2;
98*8a84c799SMatthew Dillon 		if (x)
99*8a84c799SMatthew Dillon 			bcopy(ibuf + r2, ibuf, x);
100*8a84c799SMatthew Dillon 	}
101*8a84c799SMatthew Dillon }
102