1*8a84c799SMatthew Dillon
2*8a84c799SMatthew Dillon /*
3*8a84c799SMatthew Dillon * UTF8BIN.C
4*8a84c799SMatthew Dillon *
5*8a84c799SMatthew Dillon * cc utf8bin.c -o ~/bin/utf8bin
6*8a84c799SMatthew Dillon * rehash
7*8a84c799SMatthew Dillon * setenv LANG en_US.UTF-8
8*8a84c799SMatthew Dillon * dd if=/dev/urandom bs=32k count=1024 | utf8bin
9*8a84c799SMatthew Dillon *
10*8a84c799SMatthew Dillon * Test round-trip UTF8-B binary escaping functions.
11*8a84c799SMatthew Dillon */
12*8a84c799SMatthew Dillon #include <sys/types.h>
13*8a84c799SMatthew Dillon #include <sys/file.h>
14*8a84c799SMatthew Dillon #include <locale.h>
15*8a84c799SMatthew Dillon #include <stdio.h>
16*8a84c799SMatthew Dillon #include <unistd.h>
17*8a84c799SMatthew Dillon #include <string.h>
18*8a84c799SMatthew Dillon #include <wchar.h>
19*8a84c799SMatthew Dillon
20*8a84c799SMatthew Dillon int
main(int ac,char ** av)21*8a84c799SMatthew Dillon main(int ac, char **av)
22*8a84c799SMatthew Dillon {
23*8a84c799SMatthew Dillon char ibuf[1024];
24*8a84c799SMatthew Dillon char obuf[1024];
25*8a84c799SMatthew Dillon wchar_t warray[1024];
26*8a84c799SMatthew Dillon ssize_t r1;
27*8a84c799SMatthew Dillon ssize_t r2;
28*8a84c799SMatthew Dillon ssize_t x;
29*8a84c799SMatthew Dillon ssize_t w1;
30*8a84c799SMatthew Dillon ssize_t w2;
31*8a84c799SMatthew Dillon ssize_t i;
32*8a84c799SMatthew Dillon ssize_t o;
33*8a84c799SMatthew Dillon int failed;
34*8a84c799SMatthew Dillon int flags;
35*8a84c799SMatthew Dillon
36*8a84c799SMatthew Dillon /*
37*8a84c799SMatthew Dillon * NOTE: If we use WCSBIN_SURRO the round-trip will not be 8-bit
38*8a84c799SMatthew Dillon * clean.
39*8a84c799SMatthew Dillon *
40*8a84c799SMatthew Dillon * Typically use either 0 or WCSBIN_LONGCODE, both of which
41*8a84c799SMatthew Dillon * are 8-bit clean, will escape, and cannot error on input
42*8a84c799SMatthew Dillon * (or generate wchars that might error on output) given the
43*8a84c799SMatthew Dillon * same flags).
44*8a84c799SMatthew Dillon */
45*8a84c799SMatthew Dillon flags = 0;
46*8a84c799SMatthew Dillon flags = WCSBIN_LONGCODES;
47*8a84c799SMatthew Dillon
48*8a84c799SMatthew Dillon setlocale(LC_ALL, "");
49*8a84c799SMatthew Dillon x = 0;
50*8a84c799SMatthew Dillon while ((flags & WCSBIN_EOF) == 0 &&
51*8a84c799SMatthew Dillon (r1 = read(0, ibuf + x, sizeof(ibuf) - x)) >= 0) {
52*8a84c799SMatthew Dillon /* allow final loop for loose ends */
53*8a84c799SMatthew Dillon if (r1 == 0)
54*8a84c799SMatthew Dillon flags |= WCSBIN_EOF;
55*8a84c799SMatthew Dillon r1 += x;
56*8a84c799SMatthew Dillon r2 = r1;
57*8a84c799SMatthew Dillon w1 = mbintowcr(warray, ibuf, 1024, &r2, flags);
58*8a84c799SMatthew Dillon
59*8a84c799SMatthew Dillon /* round-trip, output buffer can be same size as input buffer */
60*8a84c799SMatthew Dillon w2 = w1;
61*8a84c799SMatthew Dillon o = wcrtombin(obuf, warray, sizeof(obuf), &w2, flags);
62*8a84c799SMatthew Dillon fflush(stdout);
63*8a84c799SMatthew Dillon
64*8a84c799SMatthew Dillon printf("read %4d/%-4d wc=%4d/%-4d write=%-4d\t",
65*8a84c799SMatthew Dillon r2, r1, w2, w1, o);
66*8a84c799SMatthew Dillon if (r2 == o) {
67*8a84c799SMatthew Dillon if (bcmp(ibuf, obuf, o) == 0) {
68*8a84c799SMatthew Dillon printf("ok%c",
69*8a84c799SMatthew Dillon (flags & WCSBIN_EOF) ? '\n' : '\r');
70*8a84c799SMatthew Dillon fflush(stdout);
71*8a84c799SMatthew Dillon } else {
72*8a84c799SMatthew Dillon printf("compare-fail\n");
73*8a84c799SMatthew Dillon }
74*8a84c799SMatthew Dillon } else if (r2 < o) {
75*8a84c799SMatthew Dillon if (bcmp(ibuf, obuf, r2) == 0)
76*8a84c799SMatthew Dillon printf("len-fail, rest-ok\n");
77*8a84c799SMatthew Dillon else
78*8a84c799SMatthew Dillon printf("len-fail, compare-fail\n");
79*8a84c799SMatthew Dillon } else if (o < r2) {
80*8a84c799SMatthew Dillon if (bcmp(ibuf, obuf, o) == 0)
81*8a84c799SMatthew Dillon printf("len-fail, rest-ok\n");
82*8a84c799SMatthew Dillon else
83*8a84c799SMatthew Dillon printf("len-fail, compare-fail\n");
84*8a84c799SMatthew Dillon }
85*8a84c799SMatthew Dillon for (i = failed = 0; i < r2 && i < o; ++i) {
86*8a84c799SMatthew Dillon if (ibuf[i] != obuf[i]) {
87*8a84c799SMatthew Dillon printf(" @%04x %02x %02x\n",
88*8a84c799SMatthew Dillon i, (uint8_t)ibuf[i], (uint8_t)obuf[i]);
89*8a84c799SMatthew Dillon failed = 16;
90*8a84c799SMatthew Dillon } else if (failed) {
91*8a84c799SMatthew Dillon --failed;
92*8a84c799SMatthew Dillon printf(" @%04x %02x %02x\n",
93*8a84c799SMatthew Dillon i, (uint8_t)ibuf[i], (uint8_t)obuf[i]);
94*8a84c799SMatthew Dillon }
95*8a84c799SMatthew Dillon }
96*8a84c799SMatthew Dillon
97*8a84c799SMatthew Dillon x = r1 - r2;
98*8a84c799SMatthew Dillon if (x)
99*8a84c799SMatthew Dillon bcopy(ibuf + r2, ibuf, x);
100*8a84c799SMatthew Dillon }
101*8a84c799SMatthew Dillon }
102