xref: /openbsd/regress/lib/libc/printf/string.c (revision 5dea098c)
1 /* $OpenBSD: string.c,v 1.2 2020/07/14 16:40:04 kettenis Exp $ */
2 /*
3  * Copyright (c) 2020 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * Test the %c, %lc, %s, and %ls conversion specifiers with all their
18  * modifiers, in particular with the minus flag, width, and maxbytes.
19  * Also verify that other flags do nothing useful.
20  */
21 #include <err.h>
22 #include <errno.h>
23 #include <locale.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <wchar.h>
29 
/* Checkers for conversions that are expected to succeed. */
void	 tc(const char *, int, const char *);
void	 ts(const char *, const char *, const char *);
void	 tlc(const char *, wint_t, const char *);
void	 tls(const char *, const wchar_t *, const char *);

/* Checkers for wide conversions that are expected to fail. */
void	 tlc_expect_fail(const char *, wint_t);
void	 tls_expect_fail(const char *, const wchar_t *);

/* Error counters, summarized by main() at the end of the run. */
static int	 badret;	/* printf failed, or did not fail as required */
static int	 badlen;	/* return value differs from expected length */
static int	 badout;	/* output differs from the expected string */

static int	 verbose;	/* set by -v: also report passing tests */
39 
40 
41 /*
42  * Print the single-byte character c with the format fmt,
43  * check that the result matches what we want,
44  * and report and count the error on failure.
45  */
46 void
47 tc(const char *fmt, int c, const char *want)
48 {
49 	char		 buf[32];
50 	size_t		 len;
51 	int		 irc, happy;
52 
53 	happy = 1;
54 	irc = snprintf(buf, sizeof(buf), fmt, c);
55 	len = strlen(want);
56 	if (irc < 0) {
57 		warn("printf(\"%s\", %d) returned %d", fmt, c, irc);
58 		badret++;
59 		return;
60 	}
61 	if ((unsigned long long)irc != len) {
62 		warnx("printf(\"%s\", %d) returned %d (expected %zu)",
63 		    fmt, c, irc, len);
64 		badlen++;
65 		happy = 0;
66 	}
67 	if (strcmp(buf, want) != 0) {
68 		warnx("printf(\"%s\", %d) wrote \"%s\" (expected \"%s\")",
69 		    fmt, c, buf, want);
70 		badout++;
71 		happy = 0;
72 	}
73 	if (verbose && happy)
74 		warnx("printf(\"%s\", %d) wrote \"%s\" length %d (OK)",
75 		    fmt, c, buf, irc);
76 }
77 
78 /*
79  * Print the wide character wc with the format fmt,
80  * check that the result matches what we want,
81  * and report and count the error on failure.
82  */
83 void
84 tlc(const char *fmt, wint_t wc, const char *want)
85 {
86 	char		 buf[32];
87 	const char	*charset;
88 	size_t		 len;
89 	int		 irc, happy;
90 
91 	happy = 1;
92 	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
93 	irc = snprintf(buf, sizeof(buf), fmt, wc);
94 	len = strlen(want);
95 	if (irc < 0) {
96 		warn("%s printf(\"%s\", U+%.4X) returned %d",
97 		    charset, fmt, (unsigned int)wc, irc);
98 		badret++;
99 		return;
100 	}
101 	if ((unsigned long long)irc != len) {
102 		warnx("%s printf(\"%s\", U+%.4X) returned %d (expected %zu)",
103 		    charset, fmt, (unsigned int)wc, irc, len);
104 		badlen++;
105 		happy = 0;
106 	}
107 	if (strcmp(buf, want) != 0) {
108 		warnx("%s printf(\"%s\", U+%.4X) "
109 		    "wrote \"%s\" (expected \"%s\")",
110 		    charset, fmt, (unsigned int)wc, buf, want);
111 		badout++;
112 		happy = 0;
113 	}
114 	if (verbose && happy)
115 		warnx("%s printf(\"%s\", U+%.4X) wrote \"%s\" length %d (OK)",
116 		    charset, fmt, (unsigned int)wc, buf, irc);
117 }
118 
119 /*
120  * Try to print the invalid wide character wc with the format fmt,
121  * check that it fails as it should, and report and count if it doesn't.
122  */
123 void
124 tlc_expect_fail(const char *fmt, wint_t wc)
125 {
126 	char		 buf[32];
127 	const char	*charset;
128 	int		 irc;
129 
130 	errno = 0;
131 	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
132 	irc = snprintf(buf, sizeof(buf), fmt, wc);
133 	if (irc != -1) {
134 		warn("%s printf(\"%s\", U+%.4X) returned %d",
135 		    charset, fmt, (unsigned int)wc, irc);
136 		badret++;
137 	} else if (errno != EILSEQ) {
138 		warnx("%s printf(\"%s\", U+%.4X) errno %d (expected %d)",
139 		    charset, fmt, (unsigned int)wc, errno, EILSEQ);
140 		badret++;
141 	} else if (verbose)
142 		warnx("%s printf(\"%s\", U+%.4X) returned %d errno %d (OK)",
143 		    charset, fmt, (unsigned int)wc, irc, errno);
144 }
145 
146 /*
147  * Print the string s with the format fmt,
148  * check that the result matches what we want,
149  * and report and count the error on failure.
150  */
151 void
152 ts(const char *fmt, const char *s, const char *want)
153 {
154 	char		 buf[32];
155 	size_t		 len;
156 	int		 irc, happy;
157 
158 	happy = 1;
159 	irc = snprintf(buf, sizeof(buf), fmt, s);
160 	len = strlen(want);
161 	if (irc < 0) {
162 		warn("printf(\"%s\", \"%s\") returned %d", fmt, s, irc);
163 		badret++;
164 		return;
165 	}
166 	if ((unsigned long long)irc != len) {
167 		warnx("printf(\"%s\", \"%s\") returned %d (expected %zu)",
168 		    fmt, s, irc, len);
169 		badlen++;
170 		happy = 0;
171 	}
172 	if (strcmp(buf, want) != 0) {
173 		warnx("printf(\"%s\", \"%s\") wrote \"%s\" (expected \"%s\")",
174 		    fmt, s, buf, want);
175 		badout++;
176 		happy = 0;
177 	}
178 	if (verbose && happy)
179 		warnx("printf(\"%s\", \"%s\") wrote \"%s\" length %d (OK)",
180 		    fmt, s, buf, irc);
181 }
182 
183 /*
184  * Print the wide character string ws with the format fmt,
185  * check that the result matches what we want,
186  * and report and count the error on failure.
187  */
188 void
189 tls(const char *fmt, const wchar_t *ws, const char *want)
190 {
191 	char		 buf[32];
192 	const char	*charset;
193 	size_t		 len;
194 	int		 irc, happy;
195 
196 	happy = 1;
197 	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
198 	irc = snprintf(buf, sizeof(buf), fmt, ws);
199 	len = strlen(want);
200 	if (irc < 0) {
201 		warn("%s printf(\"%s\", \"%ls\") returned %d",
202 		    charset, fmt, ws, irc);
203 		badret++;
204 		return;
205 	}
206 	if ((unsigned long long)irc != len) {
207 		warnx("%s printf(\"%s\", \"%ls\") returned %d (expected %zu)",
208 		    charset, fmt, ws, irc, len);
209 		badlen++;
210 		happy = 0;
211 	}
212 	if (strcmp(buf, want) != 0) {
213 		warnx("%s printf(\"%s\", \"%ls\") "
214 		    "wrote \"%s\" (expected \"%s\")",
215 		    charset, fmt, ws, buf, want);
216 		badout++;
217 		happy = 0;
218 	}
219 	if (verbose && happy)
220 		warnx("%s printf(\"%s\", \"%ls\") wrote \"%s\" length %d (OK)",
221 		    charset, fmt, ws, buf, irc);
222 }
223 
224 /*
225  * Try to print the invalid wide character string ws with the format fmt,
226  * check that it fails as it should, and report and count if it doesn't.
227  */
228 void
229 tls_expect_fail(const char *fmt, const wchar_t *ws)
230 {
231 	char		 buf[32];
232 	const char	*charset;
233 	int		 irc;
234 
235 	errno = 0;
236 	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
237 	irc = snprintf(buf, sizeof(buf), fmt, ws);
238 	if (irc != -1) {
239 		warn("%s printf(\"%s\", U+%.4X, ...) returned %d",
240 		    charset, fmt, (unsigned int)*ws, irc);
241 		badret++;
242 	} else if (errno != EILSEQ) {
243 		warnx("%s printf(\"%s\", U+%.4X, ...) errno %d (expected %d)",
244 		    charset, fmt, (unsigned int)*ws, errno, EILSEQ);
245 		badret++;
246 	} else if (verbose)
247 		warnx("%s printf(\"%s\", U+%.4X, ...) "
248 		    "returned %d errno %d (OK)",
249 		    charset, fmt, (unsigned int)*ws, irc, errno);
250 }
251 
252 int
253 main(int argc, char *argv[])
254 {
255 	const wchar_t	 ws[] = { 0x0421, 0x043e, 0x0444, 0x044f, 0 };
256 	const wchar_t	 wsbad[] = { 0x0391, 0xdeef, 0x3c9, 0 };
257 	int		 badarg, picky;
258 	int		 ch;
259 
260 	badarg = picky = 0;
261 	while ((ch = getopt(argc, argv, "pv")) != -1) {
262 		switch (ch) {
263 		case 'p':
264 			picky = 1;
265 			break;
266 		case 'v':
267 			verbose = 1;
268 			break;
269 		default:
270 			badarg = 1;
271 			break;
272 		}
273 	}
274 	argc -= optind;
275 	argv += optind;
276 	if (argc > 0) {
277 		warnx("unexpected argument \"%s\"", *argv);
278 		badarg = 1;
279 	}
280 	if (badarg) {
281 		fputs("usage: string [-pv]\n", stderr);
282 		return 1;
283 	}
284 
285 	/*
286 	 * Valid use cases of %c and %s.
287 	 */
288 
289 	tc("<%c>", '=', "<=>");
290 	tc("<%c>", '\t', "<\t>");
291 	tc("<%c>", 0xfe, "<\xfe>");
292 	tc("<%-c>", '=', "<=>");
293 	tc("<%2c>", '=', "< =>");
294 	tc("<%-2c>", '=', "<= >");
295 
296 	ts("<%s>", "text", "<text>");
297 	ts("<%-s>", "text", "<text>");
298 	ts("<%6s>", "text", "<  text>");
299 	ts("<%-6s>", "text", "<text  >");
300 	ts("<%.2s>", "text", "<te>");
301 	ts("<%4.2s>", "text", "<  te>");
302 	ts("<%-4.2s>", "text", "<te  >");
303 
304 	/*
305 	 * Undefined behaviour of %c and %s.
306 	 * Do not test by default to avoid noise.
307 	 * But provide the tests anyway to help track down
308 	 * unintended changes of behaviour when needed.
309 	 */
310 
311 	if (picky) {
312 		tc("<%#c>", '=', "<=>");
313 		tc("<% -3c>", '=', "<=  >");
314 		tc("<%+-3c>", '=', "<=  >");
315 		tc("<%03c>", '=', "<00=>");
316 		tc("<%-03c>", '=', "<=  >");
317 		tc("<%3.2c>", '=', "<  =>");
318 		tc("<%hc>", '=', "<=>");
319 
320 		ts("<%#s>", "text", "<text>");
321 		ts("<% -6s>", "text", "<text  >");
322 		ts("<%+-6s>", "text", "<text  >");
323 		ts("<%06s>", "text", "<00text>");
324 		ts("<%-06s>", "text", "<text  >");
325 		ts("<%hs>", "text", "<text>");
326 	}
327 
328 	/*
329 	 * Valid use cases of %lc and %ls in the POSIX locale.
330 	 */
331 
332 	tlc("<%lc>", L'=', "<=>");
333 	tlc("<%lc>", L'\t', "<\t>");
334 	tlc_expect_fail("<%lc>", 0x03c0);
335 	tlc("<%-lc>", L'=', "<=>");
336 	tlc("<%2lc>", L'=', "< =>");
337 	tlc("<%-2lc>", L'=', "<= >");
338 
339 	tls("<%ls>", L"text", "<text>");
340 	tls_expect_fail("<%ls>", ws);
341 	tls_expect_fail("<%ls>", wsbad);
342 	tls("<%-ls>", L"text", "<text>");
343 	tls("<%6ls>", L"text", "<  text>");
344 	tls("<%-6ls>", L"text", "<text  >");
345 	tls("<%.2ls>", L"text", "<te>");
346 	tls("<%4.2ls>", L"text", "<  te>");
347 	tls("<%-4.2ls>", L"text", "<te  >");
348 
349 	/*
350 	 * Undefined behaviour of %lc and %ls in the POSIX locale.
351 	 */
352 
353 	if (picky) {
354 		tlc("<%lc>", 0x00fe, "<\xfe>");
355 		tlc("<%#lc>", L'=', "<=>");
356 		tlc("<% -3lc>", L'=', "<=  >");
357 		tlc("<%+-3lc>", L'=', "<=  >");
358 		tlc("<%03lc>", L'=', "<00=>");
359 		tlc("<%-03lc>", L'=', "<=  >");
360 		tlc("<%3.2lc>", L'=', "<  =>");
361 		tc("<%llc>", '=', "<=>");
362 
363 		tls("<%#ls>", L"text", "<text>");
364 		tls("<% -6ls>", L"text", "<text  >");
365 		tls("<%+-6ls>", L"text", "<text  >");
366 		tls("<%06ls>", L"text", "<00text>");
367 		tls("<%-06ls>", L"text", "<text  >");
368 		ts("<%lls>", "text", "<text>");
369 	}
370 
371 	/*
372 	 * Valid use cases of %lc and %ls in a UTF-8 locale.
373 	 */
374 
375 	if (setlocale(LC_CTYPE, "C.UTF-8") == NULL)
376 		err(1, "setlocale");
377 
378 	tlc("<%lc>", L'=', "<=>");
379 	tlc("<%lc>", L'\t', "<\t>");
380 	tlc("<%lc>", 0x00fe, "<\xc3\xbe>");
381 	tlc("<%lc>", 0x03c0, "<\xcf\x80>");
382 	tlc_expect_fail("<%lc>", 0x123456);
383 	tlc("<%-lc>", L'=', "<=>");
384 	tlc("<%-lc>", 0x03c0, "<\xcf\x80>");
385 	tlc("<%2lc>", L'=', "< =>");
386 	tlc("<%3lc>", 0x03c0, "< \xcf\x80>");
387 	tlc("<%-2lc>", L'=', "<= >");
388 	tlc("<%-3lc>", 0x03c0, "<\xcf\x80 >");
389 
390 	tls("<%ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
391 	tls_expect_fail("<%ls>", wsbad);
392 	tls("<%-ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
393 	tls("<%9ls>", ws, "< \xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
394 	tls("<%-9ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f >");
395 	tls("<%.4ls>", ws, "<\xd0\xa1\xd0\xbe>");
396 	tls("<%.3ls>", ws, "<\xd0\xa1>");
397 	tls("<%6.4ls>", ws, "<  \xd0\xa1\xd0\xbe>");
398 	tls("<%3.3ls>", ws, "< \xd0\xa1>");
399 	tls("<%-6.4ls>", ws, "<\xd0\xa1\xd0\xbe  >");
400 	tls("<%-3.3ls>", ws, "<\xd0\xa1 >");
401 
402 	/*
403 	 * Undefined behaviour of %lc and %ls in a UTF-8 locale.
404 	 */
405 
406 	if (picky) {
407 		tlc("<%#lc>", 0x03c0, "<\xcf\x80>");
408 		tlc("<% -4lc>", 0x03c0, "<\xcf\x80  >");
409 		tlc("<%+-4lc>", 0x03c0, "<\xcf\x80  >");
410 		tlc("<%04lc>", 0x03c0, "<00\xcf\x80>");
411 		tlc("<%-04lc>", 0x03c0, "<\xcf\x80  >");
412 		tlc("<%4.5lc>", 0x03c0, "<  \xcf\x80>");
413 		tlc("<%4.3lc>", 0x03c0, "<  \xcf\x80>");
414 		tlc("<%4.1lc>", 0x03c0, "<  \xcf\x80>");
415 		tc("<%llc>", 0xfe, "<\xfe>");
416 
417 		tls("<%#ls>", ws + 2, "<\xd1\x84\xd1\x8f>");
418 		tls("<% -6ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
419 		tls("<%+-6ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
420 		tls("<%06ls>", ws + 2, "<00\xd1\x84\xd1\x8f>");
421 		tls("<%-06ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
422 		ts("<%lls>", "text", "<text>");
423 	}
424 
425 	/*
426 	 * Summarize the results.
427 	 */
428 
429 	if (badret + badlen + badout)
430 		errx(1, "ERRORS: %d fail + %d mismatch (incl. %d bad length)",
431 		    badret, badout, badlen);
432 	else if (verbose)
433 		warnx("SUCCESS");
434 	return 0;
435 }
436