/* $OpenBSD: string.c,v 1.2 2020/07/14 16:40:04 kettenis Exp $ */
/*
 * Copyright (c) 2020 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Test the %c, %lc, %s, and %ls conversion specifiers with all their
 * modifiers, in particular with the minus flag, width, and maxbytes.
 * Also verify that other flags do nothing useful.
 */
#include <err.h>
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>

/*
 * Test helpers.  Each one formats a single argument with snprintf(3)
 * into a small local buffer and updates the error counters below.
 */
void	 tc(const char *, int, const char *);
void	 tlc(const char *, wint_t, const char *);
void	 tlc_expect_fail(const char *, wint_t);
void	 ts(const char *, const char *, const char *);
void	 tls(const char *, const wchar_t *, const char *);
void	 tls_expect_fail(const char *, const wchar_t *);

/*
 * Error counters:
 * badret - unexpected return value or errno,
 * badlen - returned length differs from the expected length,
 * badout - written bytes differ from the expected bytes.
 */
static int	 badret, badlen, badout;
static int	 verbose;	/* For debugging: also report successes. */


/*
 * Print the single-byte character c with the format fmt,
 * check that the result matches what we want,
 * and report and count the error on failure.
 *
 * fmt:  format string containing exactly one %c conversion
 * c:    the character to print
 * want: the exact byte sequence the conversion must produce
 */
void
tc(const char *fmt, int c, const char *want)
{
	char		 buf[32];
	size_t		 len;
	int		 irc, happy;

	happy = 1;
	irc = snprintf(buf, sizeof(buf), fmt, c);
	len = strlen(want);
	if (irc < 0) {
		/* The call failed, so errno is meaningful: use warn(). */
		warn("printf(\"%s\", %d) returned %d", fmt, c, irc);
		badret++;
		return;
	}
	if ((unsigned long long)irc != len) {
		warnx("printf(\"%s\", %d) returned %d (expected %zu)",
		    fmt, c, irc, len);
		badlen++;
		happy = 0;
	}
	if (strcmp(buf, want) != 0) {
		warnx("printf(\"%s\", %d) wrote \"%s\" (expected \"%s\")",
		    fmt, c, buf, want);
		badout++;
		happy = 0;
	}
	if (verbose && happy)
		warnx("printf(\"%s\", %d) wrote \"%s\" length %d (OK)",
		    fmt, c, buf, irc);
}

/*
 * Print the wide character wc with the format fmt,
 * check that the result matches what we want,
 * and report and count the error on failure.
 *
 * fmt:  format string containing exactly one %lc conversion
 * wc:   the wide character to print
 * want: the exact byte sequence the conversion must produce
 *
 * Messages are prefixed with the character set currently in effect,
 * derived from MB_CUR_MAX, because main() calls this function both
 * before and after switching the locale.
 */
void
tlc(const char *fmt, wint_t wc, const char *want)
{
	char		 buf[32];
	const char	*charset;
	size_t		 len;
	int		 irc, happy;

	happy = 1;
	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
	irc = snprintf(buf, sizeof(buf), fmt, wc);
	len = strlen(want);
	if (irc < 0) {
		warn("%s printf(\"%s\", U+%.4X) returned %d",
		    charset, fmt, (unsigned int)wc, irc);
		badret++;
		return;
	}
	if ((unsigned long long)irc != len) {
		warnx("%s printf(\"%s\", U+%.4X) returned %d (expected %zu)",
		    charset, fmt, (unsigned int)wc, irc, len);
		badlen++;
		happy = 0;
	}
	if (strcmp(buf, want) != 0) {
		warnx("%s printf(\"%s\", U+%.4X) "
		    "wrote \"%s\" (expected \"%s\")",
		    charset, fmt, (unsigned int)wc, buf, want);
		badout++;
		happy = 0;
	}
	if (verbose && happy)
		warnx("%s printf(\"%s\", U+%.4X) wrote \"%s\" length %d (OK)",
		    charset, fmt, (unsigned int)wc, buf, irc);
}

/*
 * Try to print the invalid wide character wc with the format fmt,
 * check that it fails as it should, and report and count if it doesn't.
 *
 * Success means that snprintf(3) returns -1 and sets errno to EILSEQ.
 * errno is cleared before the call so that a stale value cannot
 * satisfy the check.
 */
void
tlc_expect_fail(const char *fmt, wint_t wc)
{
	char		 buf[32];
	const char	*charset;
	int		 irc;

	errno = 0;
	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
	irc = snprintf(buf, sizeof(buf), fmt, wc);
	if (irc != -1) {
		/*
		 * The conversion did not fail, so errno carries no
		 * information; use warnx() to avoid appending a
		 * misleading strerror(3) text to the message.
		 */
		warnx("%s printf(\"%s\", U+%.4X) returned %d",
		    charset, fmt, (unsigned int)wc, irc);
		badret++;
	} else if (errno != EILSEQ) {
		warnx("%s printf(\"%s\", U+%.4X) errno %d (expected %d)",
		    charset, fmt, (unsigned int)wc, errno, EILSEQ);
		badret++;
	} else if (verbose)
		warnx("%s printf(\"%s\", U+%.4X) returned %d errno %d (OK)",
		    charset, fmt, (unsigned int)wc, irc, errno);
}

/*
 * Print the string s with the format fmt,
 * check that the result matches what we want,
 * and report and count the error on failure.
 *
 * fmt:  format string containing exactly one %s conversion
 * s:    the string to print
 * want: the exact byte sequence the conversion must produce
 */
void
ts(const char *fmt, const char *s, const char *want)
{
	char		 buf[32];
	size_t		 len;
	int		 irc, happy;

	happy = 1;
	irc = snprintf(buf, sizeof(buf), fmt, s);
	len = strlen(want);
	if (irc < 0) {
		warn("printf(\"%s\", \"%s\") returned %d", fmt, s, irc);
		badret++;
		return;
	}
	if ((unsigned long long)irc != len) {
		warnx("printf(\"%s\", \"%s\") returned %d (expected %zu)",
		    fmt, s, irc, len);
		badlen++;
		happy = 0;
	}
	if (strcmp(buf, want) != 0) {
		warnx("printf(\"%s\", \"%s\") wrote \"%s\" (expected \"%s\")",
		    fmt, s, buf, want);
		badout++;
		happy = 0;
	}
	if (verbose && happy)
		warnx("printf(\"%s\", \"%s\") wrote \"%s\" length %d (OK)",
		    fmt, s, buf, irc);
}

/*
 * Print the wide character string ws with the format fmt,
 * check that the result matches what we want,
 * and report and count the error on failure.
 *
 * fmt:  format string containing exactly one %ls conversion
 * ws:   the NUL-terminated wide character string to print
 * want: the exact byte sequence the conversion must produce
 */
void
tls(const char *fmt, const wchar_t *ws, const char *want)
{
	char		 buf[32];
	const char	*charset;
	size_t		 len;
	int		 irc, happy;

	happy = 1;
	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
	irc = snprintf(buf, sizeof(buf), fmt, ws);
	len = strlen(want);
	if (irc < 0) {
		warn("%s printf(\"%s\", \"%ls\") returned %d",
		    charset, fmt, ws, irc);
		badret++;
		return;
	}
	if ((unsigned long long)irc != len) {
		warnx("%s printf(\"%s\", \"%ls\") returned %d (expected %zu)",
		    charset, fmt, ws, irc, len);
		badlen++;
		happy = 0;
	}
	if (strcmp(buf, want) != 0) {
		warnx("%s printf(\"%s\", \"%ls\") "
		    "wrote \"%s\" (expected \"%s\")",
		    charset, fmt, ws, buf, want);
		badout++;
		happy = 0;
	}
	if (verbose && happy)
		warnx("%s printf(\"%s\", \"%ls\") wrote \"%s\" length %d (OK)",
		    charset, fmt, ws, buf, irc);
}

/*
 * Try to print the invalid wide character string ws with the format fmt,
 * check that it fails as it should, and report and count if it doesn't.
 *
 * Success means that snprintf(3) returns -1 and sets errno to EILSEQ.
 * Only the first wide character of ws is shown in the messages because
 * the string itself cannot be converted for printing.
 */
void
tls_expect_fail(const char *fmt, const wchar_t *ws)
{
	char		 buf[32];
	const char	*charset;
	int		 irc;

	errno = 0;
	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
	irc = snprintf(buf, sizeof(buf), fmt, ws);
	if (irc != -1) {
		/*
		 * The conversion did not fail, so errno carries no
		 * information; use warnx() to avoid appending a
		 * misleading strerror(3) text to the message.
		 */
		warnx("%s printf(\"%s\", U+%.4X, ...) returned %d",
		    charset, fmt, (unsigned int)*ws, irc);
		badret++;
	} else if (errno != EILSEQ) {
		warnx("%s printf(\"%s\", U+%.4X, ...) errno %d (expected %d)",
		    charset, fmt, (unsigned int)*ws, errno, EILSEQ);
		badret++;
	} else if (verbose)
		warnx("%s printf(\"%s\", U+%.4X, ...) "
		    "returned %d errno %d (OK)",
		    charset, fmt, (unsigned int)*ws, irc, errno);
}

/*
 * Run all tests.
 *
 * Options:
 * -p  picky: also run the tests of undefined behaviour
 * -v  verbose: also report every successful test
 *
 * Returns 0 if all tests pass; exits with status 1 on a usage error,
 * if the UTF-8 locale cannot be selected, or if any test fails.
 *
 * In the expected strings, padding is the field width minus the number
 * of bytes written by the conversion; for %lc and %ls, that is the
 * length of the multibyte encoding, not the number of characters.
 */
int
main(int argc, char *argv[])
{
	/* Four Cyrillic letters, two bytes each in UTF-8. */
	const wchar_t	 ws[] = { 0x0421, 0x043e, 0x0444, 0x044f, 0 };
	/* 0xdeef is a surrogate and cannot be encoded. */
	const wchar_t	 wsbad[] = { 0x0391, 0xdeef, 0x3c9, 0 };
	int		 badarg, picky;
	int		 ch;

	badarg = picky = 0;
	while ((ch = getopt(argc, argv, "pv")) != -1) {
		switch (ch) {
		case 'p':
			picky = 1;
			break;
		case 'v':
			verbose = 1;
			break;
		default:
			badarg = 1;
			break;
		}
	}
	argc -= optind;
	argv += optind;
	if (argc > 0) {
		warnx("unexpected argument \"%s\"", *argv);
		badarg = 1;
	}
	if (badarg) {
		fputs("usage: string [-pv]\n", stderr);
		return 1;
	}

	/*
	 * Valid use cases of %c and %s.
	 */

	tc("<%c>", '=', "<=>");
	tc("<%c>", '\t', "<\t>");
	tc("<%c>", 0xfe, "<\xfe>");
	tc("<%-c>", '=', "<=>");
	tc("<%2c>", '=', "< =>");
	tc("<%-2c>", '=', "<= >");

	ts("<%s>", "text", "<text>");
	ts("<%-s>", "text", "<text>");
	ts("<%6s>", "text", "<  text>");
	ts("<%-6s>", "text", "<text  >");
	ts("<%.2s>", "text", "<te>");
	ts("<%4.2s>", "text", "<  te>");
	ts("<%-4.2s>", "text", "<te  >");

	/*
	 * Undefined behaviour of %c and %s.
	 * Do not test by default to avoid noise.
	 * But provide the tests anyway to help track down
	 * unintended changes of behaviour when needed.
	 */

	if (picky) {
		tc("<%#c>", '=', "<=>");
		tc("<% -3c>", '=', "<=  >");
		tc("<%+-3c>", '=', "<=  >");
		tc("<%03c>", '=', "<00=>");
		tc("<%-03c>", '=', "<=  >");
		tc("<%3.2c>", '=', "<  =>");
		tc("<%hc>", '=', "<=>");

		ts("<%#s>", "text", "<text>");
		ts("<% -6s>", "text", "<text  >");
		ts("<%+-6s>", "text", "<text  >");
		ts("<%06s>", "text", "<00text>");
		ts("<%-06s>", "text", "<text  >");
		ts("<%hs>", "text", "<text>");
	}

	/*
	 * Valid use cases of %lc and %ls in the POSIX locale.
	 */

	tlc("<%lc>", L'=', "<=>");
	tlc("<%lc>", L'\t', "<\t>");
	tlc_expect_fail("<%lc>", 0x03c0);
	tlc("<%-lc>", L'=', "<=>");
	tlc("<%2lc>", L'=', "< =>");
	tlc("<%-2lc>", L'=', "<= >");

	tls("<%ls>", L"text", "<text>");
	tls_expect_fail("<%ls>", ws);
	tls_expect_fail("<%ls>", wsbad);
	tls("<%-ls>", L"text", "<text>");
	tls("<%6ls>", L"text", "<  text>");
	tls("<%-6ls>", L"text", "<text  >");
	tls("<%.2ls>", L"text", "<te>");
	tls("<%4.2ls>", L"text", "<  te>");
	tls("<%-4.2ls>", L"text", "<te  >");

	/*
	 * Undefined behaviour of %lc and %ls in the POSIX locale.
	 */

	if (picky) {
		tlc("<%lc>", 0x00fe, "<\xfe>");
		tlc("<%#lc>", L'=', "<=>");
		tlc("<% -3lc>", L'=', "<=  >");
		tlc("<%+-3lc>", L'=', "<=  >");
		tlc("<%03lc>", L'=', "<00=>");
		tlc("<%-03lc>", L'=', "<=  >");
		tlc("<%3.2lc>", L'=', "<  =>");
		tc("<%llc>", '=', "<=>");

		tls("<%#ls>", L"text", "<text>");
		tls("<% -6ls>", L"text", "<text  >");
		tls("<%+-6ls>", L"text", "<text  >");
		tls("<%06ls>", L"text", "<00text>");
		tls("<%-06ls>", L"text", "<text  >");
		ts("<%lls>", "text", "<text>");
	}

	/*
	 * Valid use cases of %lc and %ls in a UTF-8 locale.
	 */

	if (setlocale(LC_CTYPE, "C.UTF-8") == NULL)
		err(1, "setlocale");

	tlc("<%lc>", L'=', "<=>");
	tlc("<%lc>", L'\t', "<\t>");
	tlc("<%lc>", 0x00fe, "<\xc3\xbe>");
	tlc("<%lc>", 0x03c0, "<\xcf\x80>");
	tlc_expect_fail("<%lc>", 0x123456);
	tlc("<%-lc>", L'=', "<=>");
	tlc("<%-lc>", 0x03c0, "<\xcf\x80>");
	tlc("<%2lc>", L'=', "< =>");
	tlc("<%3lc>", 0x03c0, "< \xcf\x80>");
	tlc("<%-2lc>", L'=', "<= >");
	tlc("<%-3lc>", 0x03c0, "<\xcf\x80 >");

	tls("<%ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
	tls_expect_fail("<%ls>", wsbad);
	tls("<%-ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
	tls("<%9ls>", ws, "< \xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
	tls("<%-9ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f >");
	tls("<%.4ls>", ws, "<\xd0\xa1\xd0\xbe>");
	/* A precision of 3 bytes only leaves room for one 2-byte char. */
	tls("<%.3ls>", ws, "<\xd0\xa1>");
	tls("<%6.4ls>", ws, "<  \xd0\xa1\xd0\xbe>");
	tls("<%3.3ls>", ws, "< \xd0\xa1>");
	tls("<%-6.4ls>", ws, "<\xd0\xa1\xd0\xbe  >");
	tls("<%-3.3ls>", ws, "<\xd0\xa1 >");

	/*
	 * Undefined behaviour of %lc and %ls in a UTF-8 locale.
	 */

	if (picky) {
		tlc("<%#lc>", 0x03c0, "<\xcf\x80>");
		tlc("<% -4lc>", 0x03c0, "<\xcf\x80  >");
		tlc("<%+-4lc>", 0x03c0, "<\xcf\x80  >");
		tlc("<%04lc>", 0x03c0, "<00\xcf\x80>");
		tlc("<%-04lc>", 0x03c0, "<\xcf\x80  >");
		tlc("<%4.5lc>", 0x03c0, "<  \xcf\x80>");
		tlc("<%4.3lc>", 0x03c0, "<  \xcf\x80>");
		tlc("<%4.1lc>", 0x03c0, "<  \xcf\x80>");
		tc("<%llc>", 0xfe, "<\xfe>");

		tls("<%#ls>", ws + 2, "<\xd1\x84\xd1\x8f>");
		tls("<% -6ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
		tls("<%+-6ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
		tls("<%06ls>", ws + 2, "<00\xd1\x84\xd1\x8f>");
		tls("<%-06ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
		ts("<%lls>", "text", "<text>");
	}

	/*
	 * Summarize the results.
	 */

	if (badret + badlen + badout)
		errx(1, "ERRORS: %d fail + %d mismatch (incl. %d bad length)",
		    badret, badout, badlen);
	else if (verbose)
		warnx("SUCCESS");
	return 0;
}