1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// Scanf/printf implementation for use in *Sanitizer interceptors. 10// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html 11// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html 12// with a few common GNU extensions. 13// 14//===----------------------------------------------------------------------===// 15 16#include <stdarg.h> 17 18static const char *parse_number(const char *p, int *out) { 19 *out = internal_atoll(p); 20 while (*p >= '0' && *p <= '9') 21 ++p; 22 return p; 23} 24 25static const char *maybe_parse_param_index(const char *p, int *out) { 26 // n$ 27 if (*p >= '0' && *p <= '9') { 28 int number; 29 const char *q = parse_number(p, &number); 30 CHECK(q); 31 if (*q == '$') { 32 *out = number; 33 p = q + 1; 34 } 35 } 36 37 // Otherwise, do not change p. This will be re-parsed later as the field 38 // width. 39 return p; 40} 41 42static bool char_is_one_of(char c, const char *s) { 43 return !!internal_strchr(s, c); 44} 45 46static const char *maybe_parse_length_modifier(const char *p, char ll[2]) { 47 if (char_is_one_of(*p, "jztLq")) { 48 ll[0] = *p; 49 ++p; 50 } else if (*p == 'h') { 51 ll[0] = 'h'; 52 ++p; 53 if (*p == 'h') { 54 ll[1] = 'h'; 55 ++p; 56 } 57 } else if (*p == 'l') { 58 ll[0] = 'l'; 59 ++p; 60 if (*p == 'l') { 61 ll[1] = 'l'; 62 ++p; 63 } 64 } 65 return p; 66} 67 68// Returns true if the character is an integer conversion specifier. 69static bool format_is_integer_conv(char c) { 70 return char_is_one_of(c, "diouxXn"); 71} 72 73// Returns true if the character is an floating point conversion specifier. 74static bool format_is_float_conv(char c) { 75 return char_is_one_of(c, "aAeEfFgG"); 76} 77 78// Returns string output character size for string-like conversions, 79// or 0 if the conversion is invalid. 80static int format_get_char_size(char convSpecifier, 81 const char lengthModifier[2]) { 82 if (char_is_one_of(convSpecifier, "CS")) { 83 return sizeof(wchar_t); 84 } 85 86 if (char_is_one_of(convSpecifier, "cs[")) { 87 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') 88 return sizeof(wchar_t); 89 else if (lengthModifier[0] == '\0') 90 return sizeof(char); 91 } 92 93 return 0; 94} 95 96enum FormatStoreSize { 97 // Store size not known in advance; can be calculated as wcslen() of the 98 // destination buffer. 99 FSS_WCSLEN = -2, 100 // Store size not known in advance; can be calculated as strlen() of the 101 // destination buffer. 102 FSS_STRLEN = -1, 103 // Invalid conversion specifier. 104 FSS_INVALID = 0 105}; 106 107// Returns the memory size of a format directive (if >0), or a value of 108// FormatStoreSize. 109static int format_get_value_size(char convSpecifier, 110 const char lengthModifier[2], 111 bool promote_float) { 112 if (format_is_integer_conv(convSpecifier)) { 113 switch (lengthModifier[0]) { 114 case 'h': 115 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); 116 case 'l': 117 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); 118 case 'q': 119 return sizeof(long long); 120 case 'L': 121 return sizeof(long long); 122 case 'j': 123 return sizeof(INTMAX_T); 124 case 'z': 125 return sizeof(SIZE_T); 126 case 't': 127 return sizeof(PTRDIFF_T); 128 case 0: 129 return sizeof(int); 130 default: 131 return FSS_INVALID; 132 } 133 } 134 135 if (format_is_float_conv(convSpecifier)) { 136 switch (lengthModifier[0]) { 137 case 'L': 138 case 'q': 139 return sizeof(long double); 140 case 'l': 141 return lengthModifier[1] == 'l' ? sizeof(long double) 142 : sizeof(double); 143 case 0: 144 // Printf promotes floats to doubles but scanf does not 145 return promote_float ? sizeof(double) : sizeof(float); 146 default: 147 return FSS_INVALID; 148 } 149 } 150 151 if (convSpecifier == 'p') { 152 if (lengthModifier[0] != 0) 153 return FSS_INVALID; 154 return sizeof(void *); 155 } 156 157 return FSS_INVALID; 158} 159 160struct ScanfDirective { 161 int argIdx; // argument index, or -1 if not specified ("%n$") 162 int fieldWidth; 163 const char *begin; 164 const char *end; 165 bool suppressed; // suppress assignment ("*") 166 bool allocate; // allocate space ("m") 167 char lengthModifier[2]; 168 char convSpecifier; 169 bool maybeGnuMalloc; 170}; 171 172// Parse scanf format string. If a valid directive in encountered, it is 173// returned in dir. This function returns the pointer to the first 174// unprocessed character, or 0 in case of error. 175// In case of the end-of-string, a pointer to the closing \0 is returned. 176static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, 177 ScanfDirective *dir) { 178 internal_memset(dir, 0, sizeof(*dir)); 179 dir->argIdx = -1; 180 181 while (*p) { 182 if (*p != '%') { 183 ++p; 184 continue; 185 } 186 dir->begin = p; 187 ++p; 188 // %% 189 if (*p == '%') { 190 ++p; 191 continue; 192 } 193 if (*p == '\0') { 194 return nullptr; 195 } 196 // %n$ 197 p = maybe_parse_param_index(p, &dir->argIdx); 198 CHECK(p); 199 // * 200 if (*p == '*') { 201 dir->suppressed = true; 202 ++p; 203 } 204 // Field width 205 if (*p >= '0' && *p <= '9') { 206 p = parse_number(p, &dir->fieldWidth); 207 CHECK(p); 208 if (dir->fieldWidth <= 0) // Width if at all must be non-zero 209 return nullptr; 210 } 211 // m 212 if (*p == 'm') { 213 dir->allocate = true; 214 ++p; 215 } 216 // Length modifier. 217 p = maybe_parse_length_modifier(p, dir->lengthModifier); 218 // Conversion specifier. 219 dir->convSpecifier = *p++; 220 // Consume %[...] expression. 221 if (dir->convSpecifier == '[') { 222 if (*p == '^') 223 ++p; 224 if (*p == ']') 225 ++p; 226 while (*p && *p != ']') 227 ++p; 228 if (*p == 0) 229 return nullptr; // unexpected end of string 230 // Consume the closing ']'. 231 ++p; 232 } 233 // This is unfortunately ambiguous between old GNU extension 234 // of %as, %aS and %a[...] and newer POSIX %a followed by 235 // letters s, S or [. 236 if (allowGnuMalloc && dir->convSpecifier == 'a' && 237 !dir->lengthModifier[0]) { 238 if (*p == 's' || *p == 'S') { 239 dir->maybeGnuMalloc = true; 240 ++p; 241 } else if (*p == '[') { 242 // Watch for %a[h-j%d], if % appears in the 243 // [...] range, then we need to give up, we don't know 244 // if scanf will parse it as POSIX %a [h-j %d ] or 245 // GNU allocation of string with range dh-j plus %. 246 const char *q = p + 1; 247 if (*q == '^') 248 ++q; 249 if (*q == ']') 250 ++q; 251 while (*q && *q != ']' && *q != '%') 252 ++q; 253 if (*q == 0 || *q == '%') 254 return nullptr; 255 p = q + 1; // Consume the closing ']'. 256 dir->maybeGnuMalloc = true; 257 } 258 } 259 dir->end = p; 260 break; 261 } 262 return p; 263} 264 265static int scanf_get_value_size(ScanfDirective *dir) { 266 if (dir->allocate) { 267 if (!char_is_one_of(dir->convSpecifier, "cCsS[")) 268 return FSS_INVALID; 269 return sizeof(char *); 270 } 271 272 if (dir->maybeGnuMalloc) { 273 if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) 274 return FSS_INVALID; 275 // This is ambiguous, so check the smaller size of char * (if it is 276 // a GNU extension of %as, %aS or %a[...]) and float (if it is 277 // POSIX %a followed by s, S or [ letters). 278 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); 279 } 280 281 if (char_is_one_of(dir->convSpecifier, "cCsS[")) { 282 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS["); 283 unsigned charSize = 284 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 285 if (charSize == 0) 286 return FSS_INVALID; 287 if (dir->fieldWidth == 0) { 288 if (!needsTerminator) 289 return charSize; 290 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 291 } 292 return (dir->fieldWidth + needsTerminator) * charSize; 293 } 294 295 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false); 296} 297 298// Common part of *scanf interceptors. 299// Process format string and va_list, and report all store ranges. 300// Stops when "consuming" n_inputs input items. 301static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, 302 const char *format, va_list aq) { 303 CHECK_GT(n_inputs, 0); 304 const char *p = format; 305 306 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 307 308 while (*p) { 309 ScanfDirective dir; 310 p = scanf_parse_next(p, allowGnuMalloc, &dir); 311 if (!p) 312 break; 313 if (dir.convSpecifier == 0) { 314 // This can only happen at the end of the format string. 315 CHECK_EQ(*p, 0); 316 break; 317 } 318 // Here the directive is valid. Do what it says. 319 if (dir.argIdx != -1) { 320 // Unsupported. 321 break; 322 } 323 if (dir.suppressed) 324 continue; 325 int size = scanf_get_value_size(&dir); 326 if (size == FSS_INVALID) { 327 Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n", 328 SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin); 329 break; 330 } 331 void *argp = va_arg(aq, void *); 332 if (dir.convSpecifier != 'n') 333 --n_inputs; 334 if (n_inputs < 0) 335 break; 336 if (size == FSS_STRLEN) { 337 size = internal_strlen((const char *)argp) + 1; 338 } else if (size == FSS_WCSLEN) { 339 // FIXME: actually use wcslen() to calculate it. 340 size = 0; 341 } 342 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 343 // For %ms/%mc, write the allocated output buffer as well. 344 if (dir.allocate) { 345 char *buf = *(char **)argp; 346 if (buf) 347 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1); 348 } 349 } 350} 351 352#if SANITIZER_INTERCEPT_PRINTF 353 354struct PrintfDirective { 355 int fieldWidth; 356 int fieldPrecision; 357 int argIdx; // width argument index, or -1 if not specified ("%*n$") 358 int precisionIdx; // precision argument index, or -1 if not specified (".*n$") 359 const char *begin; 360 const char *end; 361 bool starredWidth; 362 bool starredPrecision; 363 char lengthModifier[2]; 364 char convSpecifier; 365}; 366 367static const char *maybe_parse_number(const char *p, int *out) { 368 if (*p >= '0' && *p <= '9') 369 p = parse_number(p, out); 370 return p; 371} 372 373static const char *maybe_parse_number_or_star(const char *p, int *out, 374 bool *star) { 375 if (*p == '*') { 376 *star = true; 377 ++p; 378 } else { 379 *star = false; 380 p = maybe_parse_number(p, out); 381 } 382 return p; 383} 384 385// Parse printf format string. Same as scanf_parse_next. 386static const char *printf_parse_next(const char *p, PrintfDirective *dir) { 387 internal_memset(dir, 0, sizeof(*dir)); 388 dir->argIdx = -1; 389 dir->precisionIdx = -1; 390 391 while (*p) { 392 if (*p != '%') { 393 ++p; 394 continue; 395 } 396 dir->begin = p; 397 ++p; 398 // %% 399 if (*p == '%') { 400 ++p; 401 continue; 402 } 403 if (*p == '\0') { 404 return nullptr; 405 } 406 // %n$ 407 p = maybe_parse_param_index(p, &dir->precisionIdx); 408 CHECK(p); 409 // Flags 410 while (char_is_one_of(*p, "'-+ #0")) { 411 ++p; 412 } 413 // Field width 414 p = maybe_parse_number_or_star(p, &dir->fieldWidth, 415 &dir->starredWidth); 416 if (!p) 417 return nullptr; 418 // Precision 419 if (*p == '.') { 420 ++p; 421 // Actual precision is optional (surprise!) 422 p = maybe_parse_number_or_star(p, &dir->fieldPrecision, 423 &dir->starredPrecision); 424 if (!p) 425 return nullptr; 426 // m$ 427 if (dir->starredPrecision) { 428 p = maybe_parse_param_index(p, &dir->precisionIdx); 429 CHECK(p); 430 } 431 } 432 // Length modifier. 433 p = maybe_parse_length_modifier(p, dir->lengthModifier); 434 // Conversion specifier. 435 dir->convSpecifier = *p++; 436 dir->end = p; 437 break; 438 } 439 return p; 440} 441 442static int printf_get_value_size(PrintfDirective *dir) { 443 if (char_is_one_of(dir->convSpecifier, "cCsS")) { 444 unsigned charSize = 445 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 446 if (charSize == 0) 447 return FSS_INVALID; 448 if (char_is_one_of(dir->convSpecifier, "sS")) { 449 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 450 } 451 return charSize; 452 } 453 454 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true); 455} 456 457#define SKIP_SCALAR_ARG(aq, convSpecifier, size) \ 458 do { \ 459 if (format_is_float_conv(convSpecifier)) { \ 460 switch (size) { \ 461 case 8: \ 462 va_arg(*aq, double); \ 463 break; \ 464 case 12: \ 465 va_arg(*aq, long double); \ 466 break; \ 467 case 16: \ 468 va_arg(*aq, long double); \ 469 break; \ 470 default: \ 471 Report("WARNING: unexpected floating-point arg size" \ 472 " in printf interceptor: %zu\n", static_cast<uptr>(size)); \ 473 return; \ 474 } \ 475 } else { \ 476 switch (size) { \ 477 case 1: \ 478 case 2: \ 479 case 4: \ 480 va_arg(*aq, u32); \ 481 break; \ 482 case 8: \ 483 va_arg(*aq, u64); \ 484 break; \ 485 default: \ 486 Report("WARNING: unexpected arg size" \ 487 " in printf interceptor: %zu\n", static_cast<uptr>(size)); \ 488 return; \ 489 } \ 490 } \ 491 } while (0) 492 493// Common part of *printf interceptors. 494// Process format string and va_list, and report all load ranges. 495static void printf_common(void *ctx, const char *format, va_list aq) { 496 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 497 498 const char *p = format; 499 500 while (*p) { 501 PrintfDirective dir; 502 p = printf_parse_next(p, &dir); 503 if (!p) 504 break; 505 if (dir.convSpecifier == 0) { 506 // This can only happen at the end of the format string. 507 CHECK_EQ(*p, 0); 508 break; 509 } 510 // Here the directive is valid. Do what it says. 511 if (dir.argIdx != -1 || dir.precisionIdx != -1) { 512 // Unsupported. 513 break; 514 } 515 if (dir.starredWidth) { 516 // Dynamic width 517 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 518 } 519 if (dir.starredPrecision) { 520 // Dynamic precision 521 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 522 } 523 // %m does not require an argument: strlen(errno). 524 if (dir.convSpecifier == 'm') 525 continue; 526 int size = printf_get_value_size(&dir); 527 if (size == FSS_INVALID) { 528 static int ReportedOnce; 529 if (!ReportedOnce++) 530 Report( 531 "%s: WARNING: unexpected format specifier in printf " 532 "interceptor: %.*s (reported once per process)\n", 533 SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin); 534 break; 535 } 536 if (dir.convSpecifier == 'n') { 537 void *argp = va_arg(aq, void *); 538 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 539 continue; 540 } else if (size == FSS_STRLEN) { 541 if (void *argp = va_arg(aq, void *)) { 542 if (dir.starredPrecision) { 543 // FIXME: properly support starred precision for strings. 544 size = 0; 545 } else if (dir.fieldPrecision > 0) { 546 // Won't read more than "precision" symbols. 547 size = internal_strnlen((const char *)argp, dir.fieldPrecision); 548 if (size < dir.fieldPrecision) size++; 549 } else { 550 // Whole string will be accessed. 551 size = internal_strlen((const char *)argp) + 1; 552 } 553 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 554 } 555 } else if (size == FSS_WCSLEN) { 556 if (void *argp = va_arg(aq, void *)) { 557 // FIXME: Properly support wide-character strings (via wcsrtombs). 558 size = 0; 559 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 560 } 561 } else { 562 // Skip non-pointer args 563 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); 564 } 565 } 566} 567 568#endif // SANITIZER_INTERCEPT_PRINTF 569