1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Scanf/printf implementation for use in *Sanitizer interceptors.
10// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
11// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
12// with a few common GNU extensions.
13//
14//===----------------------------------------------------------------------===//
15
16#include <stdarg.h>
17
18static const char *parse_number(const char *p, int *out) {
19  *out = internal_atoll(p);
20  while (*p >= '0' && *p <= '9')
21    ++p;
22  return p;
23}
24
25static const char *maybe_parse_param_index(const char *p, int *out) {
26  // n$
27  if (*p >= '0' && *p <= '9') {
28    int number;
29    const char *q = parse_number(p, &number);
30    CHECK(q);
31    if (*q == '$') {
32      *out = number;
33      p = q + 1;
34    }
35  }
36
37  // Otherwise, do not change p. This will be re-parsed later as the field
38  // width.
39  return p;
40}
41
// Returns true if c is one of the characters of the NUL-terminated set s.
// The terminating NUL of s is never a member of the set: a strchr-style
// lookup would "find" '\0' in every set, which made the parsers treat the end
// of a truncated format string as a flag / length modifier and step past it.
static bool char_is_one_of(char c, const char *s) {
  if (c == '\0')
    return false;
  for (; *s != '\0'; ++s) {
    if (*s == c)
      return true;
  }
  return false;
}
45
46static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
47  if (char_is_one_of(*p, "jztLq")) {
48    ll[0] = *p;
49    ++p;
50  } else if (*p == 'h') {
51    ll[0] = 'h';
52    ++p;
53    if (*p == 'h') {
54      ll[1] = 'h';
55      ++p;
56    }
57  } else if (*p == 'l') {
58    ll[0] = 'l';
59    ++p;
60    if (*p == 'l') {
61      ll[1] = 'l';
62      ++p;
63    }
64  }
65  return p;
66}
67
68// Returns true if the character is an integer conversion specifier.
69static bool format_is_integer_conv(char c) {
70  return char_is_one_of(c, "diouxXn");
71}
72
73// Returns true if the character is an floating point conversion specifier.
74static bool format_is_float_conv(char c) {
75  return char_is_one_of(c, "aAeEfFgG");
76}
77
78// Returns string output character size for string-like conversions,
79// or 0 if the conversion is invalid.
80static int format_get_char_size(char convSpecifier,
81                                const char lengthModifier[2]) {
82  if (char_is_one_of(convSpecifier, "CS")) {
83    return sizeof(wchar_t);
84  }
85
86  if (char_is_one_of(convSpecifier, "cs[")) {
87    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
88      return sizeof(wchar_t);
89    else if (lengthModifier[0] == '\0')
90      return sizeof(char);
91  }
92
93  return 0;
94}
95
// Special, non-positive results of the *_get_value_size() helpers below.
// Positive results are real sizes in bytes.
enum FormatStoreSize {
  // Invalid conversion specifier.
  FSS_INVALID = 0,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2
};
106
107// Returns the memory size of a format directive (if >0), or a value of
108// FormatStoreSize.
109static int format_get_value_size(char convSpecifier,
110                                 const char lengthModifier[2],
111                                 bool promote_float) {
112  if (format_is_integer_conv(convSpecifier)) {
113    switch (lengthModifier[0]) {
114    case 'h':
115      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
116    case 'l':
117      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
118    case 'q':
119      return sizeof(long long);
120    case 'L':
121      return sizeof(long long);
122    case 'j':
123      return sizeof(INTMAX_T);
124    case 'z':
125      return sizeof(SIZE_T);
126    case 't':
127      return sizeof(PTRDIFF_T);
128    case 0:
129      return sizeof(int);
130    default:
131      return FSS_INVALID;
132    }
133  }
134
135  if (format_is_float_conv(convSpecifier)) {
136    switch (lengthModifier[0]) {
137    case 'L':
138    case 'q':
139      return sizeof(long double);
140    case 'l':
141      return lengthModifier[1] == 'l' ? sizeof(long double)
142                                           : sizeof(double);
143    case 0:
144      // Printf promotes floats to doubles but scanf does not
145      return promote_float ? sizeof(double) : sizeof(float);
146    default:
147      return FSS_INVALID;
148    }
149  }
150
151  if (convSpecifier == 'p') {
152    if (lengthModifier[0] != 0)
153      return FSS_INVALID;
154    return sizeof(void *);
155  }
156
157  return FSS_INVALID;
158}
159
// One parsed scanf directive, as filled in by scanf_parse_next().
struct ScanfDirective {
  // Argument index ("%n$"), or -1 if not specified.
  int argIdx;
  // Maximum field width; 0 if the directive does not specify one.
  int fieldWidth;
  // First character of the directive (the '%').
  const char *begin;
  // One past the last character of the directive.
  const char *end;
  // Assignment is suppressed ("*").
  bool suppressed;
  // The destination is allocated by scanf ("m").
  bool allocate;
  // Length modifier characters; unused entries are 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 if no directive was parsed.
  char convSpecifier;
  // The directive is "%a" followed by s, S or [...], which is ambiguous
  // between the old GNU allocating extension and POSIX "%a".
  bool maybeGnuMalloc;
};
171
172// Parse scanf format string. If a valid directive in encountered, it is
173// returned in dir. This function returns the pointer to the first
174// unprocessed character, or 0 in case of error.
175// In case of the end-of-string, a pointer to the closing \0 is returned.
176static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
177                                    ScanfDirective *dir) {
178  internal_memset(dir, 0, sizeof(*dir));
179  dir->argIdx = -1;
180
181  while (*p) {
182    if (*p != '%') {
183      ++p;
184      continue;
185    }
186    dir->begin = p;
187    ++p;
188    // %%
189    if (*p == '%') {
190      ++p;
191      continue;
192    }
193    if (*p == '\0') {
194      return nullptr;
195    }
196    // %n$
197    p = maybe_parse_param_index(p, &dir->argIdx);
198    CHECK(p);
199    // *
200    if (*p == '*') {
201      dir->suppressed = true;
202      ++p;
203    }
204    // Field width
205    if (*p >= '0' && *p <= '9') {
206      p = parse_number(p, &dir->fieldWidth);
207      CHECK(p);
208      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
209        return nullptr;
210    }
211    // m
212    if (*p == 'm') {
213      dir->allocate = true;
214      ++p;
215    }
216    // Length modifier.
217    p = maybe_parse_length_modifier(p, dir->lengthModifier);
218    // Conversion specifier.
219    dir->convSpecifier = *p++;
220    // Consume %[...] expression.
221    if (dir->convSpecifier == '[') {
222      if (*p == '^')
223        ++p;
224      if (*p == ']')
225        ++p;
226      while (*p && *p != ']')
227        ++p;
228      if (*p == 0)
229        return nullptr; // unexpected end of string
230                        // Consume the closing ']'.
231      ++p;
232    }
233    // This is unfortunately ambiguous between old GNU extension
234    // of %as, %aS and %a[...] and newer POSIX %a followed by
235    // letters s, S or [.
236    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
237        !dir->lengthModifier[0]) {
238      if (*p == 's' || *p == 'S') {
239        dir->maybeGnuMalloc = true;
240        ++p;
241      } else if (*p == '[') {
242        // Watch for %a[h-j%d], if % appears in the
243        // [...] range, then we need to give up, we don't know
244        // if scanf will parse it as POSIX %a [h-j %d ] or
245        // GNU allocation of string with range dh-j plus %.
246        const char *q = p + 1;
247        if (*q == '^')
248          ++q;
249        if (*q == ']')
250          ++q;
251        while (*q && *q != ']' && *q != '%')
252          ++q;
253        if (*q == 0 || *q == '%')
254          return nullptr;
255        p = q + 1; // Consume the closing ']'.
256        dir->maybeGnuMalloc = true;
257      }
258    }
259    dir->end = p;
260    break;
261  }
262  return p;
263}
264
265static int scanf_get_value_size(ScanfDirective *dir) {
266  if (dir->allocate) {
267    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
268      return FSS_INVALID;
269    return sizeof(char *);
270  }
271
272  if (dir->maybeGnuMalloc) {
273    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
274      return FSS_INVALID;
275    // This is ambiguous, so check the smaller size of char * (if it is
276    // a GNU extension of %as, %aS or %a[...]) and float (if it is
277    // POSIX %a followed by s, S or [ letters).
278    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
279  }
280
281  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
282    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
283    unsigned charSize =
284        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
285    if (charSize == 0)
286      return FSS_INVALID;
287    if (dir->fieldWidth == 0) {
288      if (!needsTerminator)
289        return charSize;
290      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
291    }
292    return (dir->fieldWidth + needsTerminator) * charSize;
293  }
294
295  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
296}
297
298// Common part of *scanf interceptors.
299// Process format string and va_list, and report all store ranges.
300// Stops when "consuming" n_inputs input items.
301static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
302                         const char *format, va_list aq) {
303  CHECK_GT(n_inputs, 0);
304  const char *p = format;
305
306  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
307
308  while (*p) {
309    ScanfDirective dir;
310    p = scanf_parse_next(p, allowGnuMalloc, &dir);
311    if (!p)
312      break;
313    if (dir.convSpecifier == 0) {
314      // This can only happen at the end of the format string.
315      CHECK_EQ(*p, 0);
316      break;
317    }
318    // Here the directive is valid. Do what it says.
319    if (dir.argIdx != -1) {
320      // Unsupported.
321      break;
322    }
323    if (dir.suppressed)
324      continue;
325    int size = scanf_get_value_size(&dir);
326    if (size == FSS_INVALID) {
327      Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
328             SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
329      break;
330    }
331    void *argp = va_arg(aq, void *);
332    if (dir.convSpecifier != 'n')
333      --n_inputs;
334    if (n_inputs < 0)
335      break;
336    if (size == FSS_STRLEN) {
337      size = internal_strlen((const char *)argp) + 1;
338    } else if (size == FSS_WCSLEN) {
339      // FIXME: actually use wcslen() to calculate it.
340      size = 0;
341    }
342    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
343    // For %ms/%mc, write the allocated output buffer as well.
344    if (dir.allocate) {
345      char *buf = *(char **)argp;
346      if (buf)
347        COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
348    }
349  }
350}
351
352#if SANITIZER_INTERCEPT_PRINTF
353
// One parsed printf directive, as filled in by printf_parse_next().
struct PrintfDirective {
  // Literal field width; 0 if none was given.
  int fieldWidth;
  // Literal precision; 0 if none was given.
  int fieldPrecision;
  // Width argument index ("%*n$"), or -1 if not specified.
  int argIdx;
  // Precision argument index (".*n$"), or -1 if not specified.
  int precisionIdx;
  // First character of the directive (the '%').
  const char *begin;
  // One past the last character of the directive.
  const char *end;
  // The width is taken from an argument ("*").
  bool starredWidth;
  // The precision is taken from an argument (".*").
  bool starredPrecision;
  // Length modifier characters; unused entries are 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 if no directive was parsed.
  char convSpecifier;
};
366
367static const char *maybe_parse_number(const char *p, int *out) {
368  if (*p >= '0' && *p <= '9')
369    p = parse_number(p, out);
370  return p;
371}
372
373static const char *maybe_parse_number_or_star(const char *p, int *out,
374                                              bool *star) {
375  if (*p == '*') {
376    *star = true;
377    ++p;
378  } else {
379    *star = false;
380    p = maybe_parse_number(p, out);
381  }
382  return p;
383}
384
385// Parse printf format string. Same as scanf_parse_next.
386static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
387  internal_memset(dir, 0, sizeof(*dir));
388  dir->argIdx = -1;
389  dir->precisionIdx = -1;
390
391  while (*p) {
392    if (*p != '%') {
393      ++p;
394      continue;
395    }
396    dir->begin = p;
397    ++p;
398    // %%
399    if (*p == '%') {
400      ++p;
401      continue;
402    }
403    if (*p == '\0') {
404      return nullptr;
405    }
406    // %n$
407    p = maybe_parse_param_index(p, &dir->precisionIdx);
408    CHECK(p);
409    // Flags
410    while (char_is_one_of(*p, "'-+ #0")) {
411      ++p;
412    }
413    // Field width
414    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
415                                   &dir->starredWidth);
416    if (!p)
417      return nullptr;
418    // Precision
419    if (*p == '.') {
420      ++p;
421      // Actual precision is optional (surprise!)
422      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
423                                     &dir->starredPrecision);
424      if (!p)
425        return nullptr;
426      // m$
427      if (dir->starredPrecision) {
428        p = maybe_parse_param_index(p, &dir->precisionIdx);
429        CHECK(p);
430      }
431    }
432    // Length modifier.
433    p = maybe_parse_length_modifier(p, dir->lengthModifier);
434    // Conversion specifier.
435    dir->convSpecifier = *p++;
436    dir->end = p;
437    break;
438  }
439  return p;
440}
441
442static int printf_get_value_size(PrintfDirective *dir) {
443  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
444    unsigned charSize =
445        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
446    if (charSize == 0)
447      return FSS_INVALID;
448    if (char_is_one_of(dir->convSpecifier, "sS")) {
449      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
450    }
451    return charSize;
452  }
453
454  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
455}
456
// Consumes one by-value argument from the va_list pointed to by aq.
// convSpecifier selects between the floating-point and the integer argument
// classes, size is the value size in bytes. Integer sizes 1, 2 and 4 are all
// read as u32 and size 8 as u64; floating-point size 8 is read as double and
// sizes 12 and 16 as long double. For any other size a warning is reported
// and the *enclosing function* returns, so this can only be used inside
// functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (!format_is_float_conv(convSpecifier)) {                    \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        va_arg(*aq, u32);                                          \
        break;                                                     \
      case 8:                                                      \
        va_arg(*aq, u64);                                          \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %zu\n",                    \
               static_cast<uptr>(size));                           \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 8:                                                      \
        va_arg(*aq, double);                                       \
        break;                                                     \
      case 12:                                                     \
      case 16:                                                     \
        va_arg(*aq, long double);                                  \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %zu\n",                    \
               static_cast<uptr>(size));                           \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
492
493// Common part of *printf interceptors.
494// Process format string and va_list, and report all load ranges.
495static void printf_common(void *ctx, const char *format, va_list aq) {
496  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
497
498  const char *p = format;
499
500  while (*p) {
501    PrintfDirective dir;
502    p = printf_parse_next(p, &dir);
503    if (!p)
504      break;
505    if (dir.convSpecifier == 0) {
506      // This can only happen at the end of the format string.
507      CHECK_EQ(*p, 0);
508      break;
509    }
510    // Here the directive is valid. Do what it says.
511    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
512      // Unsupported.
513      break;
514    }
515    if (dir.starredWidth) {
516      // Dynamic width
517      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
518    }
519    if (dir.starredPrecision) {
520      // Dynamic precision
521      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
522    }
523    // %m does not require an argument: strlen(errno).
524    if (dir.convSpecifier == 'm')
525      continue;
526    int size = printf_get_value_size(&dir);
527    if (size == FSS_INVALID) {
528      static int ReportedOnce;
529      if (!ReportedOnce++)
530        Report(
531            "%s: WARNING: unexpected format specifier in printf "
532            "interceptor: %.*s (reported once per process)\n",
533            SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
534      break;
535    }
536    if (dir.convSpecifier == 'n') {
537      void *argp = va_arg(aq, void *);
538      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
539      continue;
540    } else if (size == FSS_STRLEN) {
541      if (void *argp = va_arg(aq, void *)) {
542        if (dir.starredPrecision) {
543          // FIXME: properly support starred precision for strings.
544          size = 0;
545        } else if (dir.fieldPrecision > 0) {
546          // Won't read more than "precision" symbols.
547          size = internal_strnlen((const char *)argp, dir.fieldPrecision);
548          if (size < dir.fieldPrecision) size++;
549        } else {
550          // Whole string will be accessed.
551          size = internal_strlen((const char *)argp) + 1;
552        }
553        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
554      }
555    } else if (size == FSS_WCSLEN) {
556      if (void *argp = va_arg(aq, void *)) {
557        // FIXME: Properly support wide-character strings (via wcsrtombs).
558        size = 0;
559        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
560      }
561    } else {
562      // Skip non-pointer args
563      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
564    }
565  }
566}
567
568#endif // SANITIZER_INTERCEPT_PRINTF
569