1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Scanf/printf implementation for use in *Sanitizer interceptors.
10// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
11// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
12// with a few common GNU extensions.
13//
14//===----------------------------------------------------------------------===//
15
16#include <stdarg.h>
17
18static const char *parse_number(const char *p, int *out) {
19  *out = internal_atoll(p);
20  while (*p >= '0' && *p <= '9')
21    ++p;
22  return p;
23}
24
25static const char *maybe_parse_param_index(const char *p, int *out) {
26  // n$
27  if (*p >= '0' && *p <= '9') {
28    int number;
29    const char *q = parse_number(p, &number);
30    CHECK(q);
31    if (*q == '$') {
32      *out = number;
33      p = q + 1;
34    }
35  }
36
37  // Otherwise, do not change p. This will be re-parsed later as the field
38  // width.
39  return p;
40}
41
42static bool char_is_one_of(char c, const char *s) {
43  return !!internal_strchr(s, c);
44}
45
46static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
47  if (char_is_one_of(*p, "jztLq")) {
48    ll[0] = *p;
49    ++p;
50  } else if (*p == 'h') {
51    ll[0] = 'h';
52    ++p;
53    if (*p == 'h') {
54      ll[1] = 'h';
55      ++p;
56    }
57  } else if (*p == 'l') {
58    ll[0] = 'l';
59    ++p;
60    if (*p == 'l') {
61      ll[1] = 'l';
62      ++p;
63    }
64  }
65  return p;
66}
67
// Returns true if the character is an integer conversion specifier
// (d, i, o, u, x, X or n).
// Written as an explicit switch so that the NUL character, i.e. the end of a
// format string, is never classified as a conversion specifier.
static bool format_is_integer_conv(char c) {
  switch (c) {
  case 'd':
  case 'i':
  case 'o':
  case 'u':
  case 'x':
  case 'X':
  case 'n':
    return true;
  default:
    return false;
  }
}
72
// Returns true if the character is a floating point conversion specifier
// (a, A, e, E, f, F, g or G).
// Written as an explicit switch so that the NUL character, i.e. the end of a
// format string, is never classified as a conversion specifier.
static bool format_is_float_conv(char c) {
  switch (c) {
  case 'a':
  case 'A':
  case 'e':
  case 'E':
  case 'f':
  case 'F':
  case 'g':
  case 'G':
    return true;
  default:
    return false;
  }
}
77
78// Returns string output character size for string-like conversions,
79// or 0 if the conversion is invalid.
80static int format_get_char_size(char convSpecifier,
81                                const char lengthModifier[2]) {
82  if (char_is_one_of(convSpecifier, "CS")) {
83    return sizeof(wchar_t);
84  }
85
86  if (char_is_one_of(convSpecifier, "cs[")) {
87    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
88      return sizeof(wchar_t);
89    else if (lengthModifier[0] == '\0')
90      return sizeof(char);
91  }
92
93  return 0;
94}
95
// Special (non-positive) results of the *_get_value_size() helpers; any
// positive result is an actual size in bytes.
enum FormatStoreSize {
  // Invalid conversion specifier.
  FSS_INVALID = 0,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2
};
106
107// Returns the memory size of a format directive (if >0), or a value of
108// FormatStoreSize.
109static int format_get_value_size(char convSpecifier,
110                                 const char lengthModifier[2],
111                                 bool promote_float) {
112  if (format_is_integer_conv(convSpecifier)) {
113    switch (lengthModifier[0]) {
114    case 'h':
115      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
116    case 'l':
117      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
118    case 'q':
119      return sizeof(long long);
120    case 'L':
121      return sizeof(long long);
122    case 'j':
123      return sizeof(INTMAX_T);
124    case 'z':
125      return sizeof(SIZE_T);
126    case 't':
127      return sizeof(PTRDIFF_T);
128    case 0:
129      return sizeof(int);
130    default:
131      return FSS_INVALID;
132    }
133  }
134
135  if (format_is_float_conv(convSpecifier)) {
136    switch (lengthModifier[0]) {
137    case 'L':
138    case 'q':
139      return sizeof(long double);
140    case 'l':
141      return lengthModifier[1] == 'l' ? sizeof(long double)
142                                           : sizeof(double);
143    case 0:
144      // Printf promotes floats to doubles but scanf does not
145      return promote_float ? sizeof(double) : sizeof(float);
146    default:
147      return FSS_INVALID;
148    }
149  }
150
151  if (convSpecifier == 'p') {
152    if (lengthModifier[0] != 0)
153      return FSS_INVALID;
154    return sizeof(void *);
155  }
156
157  return FSS_INVALID;
158}
159
// One scanf conversion directive, as filled in by scanf_parse_next(). That
// function zero-fills the whole structure before parsing, so fields that were
// not seen in the format string are 0/false (argIdx: -1).
struct ScanfDirective {
  int argIdx; // argument index, or -1 if not specified ("%n$")
  int fieldWidth; // maximum field width; 0 if none was given
  const char *begin; // the '%' that starts the directive
  const char *end; // first character after the directive
  bool suppressed; // suppress assignment ("*")
  bool allocate;   // allocate space ("m")
  char lengthModifier[2]; // length modifier characters, e.g. "h", "hh", "ll"
  char convSpecifier; // conversion specifier character; 0 if none was parsed
  bool maybeGnuMalloc; // "%a" directly followed by 's', 'S' or "[...]", which
                       // may be the old GNU allocating form instead of POSIX %a
};
171
172// Parse scanf format string. If a valid directive in encountered, it is
173// returned in dir. This function returns the pointer to the first
174// unprocessed character, or 0 in case of error.
175// In case of the end-of-string, a pointer to the closing \0 is returned.
176static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
177                                    ScanfDirective *dir) {
178  internal_memset(dir, 0, sizeof(*dir));
179  dir->argIdx = -1;
180
181  while (*p) {
182    if (*p != '%') {
183      ++p;
184      continue;
185    }
186    dir->begin = p;
187    ++p;
188    // %%
189    if (*p == '%') {
190      ++p;
191      continue;
192    }
193    if (*p == '\0') {
194      return nullptr;
195    }
196    // %n$
197    p = maybe_parse_param_index(p, &dir->argIdx);
198    CHECK(p);
199    // *
200    if (*p == '*') {
201      dir->suppressed = true;
202      ++p;
203    }
204    // Field width
205    if (*p >= '0' && *p <= '9') {
206      p = parse_number(p, &dir->fieldWidth);
207      CHECK(p);
208      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
209        return nullptr;
210    }
211    // m
212    if (*p == 'm') {
213      dir->allocate = true;
214      ++p;
215    }
216    // Length modifier.
217    p = maybe_parse_length_modifier(p, dir->lengthModifier);
218    // Conversion specifier.
219    dir->convSpecifier = *p++;
220    // Consume %[...] expression.
221    if (dir->convSpecifier == '[') {
222      if (*p == '^')
223        ++p;
224      if (*p == ']')
225        ++p;
226      while (*p && *p != ']')
227        ++p;
228      if (*p == 0)
229        return nullptr; // unexpected end of string
230                        // Consume the closing ']'.
231      ++p;
232    }
233    // This is unfortunately ambiguous between old GNU extension
234    // of %as, %aS and %a[...] and newer POSIX %a followed by
235    // letters s, S or [.
236    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
237        !dir->lengthModifier[0]) {
238      if (*p == 's' || *p == 'S') {
239        dir->maybeGnuMalloc = true;
240        ++p;
241      } else if (*p == '[') {
242        // Watch for %a[h-j%d], if % appears in the
243        // [...] range, then we need to give up, we don't know
244        // if scanf will parse it as POSIX %a [h-j %d ] or
245        // GNU allocation of string with range dh-j plus %.
246        const char *q = p + 1;
247        if (*q == '^')
248          ++q;
249        if (*q == ']')
250          ++q;
251        while (*q && *q != ']' && *q != '%')
252          ++q;
253        if (*q == 0 || *q == '%')
254          return nullptr;
255        p = q + 1; // Consume the closing ']'.
256        dir->maybeGnuMalloc = true;
257      }
258    }
259    dir->end = p;
260    break;
261  }
262  return p;
263}
264
265static int scanf_get_value_size(ScanfDirective *dir) {
266  if (dir->allocate) {
267    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
268      return FSS_INVALID;
269    return sizeof(char *);
270  }
271
272  if (dir->maybeGnuMalloc) {
273    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
274      return FSS_INVALID;
275    // This is ambiguous, so check the smaller size of char * (if it is
276    // a GNU extension of %as, %aS or %a[...]) and float (if it is
277    // POSIX %a followed by s, S or [ letters).
278    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
279  }
280
281  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
282    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
283    unsigned charSize =
284        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
285    if (charSize == 0)
286      return FSS_INVALID;
287    if (dir->fieldWidth == 0) {
288      if (!needsTerminator)
289        return charSize;
290      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
291    }
292    return (dir->fieldWidth + needsTerminator) * charSize;
293  }
294
295  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
296}
297
298// Common part of *scanf interceptors.
299// Process format string and va_list, and report all store ranges.
300// Stops when "consuming" n_inputs input items.
301static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
302                         const char *format, va_list aq) {
303  CHECK_GT(n_inputs, 0);
304  const char *p = format;
305
306  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
307
308  while (*p) {
309    ScanfDirective dir;
310    p = scanf_parse_next(p, allowGnuMalloc, &dir);
311    if (!p)
312      break;
313    if (dir.convSpecifier == 0) {
314      // This can only happen at the end of the format string.
315      CHECK_EQ(*p, 0);
316      break;
317    }
318    // Here the directive is valid. Do what it says.
319    if (dir.argIdx != -1) {
320      // Unsupported.
321      break;
322    }
323    if (dir.suppressed)
324      continue;
325    int size = scanf_get_value_size(&dir);
326    if (size == FSS_INVALID) {
327      Report("%s: WARNING: unexpected format specifier in scanf interceptor: ",
328             SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin);
329      break;
330    }
331    void *argp = va_arg(aq, void *);
332    if (dir.convSpecifier != 'n')
333      --n_inputs;
334    if (n_inputs < 0)
335      break;
336    if (size == FSS_STRLEN) {
337      size = internal_strlen((const char *)argp) + 1;
338    } else if (size == FSS_WCSLEN) {
339      // FIXME: actually use wcslen() to calculate it.
340      size = 0;
341    }
342    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
343  }
344}
345
346#if SANITIZER_INTERCEPT_PRINTF
347
// One printf conversion directive, as filled in by printf_parse_next(). That
// function zero-fills the whole structure and then sets both index fields to
// -1 before parsing.
struct PrintfDirective {
  int fieldWidth; // literal field width; 0 if absent or given as '*'
  int fieldPrecision; // literal precision; 0 if absent or given as '*'
  int argIdx; // width argument index, or -1 if not specified ("%*n$")
  int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
  const char *begin; // the '%' that starts the directive
  const char *end; // first character after the conversion specifier
  bool starredWidth; // field width was given as '*'
  bool starredPrecision; // precision was given as '*'
  char lengthModifier[2]; // length modifier characters, e.g. "h", "hh", "ll"
  char convSpecifier; // conversion specifier character; 0 if none was parsed
};
360
361static const char *maybe_parse_number(const char *p, int *out) {
362  if (*p >= '0' && *p <= '9')
363    p = parse_number(p, out);
364  return p;
365}
366
367static const char *maybe_parse_number_or_star(const char *p, int *out,
368                                              bool *star) {
369  if (*p == '*') {
370    *star = true;
371    ++p;
372  } else {
373    *star = false;
374    p = maybe_parse_number(p, out);
375  }
376  return p;
377}
378
379// Parse printf format string. Same as scanf_parse_next.
380static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
381  internal_memset(dir, 0, sizeof(*dir));
382  dir->argIdx = -1;
383  dir->precisionIdx = -1;
384
385  while (*p) {
386    if (*p != '%') {
387      ++p;
388      continue;
389    }
390    dir->begin = p;
391    ++p;
392    // %%
393    if (*p == '%') {
394      ++p;
395      continue;
396    }
397    if (*p == '\0') {
398      return nullptr;
399    }
400    // %n$
401    p = maybe_parse_param_index(p, &dir->precisionIdx);
402    CHECK(p);
403    // Flags
404    while (char_is_one_of(*p, "'-+ #0")) {
405      ++p;
406    }
407    // Field width
408    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
409                                   &dir->starredWidth);
410    if (!p)
411      return nullptr;
412    // Precision
413    if (*p == '.') {
414      ++p;
415      // Actual precision is optional (surprise!)
416      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
417                                     &dir->starredPrecision);
418      if (!p)
419        return nullptr;
420      // m$
421      if (dir->starredPrecision) {
422        p = maybe_parse_param_index(p, &dir->precisionIdx);
423        CHECK(p);
424      }
425    }
426    // Length modifier.
427    p = maybe_parse_length_modifier(p, dir->lengthModifier);
428    // Conversion specifier.
429    dir->convSpecifier = *p++;
430    dir->end = p;
431    break;
432  }
433  return p;
434}
435
436static int printf_get_value_size(PrintfDirective *dir) {
437  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
438    unsigned charSize =
439        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
440    if (charSize == 0)
441      return FSS_INVALID;
442    if (char_is_one_of(dir->convSpecifier, "sS")) {
443      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
444    }
445    return charSize;
446  }
447
448  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
449}
450
// Consumes one scalar (non-pointer) variadic argument from the va_list that
// aq points to. convSpecifier selects between the floating point and the
// integer argument classes, size is the argument size in bytes.
// Integer sizes 1, 2 and 4 are all fetched as u32; floating point sizes 12
// and 16 are both fetched as long double.
// On an unexpected size a warning is reported and the macro executes
// "return;" in the enclosing function, so it can only be used inside
// functions returning void. The size is cast to int for the "%d" in the
// warning because callers also pass sizeof() expressions. size is evaluated
// more than once; pass an expression without side effects.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (format_is_float_conv(convSpecifier)) {                     \
      switch (size) {                                              \
      case 8:                                                      \
        va_arg(*(aq), double);                                     \
        break;                                                     \
      case 12:                                                     \
      case 16:                                                     \
        va_arg(*(aq), long double);                                \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %d\n", (int)(size));       \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        va_arg(*(aq), u32);                                        \
        break;                                                     \
      case 8:                                                      \
        va_arg(*(aq), u64);                                        \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %d\n", (int)(size));       \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
486
487// Common part of *printf interceptors.
488// Process format string and va_list, and report all load ranges.
489static void printf_common(void *ctx, const char *format, va_list aq) {
490  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
491
492  const char *p = format;
493
494  while (*p) {
495    PrintfDirective dir;
496    p = printf_parse_next(p, &dir);
497    if (!p)
498      break;
499    if (dir.convSpecifier == 0) {
500      // This can only happen at the end of the format string.
501      CHECK_EQ(*p, 0);
502      break;
503    }
504    // Here the directive is valid. Do what it says.
505    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
506      // Unsupported.
507      break;
508    }
509    if (dir.starredWidth) {
510      // Dynamic width
511      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
512    }
513    if (dir.starredPrecision) {
514      // Dynamic precision
515      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
516    }
517    // %m does not require an argument: strlen(errno).
518    if (dir.convSpecifier == 'm')
519      continue;
520    int size = printf_get_value_size(&dir);
521    if (size == FSS_INVALID) {
522      static int ReportedOnce;
523      if (!ReportedOnce++)
524        Report(
525            "%s: WARNING: unexpected format specifier in printf "
526            "interceptor: %.*s (reported once per process)\n",
527            SanitizerToolName, dir.end - dir.begin, dir.begin);
528      break;
529    }
530    if (dir.convSpecifier == 'n') {
531      void *argp = va_arg(aq, void *);
532      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
533      continue;
534    } else if (size == FSS_STRLEN) {
535      if (void *argp = va_arg(aq, void *)) {
536        if (dir.starredPrecision) {
537          // FIXME: properly support starred precision for strings.
538          size = 0;
539        } else if (dir.fieldPrecision > 0) {
540          // Won't read more than "precision" symbols.
541          size = internal_strnlen((const char *)argp, dir.fieldPrecision);
542          if (size < dir.fieldPrecision) size++;
543        } else {
544          // Whole string will be accessed.
545          size = internal_strlen((const char *)argp) + 1;
546        }
547        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
548      }
549    } else if (size == FSS_WCSLEN) {
550      if (void *argp = va_arg(aq, void *)) {
551        // FIXME: Properly support wide-character strings (via wcsrtombs).
552        size = 0;
553        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
554      }
555    } else {
556      // Skip non-pointer args
557      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
558    }
559  }
560}
561
562#endif // SANITIZER_INTERCEPT_PRINTF
563