1 //===-- dfsan_interface.h -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of DataFlowSanitizer.
10 //
11 // Public interface header.
12 //===----------------------------------------------------------------------===//
13 #ifndef DFSAN_INTERFACE_H
14 #define DFSAN_INTERFACE_H
15 
16 #include <stddef.h>
17 #include <stdint.h>
18 #include <sanitizer/common_interface_defs.h>
19 
20 #ifdef __cplusplus
21 extern "C" {
22 #endif
23 
24 typedef uint8_t dfsan_label;
25 typedef uint32_t dfsan_origin;
26 
27 /// Signature of the callback argument to dfsan_set_write_callback().
28 typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
29 
30 /// Signature of the callback argument to dfsan_set_conditional_callback().
31 typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
32                                              dfsan_origin origin);
33 
34 /// Computes the union of \c l1 and \c l2, resulting in a union label.
35 dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
36 
37 /// Sets the label for each address in [addr,addr+size) to \c label.
38 void dfsan_set_label(dfsan_label label, void *addr, size_t size);
39 
40 /// Sets the label for each address in [addr,addr+size) to the union of the
41 /// current label for that address and \c label.
42 void dfsan_add_label(dfsan_label label, void *addr, size_t size);
43 
44 /// Retrieves the label associated with the given data.
45 ///
46 /// The type of 'data' is arbitrary.  The function accepts a value of any type,
47 /// which can be truncated or extended (implicitly or explicitly) as necessary.
48 /// The truncation/extension operations will preserve the label of the original
49 /// value.
50 dfsan_label dfsan_get_label(long data);
51 
52 /// Retrieves the immediate origin associated with the given data. The returned
53 /// origin may point to another origin.
54 ///
55 /// The type of 'data' is arbitrary.
56 dfsan_origin dfsan_get_origin(long data);
57 
58 /// Retrieves the label associated with the data at the given address.
59 dfsan_label dfsan_read_label(const void *addr, size_t size);
60 
/// Returns the origin associated with the first tainted byte among the
/// \c size bytes starting at address \c addr.
63 dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size);
64 
/// Returns whether the label \c label contains the label \c elem.
66 int dfsan_has_label(dfsan_label label, dfsan_label elem);
67 
68 /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
69 /// with the application memory.  Use this call to start over the taint tracking
70 /// within the same process.
71 ///
72 /// Note: If another thread is working with tainted data during the flush, that
73 /// taint could still be written to shadow after the flush.
74 void dfsan_flush(void);
75 
76 /// Sets a callback to be invoked on calls to write().  The callback is invoked
77 /// before the write is done.  The write is not guaranteed to succeed when the
78 /// callback executes.  Pass in NULL to remove any callback.
79 void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
80 
81 /// Sets a callback to be invoked on any conditional expressions which have a
82 /// taint label set. This can be used to find where tainted data influences
83 /// the behavior of the program.
84 /// These callbacks will only be added when -dfsan-conditional-callbacks=true.
85 void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
86 
87 /// Conditional expressions occur during signal handlers.
88 /// Making callbacks that handle signals well is tricky, so when
89 /// -dfsan-conditional-callbacks=true, conditional expressions used in signal
90 /// handlers will add the labels they see into a global (bitwise-or together).
91 /// This function returns all label bits seen in signal handler conditions.
92 dfsan_label dfsan_get_labels_in_signal_conditional();
93 
94 /// Interceptor hooks.
/// Whenever one of dfsan's custom functions is called, the corresponding
/// hook is called if it is non-zero. The hooks should be defined by the user.
97 /// The primary use case is taint-guided fuzzing, where the fuzzer
98 /// needs to see the parameters of the function and the labels.
99 /// FIXME: implement more hooks.
100 void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
101                             size_t n, dfsan_label s1_label,
102                             dfsan_label s2_label, dfsan_label n_label);
103 void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
104                              size_t n, dfsan_label s1_label,
105                              dfsan_label s2_label, dfsan_label n_label);
106 
107 /// Prints the origin trace of the label at the address addr to stderr. It also
108 /// prints description at the beginning of the trace. If origin tracking is not
109 /// on, or the address is not labeled, it prints nothing.
110 void dfsan_print_origin_trace(const void *addr, const char *description);
111 /// As above, but use an origin id from dfsan_get_origin() instead of address.
112 /// Does not include header line with taint label and address information.
113 void dfsan_print_origin_id_trace(dfsan_origin origin);
114 
115 /// Prints the origin trace of the label at the address \p addr to a
116 /// pre-allocated output buffer. If origin tracking is not on, or the address is
117 /// not labeled, it prints nothing.
118 ///
119 /// Typical usage:
120 /// \code
121 ///   char kDescription[] = "...";
122 ///   char buf[1024];
123 ///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
124 /// \endcode
125 ///
126 /// Typical usage that handles truncation:
127 /// \code
128 ///   char buf[1024];
///   size_t len =
///       dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
130 ///
131 ///   if (len < sizeof(buf)) {
132 ///     ProcessOriginTrace(buf);
133 ///   } else {
134 ///     char *tmpbuf = new char[len + 1];
135 ///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
136 ///     ProcessOriginTrace(tmpbuf);
137 ///     delete[] tmpbuf;
138 ///   }
139 /// \endcode
140 ///
141 /// \param addr The tainted memory address whose origin we are printing.
142 /// \param description A description printed at the beginning of the trace.
143 /// \param [out] out_buf The output buffer to write the results to.
144 /// \param out_buf_size The size of \p out_buf.
145 ///
/// \returns The number of characters that should have been written to
/// \p out_buf (not including trailing null byte '\0'). Thus, the string is
/// truncated iff return value is not less than \p out_buf_size.
149 size_t dfsan_sprint_origin_trace(const void *addr, const char *description,
150                                  char *out_buf, size_t out_buf_size);
151 /// As above, but use an origin id from dfsan_get_origin() instead of address.
152 /// Does not include header line with taint label and address information.
153 size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf,
154                                     size_t out_buf_size);
155 
156 /// Prints the stack trace leading to this call to a pre-allocated output
157 /// buffer.
158 ///
159 /// For usage examples, see dfsan_sprint_origin_trace.
160 ///
161 /// \param [out] out_buf The output buffer to write the results to.
162 /// \param out_buf_size The size of \p out_buf.
163 ///
/// \returns The number of characters that should have been written to
/// \p out_buf (not including trailing null byte '\0'). Thus, the string is
/// truncated iff return value is not less than \p out_buf_size.
167 size_t dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size);
168 
169 /// Retrieves the very first origin associated with the data at the given
170 /// address.
171 dfsan_origin dfsan_get_init_origin(const void *addr);
172 
173 /// Returns the value of -dfsan-track-origins.
174 /// * 0: do not track origins.
175 /// * 1: track origins at memory store operations.
176 /// * 2: track origins at memory load and store operations.
177 int dfsan_get_track_origins(void);
178 #ifdef __cplusplus
179 }  // extern "C"
180 
181 template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
182   dfsan_set_label(label, (void *)&data, sizeof(T));
183 }
184 
185 #endif
186 
187 #endif  // DFSAN_INTERFACE_H
188