1 //===-- dfsan_interface.h -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of DataFlowSanitizer.
10 //
11 // Public interface header.
12 //===----------------------------------------------------------------------===//
13 #ifndef DFSAN_INTERFACE_H
14 #define DFSAN_INTERFACE_H
15 
16 #include <sanitizer/common_interface_defs.h>
17 #include <stddef.h>
18 #include <stdint.h>
19 
20 #ifdef __cplusplus
21 extern "C" {
22 #endif
23 
24 typedef uint8_t dfsan_label;
25 typedef uint32_t dfsan_origin;
26 
27 /// Signature of the callback argument to dfsan_set_write_callback().
28 typedef void(SANITIZER_CDECL *dfsan_write_callback_t)(int fd, const void *buf,
29                                                       size_t count);
30 
31 /// Signature of the callback argument to dfsan_set_conditional_callback().
32 typedef void(SANITIZER_CDECL *dfsan_conditional_callback_t)(
33     dfsan_label label, dfsan_origin origin);
34 
35 /// Signature of the callback argument to dfsan_set_reaches_function_callback().
36 /// The description is intended to hold the name of the variable.
37 typedef void(SANITIZER_CDECL *dfsan_reaches_function_callback_t)(
38     dfsan_label label, dfsan_origin origin, const char *file, unsigned int line,
39     const char *function);
40 
41 /// Computes the union of \c l1 and \c l2, resulting in a union label.
42 dfsan_label SANITIZER_CDECL dfsan_union(dfsan_label l1, dfsan_label l2);
43 
44 /// Sets the label for each address in [addr,addr+size) to \c label.
45 void SANITIZER_CDECL dfsan_set_label(dfsan_label label, void *addr,
46                                      size_t size);
47 
48 /// Sets the label for each address in [addr,addr+size) to the union of the
49 /// current label for that address and \c label.
50 void SANITIZER_CDECL dfsan_add_label(dfsan_label label, void *addr,
51                                      size_t size);
52 
53 /// Retrieves the label associated with the given data.
54 ///
55 /// The type of 'data' is arbitrary.  The function accepts a value of any type,
56 /// which can be truncated or extended (implicitly or explicitly) as necessary.
57 /// The truncation/extension operations will preserve the label of the original
58 /// value.
59 dfsan_label SANITIZER_CDECL dfsan_get_label(long data);
60 
61 /// Retrieves the immediate origin associated with the given data. The returned
62 /// origin may point to another origin.
63 ///
64 /// The type of 'data' is arbitrary.
65 dfsan_origin SANITIZER_CDECL dfsan_get_origin(long data);
66 
67 /// Retrieves the label associated with the data at the given address.
68 dfsan_label SANITIZER_CDECL dfsan_read_label(const void *addr, size_t size);
69 
/// Returns the origin associated with the first tainted byte in the \p size
/// bytes starting at the address \p addr.
72 dfsan_origin SANITIZER_CDECL dfsan_read_origin_of_first_taint(const void *addr,
73                                                               size_t size);
74 
75 /// Returns whether the given label contains the label elem.
76 int SANITIZER_CDECL dfsan_has_label(dfsan_label label, dfsan_label elem);
77 
78 /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
79 /// with the application memory.  Use this call to start over the taint tracking
80 /// within the same process.
81 ///
82 /// Note: If another thread is working with tainted data during the flush, that
83 /// taint could still be written to shadow after the flush.
84 void SANITIZER_CDECL dfsan_flush(void);
85 
86 /// Sets a callback to be invoked on calls to write().  The callback is invoked
87 /// before the write is done.  The write is not guaranteed to succeed when the
88 /// callback executes.  Pass in NULL to remove any callback.
89 void SANITIZER_CDECL
90 dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
91 
92 /// Sets a callback to be invoked on any conditional expressions which have a
93 /// taint label set. This can be used to find where tainted data influences
94 /// the behavior of the program.
95 /// These callbacks will only be added when -dfsan-conditional-callbacks=true.
96 void SANITIZER_CDECL
97 dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
98 
99 /// Conditional expressions occur during signal handlers.
100 /// Making callbacks that handle signals well is tricky, so when
101 /// -dfsan-conditional-callbacks=true, conditional expressions used in signal
102 /// handlers will add the labels they see into a global (bitwise-or together).
103 /// This function returns all label bits seen in signal handler conditions.
104 dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_conditional();
105 
106 /// Sets a callback to be invoked when tainted data reaches a function.
107 /// This could occur at function entry, or at a load instruction.
108 /// These callbacks will only be added if -dfsan-reaches-function-callbacks=1.
109 void SANITIZER_CDECL
110 dfsan_set_reaches_function_callback(dfsan_reaches_function_callback_t callback);
111 
112 /// Making callbacks that handle signals well is tricky, so when
113 /// -dfsan-reaches-function-callbacks=true, functions reached in signal
114 /// handlers will add the labels they see into a global (bitwise-or together).
115 /// This function returns all label bits seen during signal handlers.
116 dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_reaches_function();
117 
118 /// Interceptor hooks.
/// Whenever a dfsan's custom function is called the corresponding
/// hook is called if it is non-zero. The hooks should be defined by the user.
121 /// The primary use case is taint-guided fuzzing, where the fuzzer
122 /// needs to see the parameters of the function and the labels.
123 /// FIXME: implement more hooks.
124 void SANITIZER_CDECL dfsan_weak_hook_memcmp(void *caller_pc, const void *s1,
125                                             const void *s2, size_t n,
126                                             dfsan_label s1_label,
127                                             dfsan_label s2_label,
128                                             dfsan_label n_label);
129 void SANITIZER_CDECL dfsan_weak_hook_strncmp(void *caller_pc, const char *s1,
130                                              const char *s2, size_t n,
131                                              dfsan_label s1_label,
132                                              dfsan_label s2_label,
133                                              dfsan_label n_label);
134 
135 /// Prints the origin trace of the label at the address addr to stderr. It also
136 /// prints description at the beginning of the trace. If origin tracking is not
137 /// on, or the address is not labeled, it prints nothing.
138 void SANITIZER_CDECL dfsan_print_origin_trace(const void *addr,
139                                               const char *description);
140 /// As above, but use an origin id from dfsan_get_origin() instead of address.
141 /// Does not include header line with taint label and address information.
142 void SANITIZER_CDECL dfsan_print_origin_id_trace(dfsan_origin origin);
143 
144 /// Prints the origin trace of the label at the address \p addr to a
145 /// pre-allocated output buffer. If origin tracking is not on, or the address is
146 /// not labeled, it prints nothing.
147 ///
148 /// Typical usage:
149 /// \code
150 ///   char kDescription[] = "...";
151 ///   char buf[1024];
152 ///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
153 /// \endcode
154 ///
155 /// Typical usage that handles truncation:
156 /// \code
157 ///   char buf[1024];
///   size_t len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
159 ///
160 ///   if (len < sizeof(buf)) {
161 ///     ProcessOriginTrace(buf);
162 ///   } else {
163 ///     char *tmpbuf = new char[len + 1];
164 ///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
165 ///     ProcessOriginTrace(tmpbuf);
166 ///     delete[] tmpbuf;
167 ///   }
168 /// \endcode
169 ///
170 /// \param addr The tainted memory address whose origin we are printing.
171 /// \param description A description printed at the beginning of the trace.
172 /// \param [out] out_buf The output buffer to write the results to.
173 /// \param out_buf_size The size of \p out_buf.
174 ///
175 /// \returns The number of symbols that should have been written to \p out_buf
176 /// (not including trailing null byte '\0'). Thus, the string is truncated iff
177 /// return value is not less than \p out_buf_size.
178 size_t SANITIZER_CDECL dfsan_sprint_origin_trace(const void *addr,
179                                                  const char *description,
180                                                  char *out_buf,
181                                                  size_t out_buf_size);
182 /// As above, but use an origin id from dfsan_get_origin() instead of address.
183 /// Does not include header line with taint label and address information.
184 size_t SANITIZER_CDECL dfsan_sprint_origin_id_trace(dfsan_origin origin,
185                                                     char *out_buf,
186                                                     size_t out_buf_size);
187 
188 /// Prints the stack trace leading to this call to a pre-allocated output
189 /// buffer.
190 ///
191 /// For usage examples, see dfsan_sprint_origin_trace.
192 ///
193 /// \param [out] out_buf The output buffer to write the results to.
194 /// \param out_buf_size The size of \p out_buf.
195 ///
196 /// \returns The number of symbols that should have been written to \p out_buf
197 /// (not including trailing null byte '\0'). Thus, the string is truncated iff
198 /// return value is not less than \p out_buf_size.
199 size_t SANITIZER_CDECL dfsan_sprint_stack_trace(char *out_buf,
200                                                 size_t out_buf_size);
201 
202 /// Retrieves the very first origin associated with the data at the given
203 /// address.
204 dfsan_origin SANITIZER_CDECL dfsan_get_init_origin(const void *addr);
205 
206 /// Returns the value of -dfsan-track-origins.
207 /// * 0: do not track origins.
208 /// * 1: track origins at memory store operations.
209 /// * 2: track origins at memory load and store operations.
210 int SANITIZER_CDECL dfsan_get_track_origins(void);
211 #ifdef __cplusplus
212 } // extern "C"
213 
dfsan_set_label(dfsan_label label,T & data)214 template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
215   dfsan_set_label(label, (void *)&data, sizeof(T));
216 }
217 
218 #endif
219 
220 #endif // DFSAN_INTERFACE_H
221