1 //===-- sanitizer_symbolizer_mac.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is shared between various sanitizers' runtime libraries.
10 //
11 // Implementation of Mac-specific "atos" symbolizer.
12 //===----------------------------------------------------------------------===//
13 
14 #include "sanitizer_platform.h"
15 #if SANITIZER_MAC
16 
17 #include "sanitizer_allocator_internal.h"
18 #include "sanitizer_mac.h"
19 #include "sanitizer_symbolizer_mac.h"
20 
21 #include <dlfcn.h>
22 #include <errno.h>
23 #include <mach/mach.h>
24 #include <stdlib.h>
25 #include <sys/wait.h>
26 #include <unistd.h>
27 #include <util.h>
28 
29 namespace __sanitizer {
30 
31 bool DlAddrSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
32   Dl_info info;
33   int result = dladdr((const void *)addr, &info);
34   if (!result) return false;
35 
36   // Compute offset if possible. `dladdr()` doesn't always ensure that `addr >=
37   // sym_addr` so only compute the offset when this holds. Failure to find the
38   // function offset is not treated as a failure because it might still be
39   // possible to get the symbol name.
40   uptr sym_addr = reinterpret_cast<uptr>(info.dli_saddr);
41   if (addr >= sym_addr) {
42     stack->info.function_offset = addr - sym_addr;
43   }
44 
45   const char *demangled = DemangleSwiftAndCXX(info.dli_sname);
46   if (!demangled) return false;
47   stack->info.function = internal_strdup(demangled);
48   return true;
49 }
50 
51 bool DlAddrSymbolizer::SymbolizeData(uptr addr, DataInfo *datainfo) {
52   Dl_info info;
53   int result = dladdr((const void *)addr, &info);
54   if (!result) return false;
55   const char *demangled = DemangleSwiftAndCXX(info.dli_sname);
56   datainfo->name = internal_strdup(demangled);
57   datainfo->start = (uptr)info.dli_saddr;
58   return true;
59 }
60 
// Name of the environment variable that signals to `atos` that it should look
// up our task port (only used on the iOS simulator). This is a macro rather
// than a constant so that it can be concatenated with other string literals.
#define K_ATOS_ENV_VAR "__check_mach_ports_lookup"

// Backing storage for the `NAME=value` environment entry that is handed to
// `putenv()`. The run of zeros is a placeholder reserving room for the PID,
// which is written in place right before the `atos` subprocess is started.
// A static_assert in `AtosSymbolizerProcess` ties the size of this buffer to
// the size of its PID string buffer.
//
// This cannot live in `AtosSymbolizerProcess` because instances of that object
// are allocated by the internal allocator, which under ASan is poisoned with
// kAsanInternalHeapMagic.
static char kAtosMachPortEnvEntry[] = K_ATOS_ENV_VAR "=000000000000000";
67 
68 class AtosSymbolizerProcess final : public SymbolizerProcess {
69  public:
70   explicit AtosSymbolizerProcess(const char *path)
71       : SymbolizerProcess(path, /*use_posix_spawn*/ true) {
72     pid_str_[0] = '\0';
73   }
74 
75   void LateInitialize() {
76     if (SANITIZER_IOSSIM) {
77       // `putenv()` may call malloc/realloc so it is only safe to do this
78       // during LateInitialize() or later (i.e. we can't do this in the
79       // constructor).  We also can't do this in `StartSymbolizerSubprocess()`
80       // because in TSan we switch allocators when we're symbolizing.
81       // We use `putenv()` rather than `setenv()` so that we can later directly
82       // write into the storage without LibC getting involved to change what the
83       // variable is set to
84       int result = putenv(kAtosMachPortEnvEntry);
85       CHECK_EQ(result, 0);
86     }
87   }
88 
89  private:
90   bool StartSymbolizerSubprocess() override {
91     // Configure sandbox before starting atos process.
92 
93     // Put the string command line argument in the object so that it outlives
94     // the call to GetArgV.
95     internal_snprintf(pid_str_, sizeof(pid_str_), "%d", internal_getpid());
96 
97     if (SANITIZER_IOSSIM) {
98       // `atos` in the simulator is restricted in its ability to retrieve the
99       // task port for the target process (us) so we need to do extra work
100       // to pass our task port to it.
101       mach_port_t ports[]{mach_task_self()};
102       kern_return_t ret =
103           mach_ports_register(mach_task_self(), ports, /*count=*/1);
104       CHECK_EQ(ret, KERN_SUCCESS);
105 
106       // Set environment variable that signals to `atos` that it should look
107       // for our task port. We can't call `setenv()` here because it might call
108       // malloc/realloc. To avoid that we instead update the
109       // `mach_port_env_var_entry_` variable with our current PID.
110       uptr count = internal_snprintf(kAtosMachPortEnvEntry,
111                                      sizeof(kAtosMachPortEnvEntry),
112                                      K_ATOS_ENV_VAR "=%s", pid_str_);
113       CHECK_GE(count, sizeof(K_ATOS_ENV_VAR) + internal_strlen(pid_str_));
114       // Document our assumption but without calling `getenv()` in normal
115       // builds.
116       DCHECK(getenv(K_ATOS_ENV_VAR));
117       DCHECK_EQ(internal_strcmp(getenv(K_ATOS_ENV_VAR), pid_str_), 0);
118     }
119 
120     return SymbolizerProcess::StartSymbolizerSubprocess();
121   }
122 
123   bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
124     return (length >= 1 && buffer[length - 1] == '\n');
125   }
126 
127   void GetArgV(const char *path_to_binary,
128                const char *(&argv)[kArgVMax]) const override {
129     int i = 0;
130     argv[i++] = path_to_binary;
131     argv[i++] = "-p";
132     argv[i++] = &pid_str_[0];
133     if (GetMacosAlignedVersion() == MacosVersion(10, 9)) {
134       // On Mavericks atos prints a deprecation warning which we suppress by
135       // passing -d. The warning isn't present on other OSX versions, even the
136       // newer ones.
137       argv[i++] = "-d";
138     }
139     argv[i++] = nullptr;
140   }
141 
142   char pid_str_[16];
143   // Space for `\0` in `K_ATOS_ENV_VAR` is reused for `=`.
144   static_assert(sizeof(kAtosMachPortEnvEntry) ==
145                     (sizeof(K_ATOS_ENV_VAR) + sizeof(pid_str_)),
146                 "sizes should match");
147 };
148 
// The macro is only needed by the definitions above; keep it from leaking
// into the rest of the file.
#undef K_ATOS_ENV_VAR
150 
151 static bool ParseCommandOutput(const char *str, uptr addr, char **out_name,
152                                char **out_module, char **out_file, uptr *line,
153                                uptr *start_address) {
154   // Trim ending newlines.
155   char *trim;
156   ExtractTokenUpToDelimiter(str, "\n", &trim);
157 
158   // The line from `atos` is in one of these formats:
159   //   myfunction (in library.dylib) (sourcefile.c:17)
160   //   myfunction (in library.dylib) + 0x1fe
161   //   myfunction (in library.dylib) + 15
162   //   0xdeadbeef (in library.dylib) + 0x1fe
163   //   0xdeadbeef (in library.dylib) + 15
164   //   0xdeadbeef (in library.dylib)
165   //   0xdeadbeef
166 
167   const char *rest = trim;
168   char *symbol_name;
169   rest = ExtractTokenUpToDelimiter(rest, " (in ", &symbol_name);
170   if (rest[0] == '\0') {
171     InternalFree(symbol_name);
172     InternalFree(trim);
173     return false;
174   }
175 
176   if (internal_strncmp(symbol_name, "0x", 2) != 0)
177     *out_name = symbol_name;
178   else
179     InternalFree(symbol_name);
180   rest = ExtractTokenUpToDelimiter(rest, ") ", out_module);
181 
182   if (rest[0] == '(') {
183     if (out_file) {
184       rest++;
185       rest = ExtractTokenUpToDelimiter(rest, ":", out_file);
186       char *extracted_line_number;
187       rest = ExtractTokenUpToDelimiter(rest, ")", &extracted_line_number);
188       if (line) *line = (uptr)internal_atoll(extracted_line_number);
189       InternalFree(extracted_line_number);
190     }
191   } else if (rest[0] == '+') {
192     rest += 2;
193     uptr offset = internal_atoll(rest);
194     if (start_address) *start_address = addr - offset;
195   }
196 
197   InternalFree(trim);
198   return true;
199 }
200 
// Creates the symbolizer for the `atos` binary at `path`. The
// `AtosSymbolizerProcess` helper object is constructed with the
// `new (*allocator)` form, i.e. its storage comes from `allocator`.
AtosSymbolizer::AtosSymbolizer(const char *path, LowLevelAllocator *allocator)
    : process_(new (*allocator) AtosSymbolizerProcess(path)) {}
203 
204 bool AtosSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
205   if (!process_) return false;
206   if (addr == 0) return false;
207   char command[32];
208   internal_snprintf(command, sizeof(command), "0x%zx\n", addr);
209   const char *buf = process_->SendCommand(command);
210   if (!buf) return false;
211   uptr line;
212   uptr start_address = AddressInfo::kUnknown;
213   if (!ParseCommandOutput(buf, addr, &stack->info.function, &stack->info.module,
214                           &stack->info.file, &line, &start_address)) {
215     process_ = nullptr;
216     return false;
217   }
218   stack->info.line = (int)line;
219 
220   if (start_address == AddressInfo::kUnknown) {
221     // Fallback to dladdr() to get function start address if atos doesn't report
222     // it.
223     Dl_info info;
224     int result = dladdr((const void *)addr, &info);
225     if (result)
226       start_address = reinterpret_cast<uptr>(info.dli_saddr);
227   }
228 
229   // Only assign to `function_offset` if we were able to get the function's
230   // start address and we got a sensible `start_address` (dladdr doesn't always
231   // ensure that `addr >= sym_addr`).
232   if (start_address != AddressInfo::kUnknown && addr >= start_address) {
233     stack->info.function_offset = addr - start_address;
234   }
235   return true;
236 }
237 
238 bool AtosSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
239   if (!process_) return false;
240   char command[32];
241   internal_snprintf(command, sizeof(command), "0x%zx\n", addr);
242   const char *buf = process_->SendCommand(command);
243   if (!buf) return false;
244   if (!ParseCommandOutput(buf, addr, &info->name, &info->module, nullptr,
245                           nullptr, &info->start)) {
246     process_ = nullptr;
247     return false;
248   }
249   return true;
250 }
251 
252 void AtosSymbolizer::LateInitialize() { process_->LateInitialize(); }
253 
254 }  // namespace __sanitizer
255 
256 #endif  // SANITIZER_MAC
257