1 //===-- sanitizer_symbolizer_posix_libcdep.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is shared between AddressSanitizer and ThreadSanitizer
10 // run-time libraries.
11 // POSIX-specific implementation of symbolizer parts.
12 //===----------------------------------------------------------------------===//
13 
14 #include "sanitizer_platform.h"
15 #if SANITIZER_POSIX
16 #  include <dlfcn.h>  // for dlsym()
17 #  include <errno.h>
18 #  include <stdint.h>
19 #  include <stdlib.h>
20 #  include <sys/wait.h>
21 #  include <unistd.h>
22 
23 #  include "sanitizer_allocator_internal.h"
24 #  include "sanitizer_common.h"
25 #  include "sanitizer_file.h"
26 #  include "sanitizer_flags.h"
27 #  include "sanitizer_internal_defs.h"
28 #  include "sanitizer_linux.h"
29 #  include "sanitizer_placement_new.h"
30 #  include "sanitizer_posix.h"
31 #  include "sanitizer_procmaps.h"
32 #  include "sanitizer_symbolizer_internal.h"
33 #  include "sanitizer_symbolizer_libbacktrace.h"
34 #  include "sanitizer_symbolizer_mac.h"
35 
36 // C++ demangling function, as required by Itanium C++ ABI. This is weak,
37 // because we do not require a C++ ABI library to be linked to a program
38 // using sanitizers; if it's not present, we'll just use the mangled name.
namespace __cxxabiv1 {
  // Weak declaration of the Itanium C++ ABI demangler. DemangleCXXABI tests
  // the symbol's address before calling it, so a program that links no C++
  // ABI library still works and simply sees mangled names.
  extern "C" SANITIZER_WEAK_ATTRIBUTE
  char *__cxa_demangle(const char *mangled, char *buffer,
                                  size_t *length, int *status);
}
44 
45 namespace __sanitizer {
46 
47 // Attempts to demangle the name via __cxa_demangle from __cxxabiv1.
const char *DemangleCXXABI(const char *name) {
  // The demangler is declared weak: when no C++ ABI library provides it the
  // address is null and the caller gets the mangled name back unchanged.
  if (!&__cxxabiv1::__cxa_demangle)
    return name;

  // FIXME: __cxa_demangle insists on allocating memory for its result. Short
  // of shipping our own demangler (libc++abi's implementation could be
  // adapted so that it does not allocate) there is little we can do, so the
  // returned buffer is deliberately leaked.
  const char *demangled_name = __cxxabiv1::__cxa_demangle(
      name, /*buffer=*/nullptr, /*length=*/nullptr, /*status=*/nullptr);
  return demangled_name ? demangled_name : name;
}
61 
62 // As of now, there are no headers for the Swift runtime. Once they are
63 // present, we will weakly link since we do not require Swift runtime to be
64 // linked.
using swift_demangle_ft = char *(*)(const char *mangledName,
                                    size_t mangledNameLength,
                                    char *outputBuffer,
                                    size_t *outputBufferSize, uint32_t flags);
// Set by InitializeSwiftDemangler(); remains null until then, and afterwards
// whenever dlsym cannot find "swift_demangle".
static swift_demangle_ft swift_demangle_f = nullptr;
69 
70 // This must not happen lazily at symbolication time, because dlsym uses
71 // malloc and thread-local storage, which is not a good thing to do during
72 // symbolication.
73 static void InitializeSwiftDemangler() {
74   swift_demangle_f = (swift_demangle_ft)dlsym(RTLD_DEFAULT, "swift_demangle");
75 }
76 
77 // Attempts to demangle a Swift name. The demangler will return nullptr if a
78 // non-Swift name is passed in.
79 const char *DemangleSwift(const char *name) {
80   if (swift_demangle_f)
81     return swift_demangle_f(name, internal_strlen(name), 0, 0, 0);
82 
83   return nullptr;
84 }
85 
86 const char *DemangleSwiftAndCXX(const char *name) {
87   if (!name) return nullptr;
88   if (const char *swift_demangled_name = DemangleSwift(name))
89     return swift_demangled_name;
90   return DemangleCXXABI(name);
91 }
92 
93 static bool CreateTwoHighNumberedPipes(int *infd_, int *outfd_) {
94   int *infd = NULL;
95   int *outfd = NULL;
96   // The client program may close its stdin and/or stdout and/or stderr
97   // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
98   // In this case the communication between the forked processes may be
99   // broken if either the parent or the child tries to close or duplicate
100   // these descriptors. The loop below produces two pairs of file
101   // descriptors, each greater than 2 (stderr).
102   int sock_pair[5][2];
103   for (int i = 0; i < 5; i++) {
104     if (pipe(sock_pair[i]) == -1) {
105       for (int j = 0; j < i; j++) {
106         internal_close(sock_pair[j][0]);
107         internal_close(sock_pair[j][1]);
108       }
109       return false;
110     } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) {
111       if (infd == NULL) {
112         infd = sock_pair[i];
113       } else {
114         outfd = sock_pair[i];
115         for (int j = 0; j < i; j++) {
116           if (sock_pair[j] == infd) continue;
117           internal_close(sock_pair[j][0]);
118           internal_close(sock_pair[j][1]);
119         }
120         break;
121       }
122     }
123   }
124   CHECK(infd);
125   CHECK(outfd);
126   infd_[0] = infd[0];
127   infd_[1] = infd[1];
128   outfd_[0] = outfd[0];
129   outfd_[1] = outfd[1];
130   return true;
131 }
132 
133 bool SymbolizerProcess::StartSymbolizerSubprocess() {
134   if (!FileExists(path_)) {
135     if (!reported_invalid_path_) {
136       Report("WARNING: invalid path to external symbolizer!\n");
137       reported_invalid_path_ = true;
138     }
139     return false;
140   }
141 
142   const char *argv[kArgVMax];
143   GetArgV(path_, argv);
144   pid_t pid;
145 
146   // Report how symbolizer is being launched for debugging purposes.
147   if (Verbosity() >= 3) {
148     // Only use `Report` for first line so subsequent prints don't get prefixed
149     // with current PID.
150     Report("Launching Symbolizer process: ");
151     for (unsigned index = 0; index < kArgVMax && argv[index]; ++index)
152       Printf("%s ", argv[index]);
153     Printf("\n");
154   }
155 
156   if (use_posix_spawn_) {
157 #if SANITIZER_APPLE
158     fd_t fd = internal_spawn(argv, const_cast<const char **>(GetEnvP()), &pid);
159     if (fd == kInvalidFd) {
160       Report("WARNING: failed to spawn external symbolizer (errno: %d)\n",
161              errno);
162       return false;
163     }
164 
165     input_fd_ = fd;
166     output_fd_ = fd;
167 #else  // SANITIZER_APPLE
168     UNIMPLEMENTED();
169 #endif  // SANITIZER_APPLE
170   } else {
171     fd_t infd[2] = {}, outfd[2] = {};
172     if (!CreateTwoHighNumberedPipes(infd, outfd)) {
173       Report("WARNING: Can't create a socket pair to start "
174              "external symbolizer (errno: %d)\n", errno);
175       return false;
176     }
177 
178     pid = StartSubprocess(path_, argv, GetEnvP(), /* stdin */ outfd[0],
179                           /* stdout */ infd[1]);
180     if (pid < 0) {
181       internal_close(infd[0]);
182       internal_close(outfd[1]);
183       return false;
184     }
185 
186     input_fd_ = infd[0];
187     output_fd_ = outfd[1];
188   }
189 
190   CHECK_GT(pid, 0);
191 
192   // Check that symbolizer subprocess started successfully.
193   SleepForMillis(kSymbolizerStartupTimeMillis);
194   if (!IsProcessRunning(pid)) {
195     // Either waitpid failed, or child has already exited.
196     Report("WARNING: external symbolizer didn't start up correctly!\n");
197     return false;
198   }
199 
200   return true;
201 }
202 
203 class Addr2LineProcess final : public SymbolizerProcess {
204  public:
205   Addr2LineProcess(const char *path, const char *module_name)
206       : SymbolizerProcess(path), module_name_(internal_strdup(module_name)) {}
207 
208   const char *module_name() const { return module_name_; }
209 
210  private:
211   void GetArgV(const char *path_to_binary,
212                const char *(&argv)[kArgVMax]) const override {
213     int i = 0;
214     argv[i++] = path_to_binary;
215     if (common_flags()->demangle)
216       argv[i++] = "-C";
217     if (common_flags()->symbolize_inline_frames)
218       argv[i++] = "-i";
219     argv[i++] = "-fe";
220     argv[i++] = module_name_;
221     argv[i++] = nullptr;
222     CHECK_LE(i, kArgVMax);
223   }
224 
225   bool ReachedEndOfOutput(const char *buffer, uptr length) const override;
226 
227   bool ReadFromSymbolizer() override {
228     if (!SymbolizerProcess::ReadFromSymbolizer())
229       return false;
230     auto &buff = GetBuff();
231     // We should cut out output_terminator_ at the end of given buffer,
232     // appended by addr2line to mark the end of its meaningful output.
233     // We cannot scan buffer from it's beginning, because it is legal for it
234     // to start with output_terminator_ in case given offset is invalid. So,
235     // scanning from second character.
236     char *garbage = internal_strstr(buff.data() + 1, output_terminator_);
237     // This should never be NULL since buffer must end up with
238     // output_terminator_.
239     CHECK(garbage);
240 
241     // Trim the buffer.
242     uintptr_t new_size = garbage - buff.data();
243     GetBuff().resize(new_size);
244     GetBuff().push_back('\0');
245     return true;
246   }
247 
248   const char *module_name_;  // Owned, leaked.
249   static const char output_terminator_[];
250 };
251 
252 const char Addr2LineProcess::output_terminator_[] = "??\n??:0\n";
253 
254 bool Addr2LineProcess::ReachedEndOfOutput(const char *buffer,
255                                           uptr length) const {
256   const size_t kTerminatorLen = sizeof(output_terminator_) - 1;
257   // Skip, if we read just kTerminatorLen bytes, because Addr2Line output
258   // should consist at least of two pairs of lines:
259   // 1. First one, corresponding to given offset to be symbolized
260   // (may be equal to output_terminator_, if offset is not valid).
261   // 2. Second one for output_terminator_, itself to mark the end of output.
262   if (length <= kTerminatorLen) return false;
263   // Addr2Line output should end up with output_terminator_.
264   return !internal_memcmp(buffer + length - kTerminatorLen,
265                           output_terminator_, kTerminatorLen);
266 }
267 
268 class Addr2LinePool final : public SymbolizerTool {
269  public:
270   explicit Addr2LinePool(const char *addr2line_path,
271                          LowLevelAllocator *allocator)
272       : addr2line_path_(addr2line_path), allocator_(allocator) {
273     addr2line_pool_.reserve(16);
274   }
275 
276   bool SymbolizePC(uptr addr, SymbolizedStack *stack) override {
277     if (const char *buf =
278             SendCommand(stack->info.module, stack->info.module_offset)) {
279       ParseSymbolizePCOutput(buf, stack);
280       return true;
281     }
282     return false;
283   }
284 
285   bool SymbolizeData(uptr addr, DataInfo *info) override {
286     return false;
287   }
288 
289  private:
290   const char *SendCommand(const char *module_name, uptr module_offset) {
291     Addr2LineProcess *addr2line = 0;
292     for (uptr i = 0; i < addr2line_pool_.size(); ++i) {
293       if (0 ==
294           internal_strcmp(module_name, addr2line_pool_[i]->module_name())) {
295         addr2line = addr2line_pool_[i];
296         break;
297       }
298     }
299     if (!addr2line) {
300       addr2line =
301           new(*allocator_) Addr2LineProcess(addr2line_path_, module_name);
302       addr2line_pool_.push_back(addr2line);
303     }
304     CHECK_EQ(0, internal_strcmp(module_name, addr2line->module_name()));
305     char buffer[kBufferSize];
306     internal_snprintf(buffer, kBufferSize, "0x%zx\n0x%zx\n",
307                       module_offset, dummy_address_);
308     return addr2line->SendCommand(buffer);
309   }
310 
311   static const uptr kBufferSize = 64;
312   const char *addr2line_path_;
313   LowLevelAllocator *allocator_;
314   InternalMmapVector<Addr2LineProcess*> addr2line_pool_;
315   static const uptr dummy_address_ =
316       FIRST_32_SECOND_64(UINT32_MAX, UINT64_MAX);
317 };
318 
319 #  if SANITIZER_SUPPORTS_WEAK_HOOKS
// Weak hooks of an optional in-process ("internal") symbolizer. Each symbol
// may be absent, so InternalSymbolizer tests a hook's address before relying
// on it.
extern "C" {
// Symbolizes a code location given as module name + offset, writing text into
// Buffer (at most MaxLength bytes). InternalSymbolizer treats `true` as
// success and parses Buffer with ParseSymbolizePCOutput.
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
__sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
                           char *Buffer, int MaxLength);
// Data counterpart of the above; on success InternalSymbolizer parses Buffer
// with ParseSymbolizeDataOutput.
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
__sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
                           char *Buffer, int MaxLength);
// Called from InternalSymbolizer::Flush(); presumably drops cached
// symbolization state (the implementation is not visible here).
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
__sanitizer_symbolize_flush();
// Demangles Name into Buffer. InternalSymbolizer::Demangle interprets a return
// value larger than MaxLength as the size needed and retries with a bigger
// buffer.
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int
__sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength);
// Option setters; InternalSymbolizer::get() CHECKs that they return true.
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
__sanitizer_symbolize_set_demangle(bool Demangle);
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
__sanitizer_symbolize_set_inline_frames(bool InlineFrames);
}  // extern "C"
336 
337 class InternalSymbolizer final : public SymbolizerTool {
338  public:
339   static InternalSymbolizer *get(LowLevelAllocator *alloc) {
340     if (__sanitizer_symbolize_set_demangle)
341       CHECK(__sanitizer_symbolize_set_demangle(common_flags()->demangle));
342     if (__sanitizer_symbolize_set_inline_frames)
343       CHECK(__sanitizer_symbolize_set_inline_frames(
344           common_flags()->symbolize_inline_frames));
345     if (__sanitizer_symbolize_code && __sanitizer_symbolize_data)
346       return new (*alloc) InternalSymbolizer();
347     return 0;
348   }
349 
350   bool SymbolizePC(uptr addr, SymbolizedStack *stack) override {
351     bool result = __sanitizer_symbolize_code(
352         stack->info.module, stack->info.module_offset, buffer_, kBufferSize);
353     if (result)
354       ParseSymbolizePCOutput(buffer_, stack);
355     return result;
356   }
357 
358   bool SymbolizeData(uptr addr, DataInfo *info) override {
359     bool result = __sanitizer_symbolize_data(info->module, info->module_offset,
360                                              buffer_, kBufferSize);
361     if (result) {
362       ParseSymbolizeDataOutput(buffer_, info);
363       info->start += (addr - info->module_offset);  // Add the base address.
364     }
365     return result;
366   }
367 
368   void Flush() override {
369     if (__sanitizer_symbolize_flush)
370       __sanitizer_symbolize_flush();
371   }
372 
373   const char *Demangle(const char *name) override {
374     if (__sanitizer_symbolize_demangle) {
375       for (uptr res_length = 1024;
376            res_length <= InternalSizeClassMap::kMaxSize;) {
377         char *res_buff = static_cast<char *>(InternalAlloc(res_length));
378         uptr req_length =
379             __sanitizer_symbolize_demangle(name, res_buff, res_length);
380         if (req_length > res_length) {
381           res_length = req_length + 1;
382           InternalFree(res_buff);
383           continue;
384         }
385         return res_buff;
386       }
387     }
388     return name;
389   }
390 
391  private:
392   InternalSymbolizer() {}
393 
394   static const int kBufferSize = 16 * 1024;
395   char buffer_[kBufferSize];
396 };
397 #  else  // SANITIZER_SUPPORTS_WEAK_HOOKS
398 
399 class InternalSymbolizer final : public SymbolizerTool {
400  public:
401   static InternalSymbolizer *get(LowLevelAllocator *alloc) { return 0; }
402 };
403 
404 #  endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
405 
// POSIX demangling entry point: forwards to DemangleSwiftAndCXX, which tries
// the Swift demangler first and then the C++ ABI demangler.
const char *Symbolizer::PlatformDemangle(const char *name) {
  return DemangleSwiftAndCXX(name);
}
409 
410 static SymbolizerTool *ChooseExternalSymbolizer(LowLevelAllocator *allocator) {
411   const char *path = common_flags()->external_symbolizer_path;
412 
413   if (path && internal_strchr(path, '%')) {
414     char *new_path = (char *)InternalAlloc(kMaxPathLength);
415     SubstituteForFlagValue(path, new_path, kMaxPathLength);
416     path = new_path;
417   }
418 
419   const char *binary_name = path ? StripModuleName(path) : "";
420   static const char kLLVMSymbolizerPrefix[] = "llvm-symbolizer";
421   if (path && path[0] == '\0') {
422     VReport(2, "External symbolizer is explicitly disabled.\n");
423     return nullptr;
424   } else if (!internal_strncmp(binary_name, kLLVMSymbolizerPrefix,
425                                internal_strlen(kLLVMSymbolizerPrefix))) {
426     VReport(2, "Using llvm-symbolizer at user-specified path: %s\n", path);
427     return new(*allocator) LLVMSymbolizer(path, allocator);
428   } else if (!internal_strcmp(binary_name, "atos")) {
429 #if SANITIZER_APPLE
430     VReport(2, "Using atos at user-specified path: %s\n", path);
431     return new(*allocator) AtosSymbolizer(path, allocator);
432 #else  // SANITIZER_APPLE
433     Report("ERROR: Using `atos` is only supported on Darwin.\n");
434     Die();
435 #endif  // SANITIZER_APPLE
436   } else if (!internal_strcmp(binary_name, "addr2line")) {
437     VReport(2, "Using addr2line at user-specified path: %s\n", path);
438     return new(*allocator) Addr2LinePool(path, allocator);
439   } else if (path) {
440     Report("ERROR: External symbolizer path is set to '%s' which isn't "
441            "a known symbolizer. Please set the path to the llvm-symbolizer "
442            "binary or other known tool.\n", path);
443     Die();
444   }
445 
446   // Otherwise symbolizer program is unknown, let's search $PATH
447   CHECK(path == nullptr);
448 #if SANITIZER_APPLE
449   if (const char *found_path = FindPathToBinary("atos")) {
450     VReport(2, "Using atos found at: %s\n", found_path);
451     return new(*allocator) AtosSymbolizer(found_path, allocator);
452   }
453 #endif  // SANITIZER_APPLE
454   if (const char *found_path = FindPathToBinary("llvm-symbolizer")) {
455     VReport(2, "Using llvm-symbolizer found at: %s\n", found_path);
456     return new(*allocator) LLVMSymbolizer(found_path, allocator);
457   }
458   if (common_flags()->allow_addr2line) {
459     if (const char *found_path = FindPathToBinary("addr2line")) {
460       VReport(2, "Using addr2line found at: %s\n", found_path);
461       return new(*allocator) Addr2LinePool(found_path, allocator);
462     }
463   }
464   return nullptr;
465 }
466 
467 static void ChooseSymbolizerTools(IntrusiveList<SymbolizerTool> *list,
468                                   LowLevelAllocator *allocator) {
469   if (!common_flags()->symbolize) {
470     VReport(2, "Symbolizer is disabled.\n");
471     return;
472   }
473   if (IsAllocatorOutOfMemory()) {
474     VReport(2, "Cannot use internal symbolizer: out of memory\n");
475   } else if (SymbolizerTool *tool = InternalSymbolizer::get(allocator)) {
476     VReport(2, "Using internal symbolizer.\n");
477     list->push_back(tool);
478     return;
479   }
480   if (SymbolizerTool *tool = LibbacktraceSymbolizer::get(allocator)) {
481     VReport(2, "Using libbacktrace symbolizer.\n");
482     list->push_back(tool);
483     return;
484   }
485 
486   if (SymbolizerTool *tool = ChooseExternalSymbolizer(allocator)) {
487     list->push_back(tool);
488   }
489 
490 #if SANITIZER_APPLE
491   VReport(2, "Using dladdr symbolizer.\n");
492   list->push_back(new(*allocator) DlAddrSymbolizer());
493 #endif  // SANITIZER_APPLE
494 }
495 
496 Symbolizer *Symbolizer::PlatformInit() {
497   IntrusiveList<SymbolizerTool> list;
498   list.clear();
499   ChooseSymbolizerTools(&list, &symbolizer_allocator_);
500   return new(symbolizer_allocator_) Symbolizer(list);
501 }
502 
// Late-stage setup. Calls GetOrInit() (presumably creating the Symbolizer if
// it does not exist yet) and resolves the Swift demangler up front, since
// dlsym must not run lazily at symbolication time.
void Symbolizer::LateInitialize() {
  Symbolizer::GetOrInit();
  InitializeSwiftDemangler();
}
507 
508 }  // namespace __sanitizer
509 
510 #endif  // SANITIZER_POSIX
511