//===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Symbolizer is used by sanitizers to map instruction address to a location in
// source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
// defined in the program, or (if they are missing) tries to find and
// launch "llvm-symbolizer" commandline tool in a separate process and
// communicate with it.
//
// Generally we should try to avoid calling system library functions during
// symbolization (and use their replacements from sanitizer_libc.h instead).
//===----------------------------------------------------------------------===//
180b57cec5SDimitry Andric #ifndef SANITIZER_SYMBOLIZER_H
190b57cec5SDimitry Andric #define SANITIZER_SYMBOLIZER_H
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric #include "sanitizer_common.h"
220b57cec5SDimitry Andric #include "sanitizer_mutex.h"
230b57cec5SDimitry Andric #include "sanitizer_vector.h"
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric namespace __sanitizer {
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric struct AddressInfo {
280b57cec5SDimitry Andric   // Owns all the string members. Storage for them is
290b57cec5SDimitry Andric   // (de)allocated using sanitizer internal allocator.
300b57cec5SDimitry Andric   uptr address;
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric   char *module;
330b57cec5SDimitry Andric   uptr module_offset;
340b57cec5SDimitry Andric   ModuleArch module_arch;
350eae32dcSDimitry Andric   u8 uuid[kModuleUUIDSize];
360eae32dcSDimitry Andric   uptr uuid_size;
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric   static const uptr kUnknown = ~(uptr)0;
390b57cec5SDimitry Andric   char *function;
400b57cec5SDimitry Andric   uptr function_offset;
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric   char *file;
430b57cec5SDimitry Andric   int line;
440b57cec5SDimitry Andric   int column;
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric   AddressInfo();
470b57cec5SDimitry Andric   // Deletes all strings and resets all fields.
480b57cec5SDimitry Andric   void Clear();
490b57cec5SDimitry Andric   void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
500eae32dcSDimitry Andric   void FillModuleInfo(const LoadedModule &mod);
module_baseAddressInfo510eae32dcSDimitry Andric   uptr module_base() const { return address - module_offset; }
520b57cec5SDimitry Andric };
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric // Linked list of symbolized frames (each frame is described by AddressInfo).
550b57cec5SDimitry Andric struct SymbolizedStack {
560b57cec5SDimitry Andric   SymbolizedStack *next;
570b57cec5SDimitry Andric   AddressInfo info;
580b57cec5SDimitry Andric   static SymbolizedStack *New(uptr addr);
590b57cec5SDimitry Andric   // Deletes current, and all subsequent frames in the linked list.
600b57cec5SDimitry Andric   // The object cannot be accessed after the call to this function.
610b57cec5SDimitry Andric   void ClearAll();
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric  private:
640b57cec5SDimitry Andric   SymbolizedStack();
650b57cec5SDimitry Andric };
660b57cec5SDimitry Andric 
671db9f3b2SDimitry Andric class SymbolizedStackHolder {
681db9f3b2SDimitry Andric   SymbolizedStack *Stack;
691db9f3b2SDimitry Andric 
clear()701db9f3b2SDimitry Andric   void clear() {
711db9f3b2SDimitry Andric     if (Stack)
721db9f3b2SDimitry Andric       Stack->ClearAll();
731db9f3b2SDimitry Andric   }
741db9f3b2SDimitry Andric 
751db9f3b2SDimitry Andric  public:
761db9f3b2SDimitry Andric   explicit SymbolizedStackHolder(SymbolizedStack *Stack = nullptr)
Stack(Stack)771db9f3b2SDimitry Andric       : Stack(Stack) {}
~SymbolizedStackHolder()781db9f3b2SDimitry Andric   ~SymbolizedStackHolder() { clear(); }
791db9f3b2SDimitry Andric   void reset(SymbolizedStack *S = nullptr) {
801db9f3b2SDimitry Andric     if (Stack != S)
811db9f3b2SDimitry Andric       clear();
821db9f3b2SDimitry Andric     Stack = S;
831db9f3b2SDimitry Andric   }
get()841db9f3b2SDimitry Andric   const SymbolizedStack *get() const { return Stack; }
851db9f3b2SDimitry Andric };
861db9f3b2SDimitry Andric 
870b57cec5SDimitry Andric // For now, DataInfo is used to describe global variable.
880b57cec5SDimitry Andric struct DataInfo {
890b57cec5SDimitry Andric   // Owns all the string members. Storage for them is
900b57cec5SDimitry Andric   // (de)allocated using sanitizer internal allocator.
910b57cec5SDimitry Andric   char *module;
920b57cec5SDimitry Andric   uptr module_offset;
930b57cec5SDimitry Andric   ModuleArch module_arch;
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric   char *file;
960b57cec5SDimitry Andric   uptr line;
970b57cec5SDimitry Andric   char *name;
980b57cec5SDimitry Andric   uptr start;
990b57cec5SDimitry Andric   uptr size;
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric   DataInfo();
1020b57cec5SDimitry Andric   void Clear();
1030b57cec5SDimitry Andric };
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric struct LocalInfo {
1060b57cec5SDimitry Andric   char *function_name = nullptr;
1070b57cec5SDimitry Andric   char *name = nullptr;
1080b57cec5SDimitry Andric   char *decl_file = nullptr;
1090b57cec5SDimitry Andric   unsigned decl_line = 0;
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric   bool has_frame_offset = false;
1120b57cec5SDimitry Andric   bool has_size = false;
1130b57cec5SDimitry Andric   bool has_tag_offset = false;
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric   sptr frame_offset;
1160b57cec5SDimitry Andric   uptr size;
1170b57cec5SDimitry Andric   uptr tag_offset;
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric   void Clear();
1200b57cec5SDimitry Andric };
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric struct FrameInfo {
1230b57cec5SDimitry Andric   char *module;
1240b57cec5SDimitry Andric   uptr module_offset;
1250b57cec5SDimitry Andric   ModuleArch module_arch;
1260b57cec5SDimitry Andric 
1270b57cec5SDimitry Andric   InternalMmapVector<LocalInfo> locals;
1280b57cec5SDimitry Andric   void Clear();
1290b57cec5SDimitry Andric };
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric class SymbolizerTool;
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric class Symbolizer final {
1340b57cec5SDimitry Andric  public:
1350b57cec5SDimitry Andric   /// Initialize and return platform-specific implementation of symbolizer
1360b57cec5SDimitry Andric   /// (if it wasn't already initialized).
1370b57cec5SDimitry Andric   static Symbolizer *GetOrInit();
1380b57cec5SDimitry Andric   static void LateInitialize();
1390b57cec5SDimitry Andric   // Returns a list of symbolized frames for a given address (containing
1400b57cec5SDimitry Andric   // all inlined functions, if necessary).
1410b57cec5SDimitry Andric   SymbolizedStack *SymbolizePC(uptr address);
1420b57cec5SDimitry Andric   bool SymbolizeData(uptr address, DataInfo *info);
1430b57cec5SDimitry Andric   bool SymbolizeFrame(uptr address, FrameInfo *info);
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric   // The module names Symbolizer returns are stable and unique for every given
1460b57cec5SDimitry Andric   // module.  It is safe to store and compare them as pointers.
1470b57cec5SDimitry Andric   bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
1480b57cec5SDimitry Andric                                    uptr *module_address);
GetModuleNameForPc(uptr pc)1490b57cec5SDimitry Andric   const char *GetModuleNameForPc(uptr pc) {
1500b57cec5SDimitry Andric     const char *module_name = nullptr;
1510b57cec5SDimitry Andric     uptr unused;
1520b57cec5SDimitry Andric     if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
1530b57cec5SDimitry Andric       return module_name;
1540b57cec5SDimitry Andric     return nullptr;
1550b57cec5SDimitry Andric   }
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric   // Release internal caches (if any).
1580b57cec5SDimitry Andric   void Flush();
1595f757f3fSDimitry Andric   // Attempts to demangle the provided C++ mangled name. Never returns nullptr.
1600b57cec5SDimitry Andric   const char *Demangle(const char *name);
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric   // Allow user to install hooks that would be called before/after Symbolizer
1630b57cec5SDimitry Andric   // does the actual file/line info fetching. Specific sanitizers may need this
1640b57cec5SDimitry Andric   // to distinguish system library calls made in user code from calls made
1650b57cec5SDimitry Andric   // during in-process symbolization.
1660b57cec5SDimitry Andric   typedef void (*StartSymbolizationHook)();
1670b57cec5SDimitry Andric   typedef void (*EndSymbolizationHook)();
1680b57cec5SDimitry Andric   // May be called at most once.
1690b57cec5SDimitry Andric   void AddHooks(StartSymbolizationHook start_hook,
1700b57cec5SDimitry Andric                 EndSymbolizationHook end_hook);
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   void RefreshModules();
1730b57cec5SDimitry Andric   const LoadedModule *FindModuleForAddress(uptr address);
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   void InvalidateModuleList();
1760b57cec5SDimitry Andric 
1775f757f3fSDimitry Andric   const ListOfModules &GetRefreshedListOfModules();
1785f757f3fSDimitry Andric 
1790b57cec5SDimitry Andric  private:
1800b57cec5SDimitry Andric   // GetModuleNameAndOffsetForPC has to return a string to the caller.
1810b57cec5SDimitry Andric   // Since the corresponding module might get unloaded later, we should create
1820b57cec5SDimitry Andric   // our owned copies of the strings that we can safely return.
1830b57cec5SDimitry Andric   // ModuleNameOwner does not provide any synchronization, thus calls to
1840b57cec5SDimitry Andric   // its method should be protected by |mu_|.
1850b57cec5SDimitry Andric   class ModuleNameOwner {
1860b57cec5SDimitry Andric    public:
ModuleNameOwner(Mutex * synchronized_by)187349cc55cSDimitry Andric     explicit ModuleNameOwner(Mutex *synchronized_by)
1880b57cec5SDimitry Andric         : last_match_(nullptr), mu_(synchronized_by) {
1890b57cec5SDimitry Andric       storage_.reserve(kInitialCapacity);
1900b57cec5SDimitry Andric     }
1910b57cec5SDimitry Andric     const char *GetOwnedCopy(const char *str);
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric    private:
1940b57cec5SDimitry Andric     static const uptr kInitialCapacity = 1000;
1950b57cec5SDimitry Andric     InternalMmapVector<const char*> storage_;
1960b57cec5SDimitry Andric     const char *last_match_;
1970b57cec5SDimitry Andric 
198349cc55cSDimitry Andric     Mutex *mu_;
1990b57cec5SDimitry Andric   } module_names_;
2000b57cec5SDimitry Andric 
2010b57cec5SDimitry Andric   /// Platform-specific function for creating a Symbolizer object.
2020b57cec5SDimitry Andric   static Symbolizer *PlatformInit();
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
2050b57cec5SDimitry Andric                                          uptr *module_offset,
2060b57cec5SDimitry Andric                                          ModuleArch *module_arch);
2070b57cec5SDimitry Andric   ListOfModules modules_;
2080b57cec5SDimitry Andric   ListOfModules fallback_modules_;
2090b57cec5SDimitry Andric   // If stale, need to reload the modules before looking up addresses.
2100b57cec5SDimitry Andric   bool modules_fresh_;
2110b57cec5SDimitry Andric 
2125f757f3fSDimitry Andric   // Platform-specific default demangler, returns nullptr on failure.
2130b57cec5SDimitry Andric   const char *PlatformDemangle(const char *name);
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric   static Symbolizer *symbolizer_;
2160b57cec5SDimitry Andric   static StaticSpinMutex init_mu_;
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric   // Mutex locked from public methods of |Symbolizer|, so that the internals
2190b57cec5SDimitry Andric   // (including individual symbolizer tools and platform-specific methods) are
2200b57cec5SDimitry Andric   // always synchronized.
221349cc55cSDimitry Andric   Mutex mu_;
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   IntrusiveList<SymbolizerTool> tools_;
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric   static LowLevelAllocator symbolizer_allocator_;
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric   StartSymbolizationHook start_hook_;
2300b57cec5SDimitry Andric   EndSymbolizationHook end_hook_;
2310b57cec5SDimitry Andric   class SymbolizerScope {
2320b57cec5SDimitry Andric    public:
2330b57cec5SDimitry Andric     explicit SymbolizerScope(const Symbolizer *sym);
2340b57cec5SDimitry Andric     ~SymbolizerScope();
2350b57cec5SDimitry Andric    private:
2360b57cec5SDimitry Andric     const Symbolizer *sym_;
2375f757f3fSDimitry Andric     int errno_;  // Backup errno in case symbolizer change the value.
2380b57cec5SDimitry Andric   };
2390b57cec5SDimitry Andric };
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric #ifdef SANITIZER_WINDOWS
2420b57cec5SDimitry Andric void InitializeDbgHelpIfNeeded();
2430b57cec5SDimitry Andric #endif
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric }  // namespace __sanitizer
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric #endif  // SANITIZER_SYMBOLIZER_H
248