10b57cec5SDimitry Andric //===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Symbolizer is used by sanitizers to map instruction address to a location in 100b57cec5SDimitry Andric // source code at run-time. Symbolizer either uses __sanitizer_symbolize_* 110b57cec5SDimitry Andric // defined in the program, or (if they are missing) tries to find and 120b57cec5SDimitry Andric // launch "llvm-symbolizer" commandline tool in a separate process and 130b57cec5SDimitry Andric // communicate with it. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric // Generally we should try to avoid calling system library functions during 160b57cec5SDimitry Andric // symbolization (and use their replacements from sanitizer_libc.h instead). 170b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 180b57cec5SDimitry Andric #ifndef SANITIZER_SYMBOLIZER_H 190b57cec5SDimitry Andric #define SANITIZER_SYMBOLIZER_H 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric #include "sanitizer_common.h" 220b57cec5SDimitry Andric #include "sanitizer_mutex.h" 230b57cec5SDimitry Andric #include "sanitizer_vector.h" 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric namespace __sanitizer { 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric struct AddressInfo { 280b57cec5SDimitry Andric // Owns all the string members. Storage for them is 290b57cec5SDimitry Andric // (de)allocated using sanitizer internal allocator. 300b57cec5SDimitry Andric uptr address; 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric char *module; 330b57cec5SDimitry Andric uptr module_offset; 340b57cec5SDimitry Andric ModuleArch module_arch; 350eae32dcSDimitry Andric u8 uuid[kModuleUUIDSize]; 360eae32dcSDimitry Andric uptr uuid_size; 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric static const uptr kUnknown = ~(uptr)0; 390b57cec5SDimitry Andric char *function; 400b57cec5SDimitry Andric uptr function_offset; 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric char *file; 430b57cec5SDimitry Andric int line; 440b57cec5SDimitry Andric int column; 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric AddressInfo(); 470b57cec5SDimitry Andric // Deletes all strings and resets all fields. 480b57cec5SDimitry Andric void Clear(); 490b57cec5SDimitry Andric void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch); 500eae32dcSDimitry Andric void FillModuleInfo(const LoadedModule &mod); module_baseAddressInfo510eae32dcSDimitry Andric uptr module_base() const { return address - module_offset; } 520b57cec5SDimitry Andric }; 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric // Linked list of symbolized frames (each frame is described by AddressInfo). 550b57cec5SDimitry Andric struct SymbolizedStack { 560b57cec5SDimitry Andric SymbolizedStack *next; 570b57cec5SDimitry Andric AddressInfo info; 580b57cec5SDimitry Andric static SymbolizedStack *New(uptr addr); 590b57cec5SDimitry Andric // Deletes current, and all subsequent frames in the linked list. 600b57cec5SDimitry Andric // The object cannot be accessed after the call to this function. 610b57cec5SDimitry Andric void ClearAll(); 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric private: 640b57cec5SDimitry Andric SymbolizedStack(); 650b57cec5SDimitry Andric }; 660b57cec5SDimitry Andric 671db9f3b2SDimitry Andric class SymbolizedStackHolder { 681db9f3b2SDimitry Andric SymbolizedStack *Stack; 691db9f3b2SDimitry Andric clear()701db9f3b2SDimitry Andric void clear() { 711db9f3b2SDimitry Andric if (Stack) 721db9f3b2SDimitry Andric Stack->ClearAll(); 731db9f3b2SDimitry Andric } 741db9f3b2SDimitry Andric 751db9f3b2SDimitry Andric public: 761db9f3b2SDimitry Andric explicit SymbolizedStackHolder(SymbolizedStack *Stack = nullptr) Stack(Stack)771db9f3b2SDimitry Andric : Stack(Stack) {} ~SymbolizedStackHolder()781db9f3b2SDimitry Andric ~SymbolizedStackHolder() { clear(); } 791db9f3b2SDimitry Andric void reset(SymbolizedStack *S = nullptr) { 801db9f3b2SDimitry Andric if (Stack != S) 811db9f3b2SDimitry Andric clear(); 821db9f3b2SDimitry Andric Stack = S; 831db9f3b2SDimitry Andric } get()841db9f3b2SDimitry Andric const SymbolizedStack *get() const { return Stack; } 851db9f3b2SDimitry Andric }; 861db9f3b2SDimitry Andric 870b57cec5SDimitry Andric // For now, DataInfo is used to describe global variable. 880b57cec5SDimitry Andric struct DataInfo { 890b57cec5SDimitry Andric // Owns all the string members. Storage for them is 900b57cec5SDimitry Andric // (de)allocated using sanitizer internal allocator. 910b57cec5SDimitry Andric char *module; 920b57cec5SDimitry Andric uptr module_offset; 930b57cec5SDimitry Andric ModuleArch module_arch; 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric char *file; 960b57cec5SDimitry Andric uptr line; 970b57cec5SDimitry Andric char *name; 980b57cec5SDimitry Andric uptr start; 990b57cec5SDimitry Andric uptr size; 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric DataInfo(); 1020b57cec5SDimitry Andric void Clear(); 1030b57cec5SDimitry Andric }; 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric struct LocalInfo { 1060b57cec5SDimitry Andric char *function_name = nullptr; 1070b57cec5SDimitry Andric char *name = nullptr; 1080b57cec5SDimitry Andric char *decl_file = nullptr; 1090b57cec5SDimitry Andric unsigned decl_line = 0; 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric bool has_frame_offset = false; 1120b57cec5SDimitry Andric bool has_size = false; 1130b57cec5SDimitry Andric bool has_tag_offset = false; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric sptr frame_offset; 1160b57cec5SDimitry Andric uptr size; 1170b57cec5SDimitry Andric uptr tag_offset; 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric void Clear(); 1200b57cec5SDimitry Andric }; 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric struct FrameInfo { 1230b57cec5SDimitry Andric char *module; 1240b57cec5SDimitry Andric uptr module_offset; 1250b57cec5SDimitry Andric ModuleArch module_arch; 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric InternalMmapVector<LocalInfo> locals; 1280b57cec5SDimitry Andric void Clear(); 1290b57cec5SDimitry Andric }; 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric class SymbolizerTool; 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric class Symbolizer final { 1340b57cec5SDimitry Andric public: 1350b57cec5SDimitry Andric /// Initialize and return platform-specific implementation of symbolizer 1360b57cec5SDimitry Andric /// (if it wasn't already initialized). 1370b57cec5SDimitry Andric static Symbolizer *GetOrInit(); 1380b57cec5SDimitry Andric static void LateInitialize(); 1390b57cec5SDimitry Andric // Returns a list of symbolized frames for a given address (containing 1400b57cec5SDimitry Andric // all inlined functions, if necessary). 1410b57cec5SDimitry Andric SymbolizedStack *SymbolizePC(uptr address); 1420b57cec5SDimitry Andric bool SymbolizeData(uptr address, DataInfo *info); 1430b57cec5SDimitry Andric bool SymbolizeFrame(uptr address, FrameInfo *info); 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric // The module names Symbolizer returns are stable and unique for every given 1460b57cec5SDimitry Andric // module. It is safe to store and compare them as pointers. 1470b57cec5SDimitry Andric bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, 1480b57cec5SDimitry Andric uptr *module_address); GetModuleNameForPc(uptr pc)1490b57cec5SDimitry Andric const char *GetModuleNameForPc(uptr pc) { 1500b57cec5SDimitry Andric const char *module_name = nullptr; 1510b57cec5SDimitry Andric uptr unused; 1520b57cec5SDimitry Andric if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused)) 1530b57cec5SDimitry Andric return module_name; 1540b57cec5SDimitry Andric return nullptr; 1550b57cec5SDimitry Andric } 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric // Release internal caches (if any). 1580b57cec5SDimitry Andric void Flush(); 1595f757f3fSDimitry Andric // Attempts to demangle the provided C++ mangled name. Never returns nullptr. 1600b57cec5SDimitry Andric const char *Demangle(const char *name); 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric // Allow user to install hooks that would be called before/after Symbolizer 1630b57cec5SDimitry Andric // does the actual file/line info fetching. Specific sanitizers may need this 1640b57cec5SDimitry Andric // to distinguish system library calls made in user code from calls made 1650b57cec5SDimitry Andric // during in-process symbolization. 1660b57cec5SDimitry Andric typedef void (*StartSymbolizationHook)(); 1670b57cec5SDimitry Andric typedef void (*EndSymbolizationHook)(); 1680b57cec5SDimitry Andric // May be called at most once. 1690b57cec5SDimitry Andric void AddHooks(StartSymbolizationHook start_hook, 1700b57cec5SDimitry Andric EndSymbolizationHook end_hook); 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric void RefreshModules(); 1730b57cec5SDimitry Andric const LoadedModule *FindModuleForAddress(uptr address); 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric void InvalidateModuleList(); 1760b57cec5SDimitry Andric 1775f757f3fSDimitry Andric const ListOfModules &GetRefreshedListOfModules(); 1785f757f3fSDimitry Andric 1790b57cec5SDimitry Andric private: 1800b57cec5SDimitry Andric // GetModuleNameAndOffsetForPC has to return a string to the caller. 1810b57cec5SDimitry Andric // Since the corresponding module might get unloaded later, we should create 1820b57cec5SDimitry Andric // our owned copies of the strings that we can safely return. 1830b57cec5SDimitry Andric // ModuleNameOwner does not provide any synchronization, thus calls to 1840b57cec5SDimitry Andric // its method should be protected by |mu_|. 1850b57cec5SDimitry Andric class ModuleNameOwner { 1860b57cec5SDimitry Andric public: ModuleNameOwner(Mutex * synchronized_by)187349cc55cSDimitry Andric explicit ModuleNameOwner(Mutex *synchronized_by) 1880b57cec5SDimitry Andric : last_match_(nullptr), mu_(synchronized_by) { 1890b57cec5SDimitry Andric storage_.reserve(kInitialCapacity); 1900b57cec5SDimitry Andric } 1910b57cec5SDimitry Andric const char *GetOwnedCopy(const char *str); 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric private: 1940b57cec5SDimitry Andric static const uptr kInitialCapacity = 1000; 1950b57cec5SDimitry Andric InternalMmapVector<const char*> storage_; 1960b57cec5SDimitry Andric const char *last_match_; 1970b57cec5SDimitry Andric 198349cc55cSDimitry Andric Mutex *mu_; 1990b57cec5SDimitry Andric } module_names_; 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric /// Platform-specific function for creating a Symbolizer object. 2020b57cec5SDimitry Andric static Symbolizer *PlatformInit(); 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name, 2050b57cec5SDimitry Andric uptr *module_offset, 2060b57cec5SDimitry Andric ModuleArch *module_arch); 2070b57cec5SDimitry Andric ListOfModules modules_; 2080b57cec5SDimitry Andric ListOfModules fallback_modules_; 2090b57cec5SDimitry Andric // If stale, need to reload the modules before looking up addresses. 2100b57cec5SDimitry Andric bool modules_fresh_; 2110b57cec5SDimitry Andric 2125f757f3fSDimitry Andric // Platform-specific default demangler, returns nullptr on failure. 2130b57cec5SDimitry Andric const char *PlatformDemangle(const char *name); 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric static Symbolizer *symbolizer_; 2160b57cec5SDimitry Andric static StaticSpinMutex init_mu_; 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric // Mutex locked from public methods of |Symbolizer|, so that the internals 2190b57cec5SDimitry Andric // (including individual symbolizer tools and platform-specific methods) are 2200b57cec5SDimitry Andric // always synchronized. 221349cc55cSDimitry Andric Mutex mu_; 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric IntrusiveList<SymbolizerTool> tools_; 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric explicit Symbolizer(IntrusiveList<SymbolizerTool> tools); 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric static LowLevelAllocator symbolizer_allocator_; 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric StartSymbolizationHook start_hook_; 2300b57cec5SDimitry Andric EndSymbolizationHook end_hook_; 2310b57cec5SDimitry Andric class SymbolizerScope { 2320b57cec5SDimitry Andric public: 2330b57cec5SDimitry Andric explicit SymbolizerScope(const Symbolizer *sym); 2340b57cec5SDimitry Andric ~SymbolizerScope(); 2350b57cec5SDimitry Andric private: 2360b57cec5SDimitry Andric const Symbolizer *sym_; 2375f757f3fSDimitry Andric int errno_; // Backup errno in case symbolizer change the value. 2380b57cec5SDimitry Andric }; 2390b57cec5SDimitry Andric }; 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric #ifdef SANITIZER_WINDOWS 2420b57cec5SDimitry Andric void InitializeDbgHelpIfNeeded(); 2430b57cec5SDimitry Andric #endif 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric } // namespace __sanitizer 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric #endif // SANITIZER_SYMBOLIZER_H 248