1 //===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Symbolizer is used by sanitizers to map instruction address to a location in
10 // source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
11 // defined in the program, or (if they are missing) tries to find and
12 // launch "llvm-symbolizer" commandline tool in a separate process and
13 // communicate with it.
14 //
15 // Generally we should try to avoid calling system library functions during
16 // symbolization (and use their replacements from sanitizer_libc.h instead).
17 //===----------------------------------------------------------------------===//
18 #ifndef SANITIZER_SYMBOLIZER_H
19 #define SANITIZER_SYMBOLIZER_H
20 
21 #include "sanitizer_common.h"
22 #include "sanitizer_mutex.h"
23 #include "sanitizer_vector.h"
24 
25 namespace __sanitizer {
26 
27 struct AddressInfo {
28   // Owns all the string members. Storage for them is
29   // (de)allocated using sanitizer internal allocator.
30   uptr address;
31 
32   char *module;
33   uptr module_offset;
34   ModuleArch module_arch;
35 
36   static const uptr kUnknown = ~(uptr)0;
37   char *function;
38   uptr function_offset;
39 
40   char *file;
41   int line;
42   int column;
43 
44   AddressInfo();
45   // Deletes all strings and resets all fields.
46   void Clear();
47   void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
48 };
49 
50 // Linked list of symbolized frames (each frame is described by AddressInfo).
51 struct SymbolizedStack {
52   SymbolizedStack *next;
53   AddressInfo info;
54   static SymbolizedStack *New(uptr addr);
55   // Deletes current, and all subsequent frames in the linked list.
56   // The object cannot be accessed after the call to this function.
57   void ClearAll();
58 
59  private:
60   SymbolizedStack();
61 };
62 
63 // For now, DataInfo is used to describe global variable.
64 struct DataInfo {
65   // Owns all the string members. Storage for them is
66   // (de)allocated using sanitizer internal allocator.
67   char *module;
68   uptr module_offset;
69   ModuleArch module_arch;
70 
71   char *file;
72   uptr line;
73   char *name;
74   uptr start;
75   uptr size;
76 
77   DataInfo();
78   void Clear();
79 };
80 
81 struct LocalInfo {
82   char *function_name = nullptr;
83   char *name = nullptr;
84   char *decl_file = nullptr;
85   unsigned decl_line = 0;
86 
87   bool has_frame_offset = false;
88   bool has_size = false;
89   bool has_tag_offset = false;
90 
91   sptr frame_offset;
92   uptr size;
93   uptr tag_offset;
94 
95   void Clear();
96 };
97 
98 struct FrameInfo {
99   char *module;
100   uptr module_offset;
101   ModuleArch module_arch;
102 
103   InternalMmapVector<LocalInfo> locals;
104   void Clear();
105 };
106 
107 class SymbolizerTool;
108 
109 class Symbolizer final {
110  public:
111   /// Initialize and return platform-specific implementation of symbolizer
112   /// (if it wasn't already initialized).
113   static Symbolizer *GetOrInit();
114   static void LateInitialize();
115   // Returns a list of symbolized frames for a given address (containing
116   // all inlined functions, if necessary).
117   SymbolizedStack *SymbolizePC(uptr address);
118   bool SymbolizeData(uptr address, DataInfo *info);
119   bool SymbolizeFrame(uptr address, FrameInfo *info);
120 
121   // The module names Symbolizer returns are stable and unique for every given
122   // module.  It is safe to store and compare them as pointers.
123   bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
124                                    uptr *module_address);
125   const char *GetModuleNameForPc(uptr pc) {
126     const char *module_name = nullptr;
127     uptr unused;
128     if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
129       return module_name;
130     return nullptr;
131   }
132 
133   // Release internal caches (if any).
134   void Flush();
135   // Attempts to demangle the provided C++ mangled name.
136   const char *Demangle(const char *name);
137 
138   // Allow user to install hooks that would be called before/after Symbolizer
139   // does the actual file/line info fetching. Specific sanitizers may need this
140   // to distinguish system library calls made in user code from calls made
141   // during in-process symbolization.
142   typedef void (*StartSymbolizationHook)();
143   typedef void (*EndSymbolizationHook)();
144   // May be called at most once.
145   void AddHooks(StartSymbolizationHook start_hook,
146                 EndSymbolizationHook end_hook);
147 
148   void RefreshModules();
149   const LoadedModule *FindModuleForAddress(uptr address);
150 
151   void InvalidateModuleList();
152 
153  private:
154   // GetModuleNameAndOffsetForPC has to return a string to the caller.
155   // Since the corresponding module might get unloaded later, we should create
156   // our owned copies of the strings that we can safely return.
157   // ModuleNameOwner does not provide any synchronization, thus calls to
158   // its method should be protected by |mu_|.
159   class ModuleNameOwner {
160    public:
161     explicit ModuleNameOwner(BlockingMutex *synchronized_by)
162         : last_match_(nullptr), mu_(synchronized_by) {
163       storage_.reserve(kInitialCapacity);
164     }
165     const char *GetOwnedCopy(const char *str);
166 
167    private:
168     static const uptr kInitialCapacity = 1000;
169     InternalMmapVector<const char*> storage_;
170     const char *last_match_;
171 
172     BlockingMutex *mu_;
173   } module_names_;
174 
175   /// Platform-specific function for creating a Symbolizer object.
176   static Symbolizer *PlatformInit();
177 
178   bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
179                                          uptr *module_offset,
180                                          ModuleArch *module_arch);
181   ListOfModules modules_;
182   ListOfModules fallback_modules_;
183   // If stale, need to reload the modules before looking up addresses.
184   bool modules_fresh_;
185 
186   // Platform-specific default demangler, must not return nullptr.
187   const char *PlatformDemangle(const char *name);
188 
189   static Symbolizer *symbolizer_;
190   static StaticSpinMutex init_mu_;
191 
192   // Mutex locked from public methods of |Symbolizer|, so that the internals
193   // (including individual symbolizer tools and platform-specific methods) are
194   // always synchronized.
195   BlockingMutex mu_;
196 
197   IntrusiveList<SymbolizerTool> tools_;
198 
199   explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
200 
201   static LowLevelAllocator symbolizer_allocator_;
202 
203   StartSymbolizationHook start_hook_;
204   EndSymbolizationHook end_hook_;
205   class SymbolizerScope {
206    public:
207     explicit SymbolizerScope(const Symbolizer *sym);
208     ~SymbolizerScope();
209    private:
210     const Symbolizer *sym_;
211   };
212 
213   // Calls `LateInitialize()` on all items in `tools_`.
214   void LateInitializeTools();
215 };
216 
// Declared for Windows builds only. The guard tests the VALUE of the macro:
// `#ifdef` would also accept a macro defined as 0 and so expose the
// declaration on every platform.
// NOTE(review): relies on SANITIZER_WINDOWS being defined as 0 or 1 (or not
// at all) -- confirm in sanitizer_platform.h.
#if SANITIZER_WINDOWS
void InitializeDbgHelpIfNeeded();
#endif
220 
221 }  // namespace __sanitizer
222 
223 #endif  // SANITIZER_SYMBOLIZER_H
224