1 //===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
2 //
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // Symbolizer is used by sanitizers to map instruction address to a location in
9 // source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
10 // defined in the program, or (if they are missing) tries to find and
11 // launch "llvm-symbolizer" commandline tool in a separate process and
12 // communicate with it.
13 //
14 // Generally we should try to avoid calling system library functions during
15 // symbolization (and use their replacements from sanitizer_libc.h instead).
16 //===----------------------------------------------------------------------===//
17 #ifndef SANITIZER_SYMBOLIZER_H
18 #define SANITIZER_SYMBOLIZER_H
19 
20 #include "sanitizer_common.h"
21 #include "sanitizer_mutex.h"
22 
23 namespace __sanitizer {
24 
25 struct AddressInfo {
26   // Owns all the string members. Storage for them is
27   // (de)allocated using sanitizer internal allocator.
28   uptr address;
29 
30   char *module;
31   uptr module_offset;
32   ModuleArch module_arch;
33 
34   static const uptr kUnknown = ~(uptr)0;
35   char *function;
36   uptr function_offset;
37 
38   char *file;
39   int line;
40   int column;
41 
42   AddressInfo();
43   // Deletes all strings and resets all fields.
44   void Clear();
45   void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
46 };
47 
48 // Linked list of symbolized frames (each frame is described by AddressInfo).
49 struct SymbolizedStack {
50   SymbolizedStack *next;
51   AddressInfo info;
52   static SymbolizedStack *New(uptr addr);
53   // Deletes current, and all subsequent frames in the linked list.
54   // The object cannot be accessed after the call to this function.
55   void ClearAll();
56 
57  private:
58   SymbolizedStack();
59 };
60 
61 // For now, DataInfo is used to describe global variable.
62 struct DataInfo {
63   // Owns all the string members. Storage for them is
64   // (de)allocated using sanitizer internal allocator.
65   char *module;
66   uptr module_offset;
67   ModuleArch module_arch;
68 
69   char *file;
70   uptr line;
71   char *name;
72   uptr start;
73   uptr size;
74 
75   DataInfo();
76   void Clear();
77 };
78 
79 class SymbolizerTool;
80 
81 class Symbolizer final {
82  public:
83   /// Initialize and return platform-specific implementation of symbolizer
84   /// (if it wasn't already initialized).
85   static Symbolizer *GetOrInit();
86   static void LateInitialize();
87   // Returns a list of symbolized frames for a given address (containing
88   // all inlined functions, if necessary).
89   SymbolizedStack *SymbolizePC(uptr address);
90   bool SymbolizeData(uptr address, DataInfo *info);
91 
92   // The module names Symbolizer returns are stable and unique for every given
93   // module.  It is safe to store and compare them as pointers.
94   bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
95                                    uptr *module_address);
GetModuleNameForPc(uptr pc)96   const char *GetModuleNameForPc(uptr pc) {
97     const char *module_name = nullptr;
98     uptr unused;
99     if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
100       return module_name;
101     return nullptr;
102   }
103 
104   // Release internal caches (if any).
105   void Flush();
106   // Attempts to demangle the provided C++ mangled name.
107   const char *Demangle(const char *name);
108   void PrepareForSandboxing();
109 
110   // Allow user to install hooks that would be called before/after Symbolizer
111   // does the actual file/line info fetching. Specific sanitizers may need this
112   // to distinguish system library calls made in user code from calls made
113   // during in-process symbolization.
114   typedef void (*StartSymbolizationHook)();
115   typedef void (*EndSymbolizationHook)();
116   // May be called at most once.
117   void AddHooks(StartSymbolizationHook start_hook,
118                 EndSymbolizationHook end_hook);
119 
120   void RefreshModules();
121   const LoadedModule *FindModuleForAddress(uptr address);
122 
123   void InvalidateModuleList();
124 
125  private:
126   // GetModuleNameAndOffsetForPC has to return a string to the caller.
127   // Since the corresponding module might get unloaded later, we should create
128   // our owned copies of the strings that we can safely return.
129   // ModuleNameOwner does not provide any synchronization, thus calls to
130   // its method should be protected by |mu_|.
131   class ModuleNameOwner {
132    public:
ModuleNameOwner(BlockingMutex * synchronized_by)133     explicit ModuleNameOwner(BlockingMutex *synchronized_by)
134         : storage_(kInitialCapacity), last_match_(nullptr),
135           mu_(synchronized_by) {}
136     const char *GetOwnedCopy(const char *str);
137 
138    private:
139     static const uptr kInitialCapacity = 1000;
140     InternalMmapVector<const char*> storage_;
141     const char *last_match_;
142 
143     BlockingMutex *mu_;
144   } module_names_;
145 
146   /// Platform-specific function for creating a Symbolizer object.
147   static Symbolizer *PlatformInit();
148 
149   bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
150                                          uptr *module_offset,
151                                          ModuleArch *module_arch);
152   ListOfModules modules_;
153   ListOfModules fallback_modules_;
154   // If stale, need to reload the modules before looking up addresses.
155   bool modules_fresh_;
156 
157   // Platform-specific default demangler, must not return nullptr.
158   const char *PlatformDemangle(const char *name);
159   void PlatformPrepareForSandboxing();
160 
161   static Symbolizer *symbolizer_;
162   static StaticSpinMutex init_mu_;
163 
164   // Mutex locked from public methods of |Symbolizer|, so that the internals
165   // (including individual symbolizer tools and platform-specific methods) are
166   // always synchronized.
167   BlockingMutex mu_;
168 
169   IntrusiveList<SymbolizerTool> tools_;
170 
171   explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
172 
173   static LowLevelAllocator symbolizer_allocator_;
174 
175   StartSymbolizationHook start_hook_;
176   EndSymbolizationHook end_hook_;
177   class SymbolizerScope {
178    public:
179     explicit SymbolizerScope(const Symbolizer *sym);
180     ~SymbolizerScope();
181    private:
182     const Symbolizer *sym_;
183   };
184 };
185 
#if defined(SANITIZER_WINDOWS)
// Declared only when the SANITIZER_WINDOWS macro is defined.
// NOTE(review): if that macro is always defined (as 0 or 1) by the platform
// header, this guard is always true -- confirm whether a value test was meant.
void InitializeDbgHelpIfNeeded();
#endif  // defined(SANITIZER_WINDOWS)
189 
190 }  // namespace __sanitizer
191 
192 #endif  // SANITIZER_SYMBOLIZER_H
193