//===-- sanitizer_symbolizer_internal.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Header for internal classes and functions to be used by implementations of
// symbolizers.
//
//===----------------------------------------------------------------------===//
13 #ifndef SANITIZER_SYMBOLIZER_INTERNAL_H
14 #define SANITIZER_SYMBOLIZER_INTERNAL_H
15 
16 #include "sanitizer_symbolizer.h"
17 #include "sanitizer_file.h"
18 #include "sanitizer_vector.h"
19 
20 namespace __sanitizer {
21 
// Parsing helpers: 'str' is searched for the delimiter(s) and a string, int or
// uptr is extracted. When extracting a string, a newly allocated (using
// InternalAlloc) and null-terminated buffer is returned. They return a pointer
// to the next character after the found delimiter.
// String variant: the extracted token is handed back through |*result|.
const char *ExtractToken(const char *str, const char *delims, char **result);
// Integer variant: the extracted value is handed back through |*result| as an
// int.
const char *ExtractInt(const char *str, const char *delims, int *result);
28 const char *ExtractUptr(const char *str, const char *delims, uptr *result);
// String variant taking a single |delimiter| string instead of |delims|.
// NOTE(review): presumably |delimiter| is matched as a whole substring rather
// than as a set of characters -- confirm against the definition.
const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
                                      char **result);

// Demangling entry point taking a (possibly mangled) symbol |name|.
// NOTE(review): judging by the name this tries Swift and C++ demangling; the
// definition is not in this header, so confirm the order of attempts and who
// owns the returned string before relying on either.
const char *DemangleSwiftAndCXX(const char *name);

34 // SymbolizerTool is an interface that is implemented by individual "tools"
35 // that can perform symbolication (external llvm-symbolizer, libbacktrace,
36 // Windows DbgHelp symbolizer, etc.).
37 class SymbolizerTool {
38  public:
39   // The main |Symbolizer| class implements a "fallback chain" of symbolizer
40   // tools. In a request to symbolize an address, if one tool returns false,
41   // the next tool in the chain will be tried.
42   SymbolizerTool *next;
43 
SymbolizerTool()44   SymbolizerTool() : next(nullptr) { }
45 
46   // Can't declare pure virtual functions in sanitizer runtimes:
47   // __cxa_pure_virtual might be unavailable.
48 
49   // The |stack| parameter is inout. It is pre-filled with the address,
50   // module base and module offset values and is to be used to construct
51   // other stack frames.
SymbolizePC(uptr addr,SymbolizedStack * stack)52   virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) {
53     UNIMPLEMENTED();
54   }
55 
56   // The |info| parameter is inout. It is pre-filled with the module base
57   // and module offset values.
SymbolizeData(uptr addr,DataInfo * info)58   virtual bool SymbolizeData(uptr addr, DataInfo *info) {
59     UNIMPLEMENTED();
60   }
61 
SymbolizeFrame(uptr addr,FrameInfo * info)62   virtual bool SymbolizeFrame(uptr addr, FrameInfo *info) {
63     return false;
64   }
65 
Flush()66   virtual void Flush() {}
67 
68   // Return nullptr to fallback to the default platform-specific demangler.
Demangle(const char * name)69   virtual const char *Demangle(const char *name) {
70     return nullptr;
71   }
72 
73   // Called during the LateInitialize phase of Sanitizer initialization.
74   // Usually this is a safe place to call code that might need to use user
75   // memory allocators.
LateInitialize()76   virtual void LateInitialize() {}
77 
78  protected:
~SymbolizerTool()79   ~SymbolizerTool() {}
80 };
81 
82 // SymbolizerProcess encapsulates communication between the tool and
83 // external symbolizer program, running in a different subprocess.
84 // SymbolizerProcess may not be used from two threads simultaneously.
85 class SymbolizerProcess {
86  public:
87   explicit SymbolizerProcess(const char *path, bool use_posix_spawn = false);
88   const char *SendCommand(const char *command);
89 
90  protected:
~SymbolizerProcess()91   ~SymbolizerProcess() {}
92 
93   /// The maximum number of arguments required to invoke a tool process.
94   static const unsigned kArgVMax = 6;
95 
96   // Customizable by subclasses.
97   virtual bool StartSymbolizerSubprocess();
98   virtual bool ReadFromSymbolizer(char *buffer, uptr max_length);
99   // Return the environment to run the symbolizer in.
GetEnvP()100   virtual char **GetEnvP() { return GetEnviron(); }
101 
102  private:
ReachedEndOfOutput(const char * buffer,uptr length)103   virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
104     UNIMPLEMENTED();
105   }
106 
107   /// Fill in an argv array to invoke the child process.
GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax])108   virtual void GetArgV(const char *path_to_binary,
109                        const char *(&argv)[kArgVMax]) const {
110     UNIMPLEMENTED();
111   }
112 
113   bool Restart();
114   const char *SendCommandImpl(const char *command);
115   bool WriteToSymbolizer(const char *buffer, uptr length);
116 
117   const char *path_;
118   fd_t input_fd_;
119   fd_t output_fd_;
120 
121   static const uptr kBufferSize = 16 * 1024;
122   char buffer_[kBufferSize];
123 
124   static const uptr kMaxTimesRestarted = 5;
125   static const int kSymbolizerStartupTimeMillis = 10;
126   uptr times_restarted_;
127   bool failed_to_start_;
128   bool reported_invalid_path_;
129   bool use_posix_spawn_;
130 };
131 
132 class LLVMSymbolizerProcess;
133 
134 // This tool invokes llvm-symbolizer in a subprocess. It should be as portable
135 // as the llvm-symbolizer tool is.
136 class LLVMSymbolizer final : public SymbolizerTool {
137  public:
138   explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator);
139 
140   bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
141   bool SymbolizeData(uptr addr, DataInfo *info) override;
142   bool SymbolizeFrame(uptr addr, FrameInfo *info) override;
143 
144  private:
145   const char *FormatAndSendCommand(const char *command_prefix,
146                                    const char *module_name, uptr module_offset,
147                                    ModuleArch arch);
148 
149   LLVMSymbolizerProcess *symbolizer_process_;
150   static const uptr kBufferSize = 16 * 1024;
151   char buffer_[kBufferSize];
152 };
153 
154 // Parses one or more two-line strings in the following format:
155 //   <function_name>
156 //   <file_name>:<line_number>[:<column_number>]
157 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
158 // them use the same output format.  Returns true if any useful debug
159 // information was found.
160 void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res);
161 
162 // Parses a two-line string in the following format:
163 //   <symbol_name>
164 //   <start_address> <size>
165 // Used by LLVMSymbolizer and InternalSymbolizer.
166 void ParseSymbolizeDataOutput(const char *str, DataInfo *info);
167 
168 }  // namespace __sanitizer
169 
170 #endif  // SANITIZER_SYMBOLIZER_INTERNAL_H
171