1 //===------------------------- MicrosoftDemangle.h --------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
10 #define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
11 
#include "llvm/Demangle/MicrosoftDemangleNodes.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <new>
#include <string_view>
#include <utility>
17 
18 namespace llvm {
19 namespace ms_demangle {
// This memory allocator is extremely fast, but it doesn't call dtors
// for allocated objects. That means you can't use STL containers
// (such as std::vector) with this allocator. But it pays off --
// the demangler is 3x faster with this allocator compared to one with
// STL containers.

// Default capacity, in bytes, of every buffer the arena requests. It is an
// inline variable so that all translation units including this header bind
// references (e.g. inside std::max) to one and the same object.
inline constexpr size_t AllocUnit = 4096;

// Bump-pointer arena. Memory is carved out of a singly linked list of heap
// buffers and is only released, all at once, when the arena is destroyed.
// Destructors of objects placed in the arena are never run.
class ArenaAllocator {
  // One heap buffer together with its bump-pointer bookkeeping.
  struct AllocatorNode {
    uint8_t *Buf = nullptr;        // Start of the buffer.
    size_t Used = 0;               // Bytes of Buf already handed out.
    size_t Capacity = 0;           // Total size of Buf in bytes.
    AllocatorNode *Next = nullptr; // Previously active node, if any.
  };

  // Allocates a fresh buffer of Capacity bytes and makes it the active
  // (head) node; the former head stays reachable through Next so the
  // destructor can free it.
  void addNode(size_t Capacity) {
    AllocatorNode *NewHead = new AllocatorNode;
    NewHead->Buf = new uint8_t[Capacity];
    NewHead->Next = Head;
    NewHead->Capacity = Capacity;
    Head = NewHead;
    NewHead->Used = 0;
  }

  // Tries to reserve Size bytes, aligned to Align (a power of two), in the
  // head node. Returns the start of the reserved region, or nullptr -- leaving
  // the node untouched -- when the request does not fit.
  //
  // The check is phrased in terms of the remaining free space rather than
  // "Used + Size <= Capacity" so that a very large Size cannot wrap around
  // and be mistaken for a fit.
  uint8_t *reserveInHead(size_t Size, size_t Align) {
    assert(Head && Head->Buf);
    assert(Head->Used <= Head->Capacity);

    const uintptr_t Cur = reinterpret_cast<uintptr_t>(Head->Buf) + Head->Used;
    const uintptr_t Aligned =
        (Cur + Align - 1) & ~static_cast<uintptr_t>(Align - 1);
    const size_t Adjustment = static_cast<size_t>(Aligned - Cur);

    const size_t Free = Head->Capacity - Head->Used;
    if (Adjustment > Free || Size > Free - Adjustment)
      return nullptr;

    const size_t Offset = Head->Used + Adjustment;
    Head->Used = Offset + Size;
    return Head->Buf + Offset;
  }

public:
  // Starts with a single node of AllocUnit bytes so that Head is never null.
  ArenaAllocator() { addNode(AllocUnit); }

  // Frees every buffer and node. Objects living in the buffers are not
  // destroyed.
  ~ArenaAllocator() {
    while (Head) {
      assert(Head->Buf);
      delete[] Head->Buf;
      AllocatorNode *Next = Head->Next;
      delete Head;
      Head = Next;
    }
  }

  // The arena owns its buffers through raw pointers; a member-wise copy would
  // make two arenas free the same memory, so copying is disabled.
  ArenaAllocator(const ArenaAllocator &) = delete;
  ArenaAllocator &operator=(const ArenaAllocator &) = delete;

  // Returns Size bytes of uninitialized storage with no alignment guarantee
  // beyond that of char. Requests that do not fit in the active node get a
  // new node of max(AllocUnit, Size) bytes.
  char *allocUnalignedBuffer(size_t Size) {
    assert(Head && Head->Buf);

    if (uint8_t *P = reserveInHead(Size, 1))
      return reinterpret_cast<char *>(P);

    addNode(std::max(AllocUnit, Size));
    Head->Used = Size;
    return reinterpret_cast<char *>(Head->Buf);
  }

  // Returns a value-initialized array of Count objects of type T, aligned
  // for T. Requests that do not fit in the active node get a new node of
  // max(AllocUnit, Count * sizeof(T)) bytes.
  template <typename T, typename... Args> T *allocArray(size_t Count) {
    // Guard the multiplication below against wrapping around.
    assert(Count <= static_cast<size_t>(-1) / sizeof(T));
    const size_t Size = Count * sizeof(T);

    if (uint8_t *P = reserveInHead(Size, alignof(T)))
      return new (P) T[Count]();

    addNode(std::max(AllocUnit, Size));
    Head->Used = Size;
    return new (Head->Buf) T[Count]();
  }

  // Constructs a single T from ConstructorArgs in arena storage aligned for
  // T and returns it. T must be smaller than AllocUnit so that it always
  // fits in a fresh node.
  template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
    constexpr size_t Size = sizeof(T);
    static_assert(Size < AllocUnit);

    if (uint8_t *P = reserveInHead(Size, alignof(T)))
      return new (P) T(std::forward<Args>(ConstructorArgs)...);

    addNode(AllocUnit);
    Head->Used = Size;
    return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
  }

private:
  // Active node; older nodes hang off Head->Next.
  AllocatorNode *Head = nullptr;
};
113 
114 struct BackrefContext {
115   static constexpr size_t Max = 10;
116 
117   TypeNode *FunctionParams[Max];
118   size_t FunctionParamCount = 0;
119 
120   // The first 10 BackReferences in a mangled name can be back-referenced by
121   // special name @[0-9]. This is a storage for the first 10 BackReferences.
122   NamedIdentifierNode *Names[Max];
123   size_t NamesCount = 0;
124 };
125 
// Mode selector passed as the QMM argument of Demangler::demangleType.
// NOTE(review): the exact meaning of each enumerator is defined by the
// implementation of demangleType, which is not part of this header.
enum class QualifierMangleMode { Drop, Mangle, Result };
127 
// Controls which kinds of names get recorded as back-references. The
// non-zero enumerators occupy distinct bits of a uint8_t. Passed as the NBB
// argument of Demangler::demangleUnqualifiedSymbolName and
// Demangler::demangleTemplateInstantiationName.
enum NameBackrefBehavior : uint8_t {
  NBB_None = 0,          // don't save any names as backrefs.
  NBB_Template = 1 << 0, // save template instantiations.
  NBB_Simple = 1 << 1,   // save simple names.
};
133 
// Group selector passed as the Group argument of
// Demangler::translateIntrinsicFunctionCode and of the two-argument overload
// of Demangler::demangleFunctionIdentifierCode.
// NOTE(review): the names suggest codes with no prefix, a single-underscore
// prefix and a double-underscore prefix respectively -- confirm against the
// implementation.
enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder };
135 
// Demangler class takes the main role in demangling symbols.
// It has a set of functions to parse mangled symbols into Type instances.
// It also has a set of functions to convert Type instances to strings.
//
// Every parsing member below receives the mangled name as a non-const
// std::string_view reference, so a callee is able to modify the caller's
// view; how much each one consumes is decided by the out-of-line definitions,
// which are not part of this header. Results are returned as raw node
// pointers; the class holds an ArenaAllocator member (Arena), presumably the
// backing storage for those nodes -- confirm in the implementation file.
class Demangler {
public:
  Demangler() = default;
  virtual ~Demangler() = default;

  // You are supposed to call parse() first and then check if error is true.  If
  // it is false, call output() to write the formatted name to the given stream.
  // NOTE(review): no output() member is declared in this class; the second
  // sentence above may be stale -- confirm how callers are expected to print
  // the returned node.
  SymbolNode *parse(std::string_view &MangledName);

  // Second public entry point; returns a TagTypeNode rather than a
  // SymbolNode. Defined out of line.
  TagTypeNode *parseTagUniqueName(std::string_view &MangledName);

  // True if an error occurred.
  bool Error = false;

  // Defined out of line. NOTE(review): by its name this dumps the contents of
  // the Backrefs member for debugging -- confirm the output destination.
  void dumpBackReferences();

private:
  // Symbol-level parsers.
  SymbolNode *demangleEncodedSymbol(std::string_view &MangledName,
                                    QualifiedNameNode *QN);
  SymbolNode *demangleDeclarator(std::string_view &MangledName);
  SymbolNode *demangleMD5Name(std::string_view &MangledName);
  SymbolNode *demangleTypeinfoName(std::string_view &MangledName);

  VariableSymbolNode *demangleVariableEncoding(std::string_view &MangledName,
                                               StorageClass SC);
  FunctionSymbolNode *demangleFunctionEncoding(std::string_view &MangledName);

  Qualifiers demanglePointerExtQualifiers(std::string_view &MangledName);

  // Parser functions. This is a recursive-descent parser.
  TypeNode *demangleType(std::string_view &MangledName,
                         QualifierMangleMode QMM);
  PrimitiveTypeNode *demanglePrimitiveType(std::string_view &MangledName);
  CustomTypeNode *demangleCustomType(std::string_view &MangledName);
  TagTypeNode *demangleClassType(std::string_view &MangledName);
  PointerTypeNode *demanglePointerType(std::string_view &MangledName);
  PointerTypeNode *demangleMemberPointerType(std::string_view &MangledName);
  FunctionSignatureNode *demangleFunctionType(std::string_view &MangledName,
                                              bool HasThisQuals);

  ArrayTypeNode *demangleArrayType(std::string_view &MangledName);

  // IsVariadic is an out-parameter (taken by non-const reference).
  NodeArrayNode *demangleFunctionParameterList(std::string_view &MangledName,
                                               bool &IsVariadic);
  NodeArrayNode *demangleTemplateParameterList(std::string_view &MangledName);

  // Number parsers. NOTE(review): the pair returned by demangleNumber is
  // presumably (magnitude, is-negative) -- confirm in the implementation.
  std::pair<uint64_t, bool> demangleNumber(std::string_view &MangledName);
  uint64_t demangleUnsigned(std::string_view &MangledName);
  int64_t demangleSigned(std::string_view &MangledName);

  // Back-reference bookkeeping; see the Backrefs member below.
  void memorizeString(std::string_view s);
  void memorizeIdentifier(IdentifierNode *Identifier);

  /// Allocate a copy of \p Borrowed into memory that we own.
  std::string_view copyString(std::string_view Borrowed);

  // Qualified and unqualified name parsers.
  QualifiedNameNode *
  demangleFullyQualifiedTypeName(std::string_view &MangledName);
  QualifiedNameNode *
  demangleFullyQualifiedSymbolName(std::string_view &MangledName);

  IdentifierNode *demangleUnqualifiedTypeName(std::string_view &MangledName,
                                              bool Memorize);
  IdentifierNode *demangleUnqualifiedSymbolName(std::string_view &MangledName,
                                                NameBackrefBehavior NBB);

  QualifiedNameNode *demangleNameScopeChain(std::string_view &MangledName,
                                            IdentifierNode *UnqualifiedName);
  IdentifierNode *demangleNameScopePiece(std::string_view &MangledName);

  NamedIdentifierNode *demangleBackRefName(std::string_view &MangledName);
  IdentifierNode *
  demangleTemplateInstantiationName(std::string_view &MangledName,
                                    NameBackrefBehavior NBB);
  // Unlike its neighbours this takes a single character and a group rather
  // than the mangled name.
  IntrinsicFunctionKind
  translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group);
  IdentifierNode *demangleFunctionIdentifierCode(std::string_view &MangledName);
  IdentifierNode *
  demangleFunctionIdentifierCode(std::string_view &MangledName,
                                 FunctionIdentifierCodeGroup Group);
  StructorIdentifierNode *
  demangleStructorIdentifier(std::string_view &MangledName, bool IsDestructor);
  ConversionOperatorIdentifierNode *
  demangleConversionOperatorIdentifier(std::string_view &MangledName);
  LiteralOperatorIdentifierNode *
  demangleLiteralOperatorIdentifier(std::string_view &MangledName);

  // Parsers for special (compiler-generated) symbols. Two of them take the
  // allocator as an explicit argument instead of relying on the Arena member.
  SymbolNode *demangleSpecialIntrinsic(std::string_view &MangledName);
  SpecialTableSymbolNode *
  demangleSpecialTableSymbolNode(std::string_view &MangledName,
                                 SpecialIntrinsicKind SIK);
  LocalStaticGuardVariableNode *
  demangleLocalStaticGuard(std::string_view &MangledName, bool IsThread);
  VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena,
                                              std::string_view &MangledName,
                                              std::string_view VariableName);
  VariableSymbolNode *
  demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
                                      std::string_view &MangledName);
  FunctionSymbolNode *demangleInitFiniStub(std::string_view &MangledName,
                                           bool IsDestructor);

  // Simple-name, literal and thunk parsers.
  NamedIdentifierNode *demangleSimpleName(std::string_view &MangledName,
                                          bool Memorize);
  NamedIdentifierNode *
  demangleAnonymousNamespaceName(std::string_view &MangledName);
  NamedIdentifierNode *
  demangleLocallyScopedNamePiece(std::string_view &MangledName);
  EncodedStringLiteralNode *
  demangleStringLiteral(std::string_view &MangledName);
  FunctionSymbolNode *demangleVcallThunkNode(std::string_view &MangledName);

  std::string_view demangleSimpleString(std::string_view &MangledName,
                                        bool Memorize);

  // Parsers for individual attributes and literals; these return plain
  // values rather than nodes.
  FuncClass demangleFunctionClass(std::string_view &MangledName);
  CallingConv demangleCallingConvention(std::string_view &MangledName);
  StorageClass demangleVariableStorageClass(std::string_view &MangledName);
  bool demangleThrowSpecification(std::string_view &MangledName);
  wchar_t demangleWcharLiteral(std::string_view &MangledName);
  uint8_t demangleCharLiteral(std::string_view &MangledName);

  std::pair<Qualifiers, bool> demangleQualifiers(std::string_view &MangledName);

  // Memory allocator.
  ArenaAllocator Arena;

  // A single type uses one global back-ref table for all function params.
  // This means back-refs can even go "into" other types.  Examples:
  //
  //  // Second int* is a back-ref to first.
  //  void foo(int *, int*);
  //
  //  // Second int* is not a back-ref to first (first is not a function param).
  //  int* foo(int*);
  //
  //  // Second int* is a back-ref to first (ALL function types share the same
  //  // back-ref map).
  //  using F = void(*)(int*);
  //  F G(int *);
  BackrefContext Backrefs;
};
281 
282 } // namespace ms_demangle
283 } // namespace llvm
284 
285 #endif // LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
286