1 //===-- Mangled.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "lldb/Core/Mangled.h"
10
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
21
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Demangle/Demangle.h"
24 #include "llvm/Support/Compiler.h"
25
26 #include <mutex>
27 #include <string>
28 #include <utility>
29
30 #include <cstdlib>
31 #include <cstring>
32 using namespace lldb_private;
33
cstring_is_mangled(llvm::StringRef s)34 static inline bool cstring_is_mangled(llvm::StringRef s) {
35 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
36 }
37
38 #pragma mark Mangled
39
GetManglingScheme(llvm::StringRef const name)40 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
41 if (name.empty())
42 return Mangled::eManglingSchemeNone;
43
44 if (name.startswith("?"))
45 return Mangled::eManglingSchemeMSVC;
46
47 if (name.startswith("_R"))
48 return Mangled::eManglingSchemeRustV0;
49
50 if (name.startswith("_D"))
51 return Mangled::eManglingSchemeD;
52
53 if (name.startswith("_Z"))
54 return Mangled::eManglingSchemeItanium;
55
56 // ___Z is a clang extension of block invocations
57 if (name.startswith("___Z"))
58 return Mangled::eManglingSchemeItanium;
59
60 return Mangled::eManglingSchemeNone;
61 }
62
Mangled(ConstString s)63 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
64 if (s)
65 SetValue(s);
66 }
67
Mangled(llvm::StringRef name)68 Mangled::Mangled(llvm::StringRef name) {
69 if (!name.empty())
70 SetValue(ConstString(name));
71 }
72
73 // Convert to bool operator. This allows code to check any Mangled objects
74 // to see if they contain anything valid using code such as:
75 //
76 // Mangled mangled(...);
77 // if (mangled)
78 // { ...
operator bool() const79 Mangled::operator bool() const { return m_mangled || m_demangled; }
80
81 // Clear the mangled and demangled values.
Clear()82 void Mangled::Clear() {
83 m_mangled.Clear();
84 m_demangled.Clear();
85 }
86
87 // Compare the string values.
Compare(const Mangled & a,const Mangled & b)88 int Mangled::Compare(const Mangled &a, const Mangled &b) {
89 return ConstString::Compare(a.GetName(ePreferMangled),
90 b.GetName(ePreferMangled));
91 }
92
93 // Set the string value in this objects. If "mangled" is true, then the mangled
94 // named is set with the new value in "s", else the demangled name is set.
SetValue(ConstString s,bool mangled)95 void Mangled::SetValue(ConstString s, bool mangled) {
96 if (s) {
97 if (mangled) {
98 m_demangled.Clear();
99 m_mangled = s;
100 } else {
101 m_demangled = s;
102 m_mangled.Clear();
103 }
104 } else {
105 m_demangled.Clear();
106 m_mangled.Clear();
107 }
108 }
109
SetValue(ConstString name)110 void Mangled::SetValue(ConstString name) {
111 if (name) {
112 if (cstring_is_mangled(name.GetStringRef())) {
113 m_demangled.Clear();
114 m_mangled = name;
115 } else {
116 m_demangled = name;
117 m_mangled.Clear();
118 }
119 } else {
120 m_demangled.Clear();
121 m_mangled.Clear();
122 }
123 }
124
125 // Local helpers for different demangling implementations.
GetMSVCDemangledStr(const char * M)126 static char *GetMSVCDemangledStr(const char *M) {
127 char *demangled_cstr = llvm::microsoftDemangle(
128 M, nullptr, nullptr, nullptr, nullptr,
129 llvm::MSDemangleFlags(
130 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
131 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
132
133 if (Log *log = GetLog(LLDBLog::Demangle)) {
134 if (demangled_cstr && demangled_cstr[0])
135 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M, demangled_cstr);
136 else
137 LLDB_LOGF(log, "demangled msvc: %s -> error", M);
138 }
139
140 return demangled_cstr;
141 }
142
GetItaniumDemangledStr(const char * M)143 static char *GetItaniumDemangledStr(const char *M) {
144 char *demangled_cstr = nullptr;
145
146 llvm::ItaniumPartialDemangler ipd;
147 bool err = ipd.partialDemangle(M);
148 if (!err) {
149 // Default buffer and size (will realloc in case it's too small).
150 size_t demangled_size = 80;
151 demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
152 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
153
154 assert(demangled_cstr &&
155 "finishDemangle must always succeed if partialDemangle did");
156 assert(demangled_cstr[demangled_size - 1] == '\0' &&
157 "Expected demangled_size to return length including trailing null");
158 }
159
160 if (Log *log = GetLog(LLDBLog::Demangle)) {
161 if (demangled_cstr)
162 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
163 else
164 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
165 }
166
167 return demangled_cstr;
168 }
169
GetRustV0DemangledStr(const char * M)170 static char *GetRustV0DemangledStr(const char *M) {
171 char *demangled_cstr = llvm::rustDemangle(M);
172
173 if (Log *log = GetLog(LLDBLog::Demangle)) {
174 if (demangled_cstr && demangled_cstr[0])
175 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
176 else
177 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M);
178 }
179
180 return demangled_cstr;
181 }
182
GetDLangDemangledStr(const char * M)183 static char *GetDLangDemangledStr(const char *M) {
184 char *demangled_cstr = llvm::dlangDemangle(M);
185
186 if (Log *log = GetLog(LLDBLog::Demangle)) {
187 if (demangled_cstr && demangled_cstr[0])
188 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
189 else
190 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
191 }
192
193 return demangled_cstr;
194 }
195
196 // Explicit demangling for scheduled requests during batch processing. This
197 // makes use of ItaniumPartialDemangler's rich demangle info
GetRichManglingInfo(RichManglingContext & context,SkipMangledNameFn * skip_mangled_name)198 bool Mangled::GetRichManglingInfo(RichManglingContext &context,
199 SkipMangledNameFn *skip_mangled_name) {
200 // Others are not meant to arrive here. ObjC names or C's main() for example
201 // have their names stored in m_demangled, while m_mangled is empty.
202 assert(m_mangled);
203
204 // Check whether or not we are interested in this name at all.
205 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
206 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
207 return false;
208
209 switch (scheme) {
210 case eManglingSchemeNone:
211 // The current mangled_name_filter would allow llvm_unreachable here.
212 return false;
213
214 case eManglingSchemeItanium:
215 // We want the rich mangling info here, so we don't care whether or not
216 // there is a demangled string in the pool already.
217 return context.FromItaniumName(m_mangled);
218
219 case eManglingSchemeMSVC: {
220 // We have no rich mangling for MSVC-mangled names yet, so first try to
221 // demangle it if necessary.
222 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
223 if (char *d = GetMSVCDemangledStr(m_mangled.GetCString())) {
224 // Without the rich mangling info we have to demangle the full name.
225 // Copy it to string pool and connect the counterparts to accelerate
226 // later access in GetDemangledName().
227 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
228 m_mangled);
229 ::free(d);
230 } else {
231 m_demangled.SetCString("");
232 }
233 }
234
235 if (m_demangled.IsEmpty()) {
236 // Cannot demangle it, so don't try parsing.
237 return false;
238 } else {
239 // Demangled successfully, we can try and parse it with
240 // CPlusPlusLanguage::MethodName.
241 return context.FromCxxMethodName(m_demangled);
242 }
243 }
244
245 case eManglingSchemeRustV0:
246 case eManglingSchemeD:
247 // Rich demangling scheme is not supported
248 return false;
249 }
250 llvm_unreachable("Fully covered switch above!");
251 }
252
253 // Generate the demangled name on demand using this accessor. Code in this
254 // class will need to use this accessor if it wishes to decode the demangled
255 // name. The result is cached and will be kept until a new string value is
256 // supplied to this object, or until the end of the object's lifetime.
GetDemangledName() const257 ConstString Mangled::GetDemangledName() const {
258 // Check to make sure we have a valid mangled name and that we haven't
259 // already decoded our mangled name.
260 if (m_mangled && m_demangled.IsNull()) {
261 // Don't bother running anything that isn't mangled
262 const char *mangled_name = m_mangled.GetCString();
263 ManglingScheme mangling_scheme =
264 GetManglingScheme(m_mangled.GetStringRef());
265 if (mangling_scheme != eManglingSchemeNone &&
266 !m_mangled.GetMangledCounterpart(m_demangled)) {
267 // We didn't already mangle this name, demangle it and if all goes well
268 // add it to our map.
269 char *demangled_name = nullptr;
270 switch (mangling_scheme) {
271 case eManglingSchemeMSVC:
272 demangled_name = GetMSVCDemangledStr(mangled_name);
273 break;
274 case eManglingSchemeItanium: {
275 demangled_name = GetItaniumDemangledStr(mangled_name);
276 break;
277 }
278 case eManglingSchemeRustV0:
279 demangled_name = GetRustV0DemangledStr(mangled_name);
280 break;
281 case eManglingSchemeD:
282 demangled_name = GetDLangDemangledStr(mangled_name);
283 break;
284 case eManglingSchemeNone:
285 llvm_unreachable("eManglingSchemeNone was handled already");
286 }
287 if (demangled_name) {
288 m_demangled.SetStringWithMangledCounterpart(
289 llvm::StringRef(demangled_name), m_mangled);
290 free(demangled_name);
291 }
292 }
293 if (m_demangled.IsNull()) {
294 // Set the demangled string to the empty string to indicate we tried to
295 // parse it once and failed.
296 m_demangled.SetCString("");
297 }
298 }
299
300 return m_demangled;
301 }
302
GetDisplayDemangledName() const303 ConstString Mangled::GetDisplayDemangledName() const {
304 return GetDemangledName();
305 }
306
NameMatches(const RegularExpression & regex) const307 bool Mangled::NameMatches(const RegularExpression ®ex) const {
308 if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
309 return true;
310
311 ConstString demangled = GetDemangledName();
312 return demangled && regex.Execute(demangled.GetStringRef());
313 }
314
315 // Get the demangled name if there is one, else return the mangled name.
GetName(Mangled::NamePreference preference) const316 ConstString Mangled::GetName(Mangled::NamePreference preference) const {
317 if (preference == ePreferMangled && m_mangled)
318 return m_mangled;
319
320 // Call the accessor to make sure we get a demangled name in case it hasn't
321 // been demangled yet...
322 ConstString demangled = GetDemangledName();
323
324 if (preference == ePreferDemangledWithoutArguments) {
325 if (Language *lang = Language::FindPlugin(GuessLanguage())) {
326 return lang->GetDemangledFunctionNameWithoutArguments(*this);
327 }
328 }
329 if (preference == ePreferDemangled) {
330 if (demangled)
331 return demangled;
332 return m_mangled;
333 }
334 return demangled;
335 }
336
337 // Dump a Mangled object to stream "s". We don't force our demangled name to be
338 // computed currently (we don't use the accessor).
Dump(Stream * s) const339 void Mangled::Dump(Stream *s) const {
340 if (m_mangled) {
341 *s << ", mangled = " << m_mangled;
342 }
343 if (m_demangled) {
344 const char *demangled = m_demangled.AsCString();
345 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
346 }
347 }
348
349 // Dumps a debug version of this string with extra object and state information
350 // to stream "s".
DumpDebug(Stream * s) const351 void Mangled::DumpDebug(Stream *s) const {
352 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
353 static_cast<const void *>(this));
354 m_mangled.DumpDebug(s);
355 s->Printf(", demangled = ");
356 m_demangled.DumpDebug(s);
357 }
358
359 // Return the size in byte that this object takes in memory. The size includes
360 // the size of the objects it owns, and not the strings that it references
361 // because they are shared strings.
MemorySize() const362 size_t Mangled::MemorySize() const {
363 return m_mangled.MemorySize() + m_demangled.MemorySize();
364 }
365
366 // We "guess" the language because we can't determine a symbol's language from
367 // it's name. For example, a Pascal symbol can be mangled using the C++
368 // Itanium scheme, and defined in a compilation unit within the same module as
369 // other C++ units. In addition, different targets could have different ways
370 // of mangling names from a given language, likewise the compilation units
371 // within those targets.
GuessLanguage() const372 lldb::LanguageType Mangled::GuessLanguage() const {
373 lldb::LanguageType result = lldb::eLanguageTypeUnknown;
374 // Ask each language plugin to check if the mangled name belongs to it.
375 Language::ForEach([this, &result](Language *l) {
376 if (l->SymbolNameFitsToLanguage(*this)) {
377 result = l->GetLanguageType();
378 return false;
379 }
380 return true;
381 });
382 return result;
383 }
384
385 // Dump OBJ to the supplied stream S.
operator <<(Stream & s,const Mangled & obj)386 Stream &operator<<(Stream &s, const Mangled &obj) {
387 if (obj.GetMangledName())
388 s << "mangled = '" << obj.GetMangledName() << "'";
389
390 ConstString demangled = obj.GetDemangledName();
391 if (demangled)
392 s << ", demangled = '" << demangled << '\'';
393 else
394 s << ", demangled = <error>";
395 return s;
396 }
397
// Mangled objects are serialized with as little data as possible. The leading
// byte of each serialized object is one of the values below and tells the
// decoder which names follow. The numeric values are written out by Encode and
// read back by Decode, so they must stay as they are.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is set; the object is encoded as
  /// this single zero byte.
  Empty = 0u,
  /// Only a demangled name is set, so only the demangled name is encoded.
  DemangledOnly = 1u,
  /// The demangled name can be recomputed from the mangled name (the two are
  /// mangled/demangled counterparts), so only the mangled name is encoded.
  MangledOnly = 2u,
  /// Both names are set but they are unrelated, so both are encoded. This
  /// happens for names that are not truly mangled but should still be found
  /// by name and type in the symbol table. Objective C symbols such as
  /// "OBJC_CLASS_$_NSValue" are an example: the mangled name is set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name is manually set to
  /// "NSValue". Demangling "OBJC_CLASS_$_NSValue" would fail, so the pair has
  /// to be preserved explicitly.
  MangledAndDemangled = 3u
};
423
Decode(const DataExtractor & data,lldb::offset_t * offset_ptr,const StringTableReader & strtab)424 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
425 const StringTableReader &strtab) {
426 m_mangled.Clear();
427 m_demangled.Clear();
428 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
429 switch (encoding) {
430 case Empty:
431 return true;
432
433 case DemangledOnly:
434 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
435 return true;
436
437 case MangledOnly:
438 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
439 return true;
440
441 case MangledAndDemangled:
442 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
443 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
444 return true;
445 }
446 return false;
447 }
448 /// The encoding format for the Mangled object is as follows:
449 ///
450 /// uint8_t encoding;
451 /// char str1[]; (only if DemangledOnly, MangledOnly)
452 /// char str2[]; (only if MangledAndDemangled)
453 ///
454 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2
455 /// are only saved if we need them based on the encoding.
456 ///
457 /// Some mangled names have a mangled name that can be demangled by the built
458 /// in demanglers. These kinds of mangled objects know when the mangled and
459 /// demangled names are the counterparts for each other. This is done because
460 /// demangling is very expensive and avoiding demangling the same name twice
461 /// saves us a lot of compute time. For these kinds of names we only need to
462 /// save the mangled name and have the encoding set to "MangledOnly".
463 ///
464 /// If a mangled obejct has only a demangled name, then we save only that string
465 /// and have the encoding set to "DemangledOnly".
466 ///
467 /// Some mangled objects have both mangled and demangled names, but the
468 /// demangled name can not be computed from the mangled name. This is often used
469 /// for runtime named, like Objective C runtime V2 and V3 names. Both these
470 /// names must be saved and the encoding is set to "MangledAndDemangled".
471 ///
472 /// For a Mangled object with no names, we only need to set the encoding to
473 /// "Empty" and not store any string values.
Encode(DataEncoder & file,ConstStringTable & strtab) const474 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
475 MangledEncoding encoding = Empty;
476 if (m_mangled) {
477 encoding = MangledOnly;
478 if (m_demangled) {
479 // We have both mangled and demangled names. If the demangled name is the
480 // counterpart of the mangled name, then we only need to save the mangled
481 // named. If they are different, we need to save both.
482 ConstString s;
483 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
484 encoding = MangledAndDemangled;
485 }
486 } else if (m_demangled) {
487 encoding = DemangledOnly;
488 }
489 file.AppendU8(encoding);
490 switch (encoding) {
491 case Empty:
492 break;
493 case DemangledOnly:
494 file.AppendU32(strtab.Add(m_demangled));
495 break;
496 case MangledOnly:
497 file.AppendU32(strtab.Add(m_mangled));
498 break;
499 case MangledAndDemangled:
500 file.AppendU32(strtab.Add(m_mangled));
501 file.AppendU32(strtab.Add(m_demangled));
502 break;
503 }
504 }
505