1 //===-- Mangled.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Mangled.h"
10 
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
21 
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Demangle/Demangle.h"
24 #include "llvm/Support/Compiler.h"
25 
26 #include <mutex>
27 #include <string>
28 #include <string_view>
29 #include <utility>
30 
31 #include <cstdlib>
32 #include <cstring>
33 using namespace lldb_private;
34 
cstring_is_mangled(llvm::StringRef s)35 static inline bool cstring_is_mangled(llvm::StringRef s) {
36   return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
37 }
38 
39 #pragma mark Mangled
40 
GetManglingScheme(llvm::StringRef const name)41 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
42   if (name.empty())
43     return Mangled::eManglingSchemeNone;
44 
45   if (name.starts_with("?"))
46     return Mangled::eManglingSchemeMSVC;
47 
48   if (name.starts_with("_R"))
49     return Mangled::eManglingSchemeRustV0;
50 
51   if (name.starts_with("_D"))
52     return Mangled::eManglingSchemeD;
53 
54   if (name.starts_with("_Z"))
55     return Mangled::eManglingSchemeItanium;
56 
57   // ___Z is a clang extension of block invocations
58   if (name.starts_with("___Z"))
59     return Mangled::eManglingSchemeItanium;
60 
61   // Swift's older style of mangling used "_T" as a mangling prefix. This can
62   // lead to false positives with other symbols that just so happen to start
63   // with "_T". To minimize the chance of that happening, we only return true
64   // for select old-style swift mangled names. The known cases are ObjC classes
65   // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
66   // Protocols are prefixed with "_TtP".
67   if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||
68       name.starts_with("_TtP"))
69     return Mangled::eManglingSchemeSwift;
70 
71   // Swift 4.2 used "$S" and "_$S".
72   // Swift 5 and onward uses "$s" and "_$s".
73   // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
74   if (name.starts_with("$S") || name.starts_with("_$S") ||
75       name.starts_with("$s") || name.starts_with("_$s") ||
76       name.starts_with("@__swiftmacro_"))
77     return Mangled::eManglingSchemeSwift;
78 
79   return Mangled::eManglingSchemeNone;
80 }
81 
Mangled(ConstString s)82 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
83   if (s)
84     SetValue(s);
85 }
86 
Mangled(llvm::StringRef name)87 Mangled::Mangled(llvm::StringRef name) {
88   if (!name.empty())
89     SetValue(ConstString(name));
90 }
91 
92 // Convert to bool operator. This allows code to check any Mangled objects
93 // to see if they contain anything valid using code such as:
94 //
95 //  Mangled mangled(...);
96 //  if (mangled)
97 //  { ...
operator bool() const98 Mangled::operator bool() const { return m_mangled || m_demangled; }
99 
100 // Clear the mangled and demangled values.
Clear()101 void Mangled::Clear() {
102   m_mangled.Clear();
103   m_demangled.Clear();
104 }
105 
106 // Compare the string values.
Compare(const Mangled & a,const Mangled & b)107 int Mangled::Compare(const Mangled &a, const Mangled &b) {
108   return ConstString::Compare(a.GetName(ePreferMangled),
109                               b.GetName(ePreferMangled));
110 }
111 
SetValue(ConstString name)112 void Mangled::SetValue(ConstString name) {
113   if (name) {
114     if (cstring_is_mangled(name.GetStringRef())) {
115       m_demangled.Clear();
116       m_mangled = name;
117     } else {
118       m_demangled = name;
119       m_mangled.Clear();
120     }
121   } else {
122     m_demangled.Clear();
123     m_mangled.Clear();
124   }
125 }
126 
127 // Local helpers for different demangling implementations.
GetMSVCDemangledStr(std::string_view M)128 static char *GetMSVCDemangledStr(std::string_view M) {
129   char *demangled_cstr = llvm::microsoftDemangle(
130       M, nullptr, nullptr,
131       llvm::MSDemangleFlags(
132           llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
133           llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
134 
135   if (Log *log = GetLog(LLDBLog::Demangle)) {
136     if (demangled_cstr && demangled_cstr[0])
137       LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
138     else
139       LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
140   }
141 
142   return demangled_cstr;
143 }
144 
GetItaniumDemangledStr(const char * M)145 static char *GetItaniumDemangledStr(const char *M) {
146   char *demangled_cstr = nullptr;
147 
148   llvm::ItaniumPartialDemangler ipd;
149   bool err = ipd.partialDemangle(M);
150   if (!err) {
151     // Default buffer and size (will realloc in case it's too small).
152     size_t demangled_size = 80;
153     demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
154     demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
155 
156     assert(demangled_cstr &&
157            "finishDemangle must always succeed if partialDemangle did");
158     assert(demangled_cstr[demangled_size - 1] == '\0' &&
159            "Expected demangled_size to return length including trailing null");
160   }
161 
162   if (Log *log = GetLog(LLDBLog::Demangle)) {
163     if (demangled_cstr)
164       LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
165     else
166       LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
167   }
168 
169   return demangled_cstr;
170 }
171 
GetRustV0DemangledStr(std::string_view M)172 static char *GetRustV0DemangledStr(std::string_view M) {
173   char *demangled_cstr = llvm::rustDemangle(M);
174 
175   if (Log *log = GetLog(LLDBLog::Demangle)) {
176     if (demangled_cstr && demangled_cstr[0])
177       LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
178     else
179       LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M);
180   }
181 
182   return demangled_cstr;
183 }
184 
GetDLangDemangledStr(std::string_view M)185 static char *GetDLangDemangledStr(std::string_view M) {
186   char *demangled_cstr = llvm::dlangDemangle(M);
187 
188   if (Log *log = GetLog(LLDBLog::Demangle)) {
189     if (demangled_cstr && demangled_cstr[0])
190       LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
191     else
192       LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
193   }
194 
195   return demangled_cstr;
196 }
197 
198 // Explicit demangling for scheduled requests during batch processing. This
199 // makes use of ItaniumPartialDemangler's rich demangle info
GetRichManglingInfo(RichManglingContext & context,SkipMangledNameFn * skip_mangled_name)200 bool Mangled::GetRichManglingInfo(RichManglingContext &context,
201                                   SkipMangledNameFn *skip_mangled_name) {
202   // Others are not meant to arrive here. ObjC names or C's main() for example
203   // have their names stored in m_demangled, while m_mangled is empty.
204   assert(m_mangled);
205 
206   // Check whether or not we are interested in this name at all.
207   ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
208   if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
209     return false;
210 
211   switch (scheme) {
212   case eManglingSchemeNone:
213     // The current mangled_name_filter would allow llvm_unreachable here.
214     return false;
215 
216   case eManglingSchemeItanium:
217     // We want the rich mangling info here, so we don't care whether or not
218     // there is a demangled string in the pool already.
219     return context.FromItaniumName(m_mangled);
220 
221   case eManglingSchemeMSVC: {
222     // We have no rich mangling for MSVC-mangled names yet, so first try to
223     // demangle it if necessary.
224     if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
225       if (char *d = GetMSVCDemangledStr(m_mangled)) {
226         // Without the rich mangling info we have to demangle the full name.
227         // Copy it to string pool and connect the counterparts to accelerate
228         // later access in GetDemangledName().
229         m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
230                                                     m_mangled);
231         ::free(d);
232       } else {
233         m_demangled.SetCString("");
234       }
235     }
236 
237     if (m_demangled.IsEmpty()) {
238       // Cannot demangle it, so don't try parsing.
239       return false;
240     } else {
241       // Demangled successfully, we can try and parse it with
242       // CPlusPlusLanguage::MethodName.
243       return context.FromCxxMethodName(m_demangled);
244     }
245   }
246 
247   case eManglingSchemeRustV0:
248   case eManglingSchemeD:
249   case eManglingSchemeSwift:
250     // Rich demangling scheme is not supported
251     return false;
252   }
253   llvm_unreachable("Fully covered switch above!");
254 }
255 
256 // Generate the demangled name on demand using this accessor. Code in this
257 // class will need to use this accessor if it wishes to decode the demangled
258 // name. The result is cached and will be kept until a new string value is
259 // supplied to this object, or until the end of the object's lifetime.
GetDemangledName() const260 ConstString Mangled::GetDemangledName() const {
261   // Check to make sure we have a valid mangled name and that we haven't
262   // already decoded our mangled name.
263   if (m_mangled && m_demangled.IsNull()) {
264     // Don't bother running anything that isn't mangled
265     const char *mangled_name = m_mangled.GetCString();
266     ManglingScheme mangling_scheme =
267         GetManglingScheme(m_mangled.GetStringRef());
268     if (mangling_scheme != eManglingSchemeNone &&
269         !m_mangled.GetMangledCounterpart(m_demangled)) {
270       // We didn't already mangle this name, demangle it and if all goes well
271       // add it to our map.
272       char *demangled_name = nullptr;
273       switch (mangling_scheme) {
274       case eManglingSchemeMSVC:
275         demangled_name = GetMSVCDemangledStr(mangled_name);
276         break;
277       case eManglingSchemeItanium: {
278         demangled_name = GetItaniumDemangledStr(mangled_name);
279         break;
280       }
281       case eManglingSchemeRustV0:
282         demangled_name = GetRustV0DemangledStr(m_mangled);
283         break;
284       case eManglingSchemeD:
285         demangled_name = GetDLangDemangledStr(m_mangled);
286         break;
287       case eManglingSchemeSwift:
288         // Demangling a swift name requires the swift compiler. This is
289         // explicitly unsupported on llvm.org.
290         break;
291       case eManglingSchemeNone:
292         llvm_unreachable("eManglingSchemeNone was handled already");
293       }
294       if (demangled_name) {
295         m_demangled.SetStringWithMangledCounterpart(
296             llvm::StringRef(demangled_name), m_mangled);
297         free(demangled_name);
298       }
299     }
300     if (m_demangled.IsNull()) {
301       // Set the demangled string to the empty string to indicate we tried to
302       // parse it once and failed.
303       m_demangled.SetCString("");
304     }
305   }
306 
307   return m_demangled;
308 }
309 
GetDisplayDemangledName() const310 ConstString Mangled::GetDisplayDemangledName() const {
311   return GetDemangledName();
312 }
313 
NameMatches(const RegularExpression & regex) const314 bool Mangled::NameMatches(const RegularExpression &regex) const {
315   if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
316     return true;
317 
318   ConstString demangled = GetDemangledName();
319   return demangled && regex.Execute(demangled.GetStringRef());
320 }
321 
322 // Get the demangled name if there is one, else return the mangled name.
GetName(Mangled::NamePreference preference) const323 ConstString Mangled::GetName(Mangled::NamePreference preference) const {
324   if (preference == ePreferMangled && m_mangled)
325     return m_mangled;
326 
327   // Call the accessor to make sure we get a demangled name in case it hasn't
328   // been demangled yet...
329   ConstString demangled = GetDemangledName();
330 
331   if (preference == ePreferDemangledWithoutArguments) {
332     if (Language *lang = Language::FindPlugin(GuessLanguage())) {
333       return lang->GetDemangledFunctionNameWithoutArguments(*this);
334     }
335   }
336   if (preference == ePreferDemangled) {
337     if (demangled)
338       return demangled;
339     return m_mangled;
340   }
341   return demangled;
342 }
343 
344 // Dump a Mangled object to stream "s". We don't force our demangled name to be
345 // computed currently (we don't use the accessor).
Dump(Stream * s) const346 void Mangled::Dump(Stream *s) const {
347   if (m_mangled) {
348     *s << ", mangled = " << m_mangled;
349   }
350   if (m_demangled) {
351     const char *demangled = m_demangled.AsCString();
352     s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
353   }
354 }
355 
356 // Dumps a debug version of this string with extra object and state information
357 // to stream "s".
DumpDebug(Stream * s) const358 void Mangled::DumpDebug(Stream *s) const {
359   s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
360             static_cast<const void *>(this));
361   m_mangled.DumpDebug(s);
362   s->Printf(", demangled = ");
363   m_demangled.DumpDebug(s);
364 }
365 
366 // Return the size in byte that this object takes in memory. The size includes
367 // the size of the objects it owns, and not the strings that it references
368 // because they are shared strings.
MemorySize() const369 size_t Mangled::MemorySize() const {
370   return m_mangled.MemorySize() + m_demangled.MemorySize();
371 }
372 
373 // We "guess" the language because we can't determine a symbol's language from
374 // it's name.  For example, a Pascal symbol can be mangled using the C++
375 // Itanium scheme, and defined in a compilation unit within the same module as
376 // other C++ units.  In addition, different targets could have different ways
377 // of mangling names from a given language, likewise the compilation units
378 // within those targets.
GuessLanguage() const379 lldb::LanguageType Mangled::GuessLanguage() const {
380   lldb::LanguageType result = lldb::eLanguageTypeUnknown;
381   // Ask each language plugin to check if the mangled name belongs to it.
382   Language::ForEach([this, &result](Language *l) {
383     if (l->SymbolNameFitsToLanguage(*this)) {
384       result = l->GetLanguageType();
385       return false;
386     }
387     return true;
388   });
389   return result;
390 }
391 
392 // Dump OBJ to the supplied stream S.
operator <<(Stream & s,const Mangled & obj)393 Stream &operator<<(Stream &s, const Mangled &obj) {
394   if (obj.GetMangledName())
395     s << "mangled = '" << obj.GetMangledName() << "'";
396 
397   ConstString demangled = obj.GetDemangledName();
398   if (demangled)
399     s << ", demangled = '" << demangled << '\'';
400   else
401     s << ", demangled = <error>";
402   return s;
403 }
404 
// Tag byte written in front of every serialized Mangled object. It records
// which of the two names follow, so that no more data than necessary is
// stored.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is set; the tag byte is the
  /// entire encoding.
  Empty = 0u,
  /// Only a demangled name is set, so only the demangled name is stored.
  DemangledOnly = 1u,
  /// Only the mangled name is stored. Used when there is no demangled name,
  /// or when the demangled name is the recorded counterpart of the mangled
  /// name and can therefore be recomputed.
  MangledOnly = 2u,
  /// Both names are stored because they are unrelated: the "mangled" name is
  /// not a true mangled name, but is kept so a symbol can be looked up by
  /// name and type in the symbol table. Objective C symbols are an example:
  /// for "OBJC_CLASS_$_NSValue" the mangled name is set to
  /// "OBJC_CLASS_$_NSValue" and the demangled name is manually set to
  /// "NSValue". Demangling "OBJC_CLASS_$_NSValue" would fail, so both names
  /// have to be preserved as they are.
  MangledAndDemangled = 3u,
};
430 
Decode(const DataExtractor & data,lldb::offset_t * offset_ptr,const StringTableReader & strtab)431 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
432                      const StringTableReader &strtab) {
433   m_mangled.Clear();
434   m_demangled.Clear();
435   MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
436   switch (encoding) {
437     case Empty:
438       return true;
439 
440     case DemangledOnly:
441       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
442       return true;
443 
444     case MangledOnly:
445       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
446       return true;
447 
448     case MangledAndDemangled:
449       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
450       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
451       return true;
452   }
453   return false;
454 }
455 /// The encoding format for the Mangled object is as follows:
456 ///
457 /// uint8_t encoding;
458 /// char str1[]; (only if DemangledOnly, MangledOnly)
459 /// char str2[]; (only if MangledAndDemangled)
460 ///
461 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2
462 /// are only saved if we need them based on the encoding.
463 ///
464 /// Some mangled names have a mangled name that can be demangled by the built
465 /// in demanglers. These kinds of mangled objects know when the mangled and
466 /// demangled names are the counterparts for each other. This is done because
467 /// demangling is very expensive and avoiding demangling the same name twice
468 /// saves us a lot of compute time. For these kinds of names we only need to
469 /// save the mangled name and have the encoding set to "MangledOnly".
470 ///
471 /// If a mangled obejct has only a demangled name, then we save only that string
472 /// and have the encoding set to "DemangledOnly".
473 ///
474 /// Some mangled objects have both mangled and demangled names, but the
475 /// demangled name can not be computed from the mangled name. This is often used
476 /// for runtime named, like Objective C runtime V2 and V3 names. Both these
477 /// names must be saved and the encoding is set to "MangledAndDemangled".
478 ///
479 /// For a Mangled object with no names, we only need to set the encoding to
480 /// "Empty" and not store any string values.
Encode(DataEncoder & file,ConstStringTable & strtab) const481 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
482   MangledEncoding encoding = Empty;
483   if (m_mangled) {
484     encoding = MangledOnly;
485     if (m_demangled) {
486       // We have both mangled and demangled names. If the demangled name is the
487       // counterpart of the mangled name, then we only need to save the mangled
488       // named. If they are different, we need to save both.
489       ConstString s;
490       if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
491         encoding = MangledAndDemangled;
492     }
493   } else if (m_demangled) {
494     encoding = DemangledOnly;
495   }
496   file.AppendU8(encoding);
497   switch (encoding) {
498     case Empty:
499       break;
500     case DemangledOnly:
501       file.AppendU32(strtab.Add(m_demangled));
502       break;
503     case MangledOnly:
504       file.AppendU32(strtab.Add(m_mangled));
505       break;
506     case MangledAndDemangled:
507       file.AppendU32(strtab.Add(m_mangled));
508       file.AppendU32(strtab.Add(m_demangled));
509       break;
510   }
511 }
512