1 //===-- Mangled.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Mangled.h"
10 
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
21 
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Demangle/Demangle.h"
24 #include "llvm/Support/Compiler.h"
25 
26 #include <mutex>
27 #include <string>
28 #include <string_view>
29 #include <utility>
30 
31 #include <cstdlib>
32 #include <cstring>
33 using namespace lldb_private;
34 
35 static inline bool cstring_is_mangled(llvm::StringRef s) {
36   return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
37 }
38 
39 #pragma mark Mangled
40 
41 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
42   if (name.empty())
43     return Mangled::eManglingSchemeNone;
44 
45   if (name.startswith("?"))
46     return Mangled::eManglingSchemeMSVC;
47 
48   if (name.startswith("_R"))
49     return Mangled::eManglingSchemeRustV0;
50 
51   if (name.startswith("_D"))
52     return Mangled::eManglingSchemeD;
53 
54   if (name.startswith("_Z"))
55     return Mangled::eManglingSchemeItanium;
56 
57   // ___Z is a clang extension of block invocations
58   if (name.startswith("___Z"))
59     return Mangled::eManglingSchemeItanium;
60 
61   return Mangled::eManglingSchemeNone;
62 }
63 
64 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
65   if (s)
66     SetValue(s);
67 }
68 
69 Mangled::Mangled(llvm::StringRef name) {
70   if (!name.empty())
71     SetValue(ConstString(name));
72 }
73 
74 // Convert to bool operator. This allows code to check any Mangled objects
75 // to see if they contain anything valid using code such as:
76 //
77 //  Mangled mangled(...);
78 //  if (mangled)
79 //  { ...
80 Mangled::operator bool() const { return m_mangled || m_demangled; }
81 
82 // Clear the mangled and demangled values.
83 void Mangled::Clear() {
84   m_mangled.Clear();
85   m_demangled.Clear();
86 }
87 
88 // Compare the string values.
89 int Mangled::Compare(const Mangled &a, const Mangled &b) {
90   return ConstString::Compare(a.GetName(ePreferMangled),
91                               b.GetName(ePreferMangled));
92 }
93 
94 void Mangled::SetValue(ConstString name) {
95   if (name) {
96     if (cstring_is_mangled(name.GetStringRef())) {
97       m_demangled.Clear();
98       m_mangled = name;
99     } else {
100       m_demangled = name;
101       m_mangled.Clear();
102     }
103   } else {
104     m_demangled.Clear();
105     m_mangled.Clear();
106   }
107 }
108 
109 // Local helpers for different demangling implementations.
110 static char *GetMSVCDemangledStr(std::string_view M) {
111   char *demangled_cstr = llvm::microsoftDemangle(
112       M, nullptr, nullptr,
113       llvm::MSDemangleFlags(
114           llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
115           llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
116 
117   if (Log *log = GetLog(LLDBLog::Demangle)) {
118     if (demangled_cstr && demangled_cstr[0])
119       LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
120     else
121       LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
122   }
123 
124   return demangled_cstr;
125 }
126 
127 static char *GetItaniumDemangledStr(const char *M) {
128   char *demangled_cstr = nullptr;
129 
130   llvm::ItaniumPartialDemangler ipd;
131   bool err = ipd.partialDemangle(M);
132   if (!err) {
133     // Default buffer and size (will realloc in case it's too small).
134     size_t demangled_size = 80;
135     demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
136     demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
137 
138     assert(demangled_cstr &&
139            "finishDemangle must always succeed if partialDemangle did");
140     assert(demangled_cstr[demangled_size - 1] == '\0' &&
141            "Expected demangled_size to return length including trailing null");
142   }
143 
144   if (Log *log = GetLog(LLDBLog::Demangle)) {
145     if (demangled_cstr)
146       LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
147     else
148       LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
149   }
150 
151   return demangled_cstr;
152 }
153 
154 static char *GetRustV0DemangledStr(std::string_view M) {
155   char *demangled_cstr = llvm::rustDemangle(M);
156 
157   if (Log *log = GetLog(LLDBLog::Demangle)) {
158     if (demangled_cstr && demangled_cstr[0])
159       LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
160     else
161       LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M);
162   }
163 
164   return demangled_cstr;
165 }
166 
167 static char *GetDLangDemangledStr(std::string_view M) {
168   char *demangled_cstr = llvm::dlangDemangle(M);
169 
170   if (Log *log = GetLog(LLDBLog::Demangle)) {
171     if (demangled_cstr && demangled_cstr[0])
172       LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
173     else
174       LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
175   }
176 
177   return demangled_cstr;
178 }
179 
180 // Explicit demangling for scheduled requests during batch processing. This
181 // makes use of ItaniumPartialDemangler's rich demangle info
182 bool Mangled::GetRichManglingInfo(RichManglingContext &context,
183                                   SkipMangledNameFn *skip_mangled_name) {
184   // Others are not meant to arrive here. ObjC names or C's main() for example
185   // have their names stored in m_demangled, while m_mangled is empty.
186   assert(m_mangled);
187 
188   // Check whether or not we are interested in this name at all.
189   ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
190   if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
191     return false;
192 
193   switch (scheme) {
194   case eManglingSchemeNone:
195     // The current mangled_name_filter would allow llvm_unreachable here.
196     return false;
197 
198   case eManglingSchemeItanium:
199     // We want the rich mangling info here, so we don't care whether or not
200     // there is a demangled string in the pool already.
201     return context.FromItaniumName(m_mangled);
202 
203   case eManglingSchemeMSVC: {
204     // We have no rich mangling for MSVC-mangled names yet, so first try to
205     // demangle it if necessary.
206     if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
207       if (char *d = GetMSVCDemangledStr(m_mangled)) {
208         // Without the rich mangling info we have to demangle the full name.
209         // Copy it to string pool and connect the counterparts to accelerate
210         // later access in GetDemangledName().
211         m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
212                                                     m_mangled);
213         ::free(d);
214       } else {
215         m_demangled.SetCString("");
216       }
217     }
218 
219     if (m_demangled.IsEmpty()) {
220       // Cannot demangle it, so don't try parsing.
221       return false;
222     } else {
223       // Demangled successfully, we can try and parse it with
224       // CPlusPlusLanguage::MethodName.
225       return context.FromCxxMethodName(m_demangled);
226     }
227   }
228 
229   case eManglingSchemeRustV0:
230   case eManglingSchemeD:
231     // Rich demangling scheme is not supported
232     return false;
233   }
234   llvm_unreachable("Fully covered switch above!");
235 }
236 
237 // Generate the demangled name on demand using this accessor. Code in this
238 // class will need to use this accessor if it wishes to decode the demangled
239 // name. The result is cached and will be kept until a new string value is
240 // supplied to this object, or until the end of the object's lifetime.
241 ConstString Mangled::GetDemangledName() const {
242   // Check to make sure we have a valid mangled name and that we haven't
243   // already decoded our mangled name.
244   if (m_mangled && m_demangled.IsNull()) {
245     // Don't bother running anything that isn't mangled
246     const char *mangled_name = m_mangled.GetCString();
247     ManglingScheme mangling_scheme =
248         GetManglingScheme(m_mangled.GetStringRef());
249     if (mangling_scheme != eManglingSchemeNone &&
250         !m_mangled.GetMangledCounterpart(m_demangled)) {
251       // We didn't already mangle this name, demangle it and if all goes well
252       // add it to our map.
253       char *demangled_name = nullptr;
254       switch (mangling_scheme) {
255       case eManglingSchemeMSVC:
256         demangled_name = GetMSVCDemangledStr(mangled_name);
257         break;
258       case eManglingSchemeItanium: {
259         demangled_name = GetItaniumDemangledStr(mangled_name);
260         break;
261       }
262       case eManglingSchemeRustV0:
263         demangled_name = GetRustV0DemangledStr(m_mangled);
264         break;
265       case eManglingSchemeD:
266         demangled_name = GetDLangDemangledStr(m_mangled);
267         break;
268       case eManglingSchemeNone:
269         llvm_unreachable("eManglingSchemeNone was handled already");
270       }
271       if (demangled_name) {
272         m_demangled.SetStringWithMangledCounterpart(
273             llvm::StringRef(demangled_name), m_mangled);
274         free(demangled_name);
275       }
276     }
277     if (m_demangled.IsNull()) {
278       // Set the demangled string to the empty string to indicate we tried to
279       // parse it once and failed.
280       m_demangled.SetCString("");
281     }
282   }
283 
284   return m_demangled;
285 }
286 
287 ConstString Mangled::GetDisplayDemangledName() const {
288   return GetDemangledName();
289 }
290 
291 bool Mangled::NameMatches(const RegularExpression &regex) const {
292   if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
293     return true;
294 
295   ConstString demangled = GetDemangledName();
296   return demangled && regex.Execute(demangled.GetStringRef());
297 }
298 
299 // Get the demangled name if there is one, else return the mangled name.
300 ConstString Mangled::GetName(Mangled::NamePreference preference) const {
301   if (preference == ePreferMangled && m_mangled)
302     return m_mangled;
303 
304   // Call the accessor to make sure we get a demangled name in case it hasn't
305   // been demangled yet...
306   ConstString demangled = GetDemangledName();
307 
308   if (preference == ePreferDemangledWithoutArguments) {
309     if (Language *lang = Language::FindPlugin(GuessLanguage())) {
310       return lang->GetDemangledFunctionNameWithoutArguments(*this);
311     }
312   }
313   if (preference == ePreferDemangled) {
314     if (demangled)
315       return demangled;
316     return m_mangled;
317   }
318   return demangled;
319 }
320 
321 // Dump a Mangled object to stream "s". We don't force our demangled name to be
322 // computed currently (we don't use the accessor).
323 void Mangled::Dump(Stream *s) const {
324   if (m_mangled) {
325     *s << ", mangled = " << m_mangled;
326   }
327   if (m_demangled) {
328     const char *demangled = m_demangled.AsCString();
329     s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
330   }
331 }
332 
333 // Dumps a debug version of this string with extra object and state information
334 // to stream "s".
335 void Mangled::DumpDebug(Stream *s) const {
336   s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
337             static_cast<const void *>(this));
338   m_mangled.DumpDebug(s);
339   s->Printf(", demangled = ");
340   m_demangled.DumpDebug(s);
341 }
342 
343 // Return the size in byte that this object takes in memory. The size includes
344 // the size of the objects it owns, and not the strings that it references
345 // because they are shared strings.
346 size_t Mangled::MemorySize() const {
347   return m_mangled.MemorySize() + m_demangled.MemorySize();
348 }
349 
350 // We "guess" the language because we can't determine a symbol's language from
351 // it's name.  For example, a Pascal symbol can be mangled using the C++
352 // Itanium scheme, and defined in a compilation unit within the same module as
353 // other C++ units.  In addition, different targets could have different ways
354 // of mangling names from a given language, likewise the compilation units
355 // within those targets.
356 lldb::LanguageType Mangled::GuessLanguage() const {
357   lldb::LanguageType result = lldb::eLanguageTypeUnknown;
358   // Ask each language plugin to check if the mangled name belongs to it.
359   Language::ForEach([this, &result](Language *l) {
360     if (l->SymbolNameFitsToLanguage(*this)) {
361       result = l->GetLanguageType();
362       return false;
363     }
364     return true;
365   });
366   return result;
367 }
368 
369 // Dump OBJ to the supplied stream S.
370 Stream &operator<<(Stream &s, const Mangled &obj) {
371   if (obj.GetMangledName())
372     s << "mangled = '" << obj.GetMangledName() << "'";
373 
374   ConstString demangled = obj.GetDemangledName();
375   if (demangled)
376     s << ", demangled = '" << demangled << '\'';
377   else
378     s << ", demangled = <error>";
379   return s;
380 }
381 
// Tag written in front of every encoded Mangled object. It records which
// names follow so that no more data than necessary has to be stored.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is present; the tag byte is the
  /// entire encoding.
  Empty = 0u,
  /// Only a demangled name is present (there is no mangled name), so only
  /// the demangled name is stored.
  DemangledOnly = 1u,
  /// Only the mangled name is stored. Used when the demangled name, if any,
  /// is the mangled/demangled counterpart of the mangled name and can
  /// therefore be recomputed.
  MangledOnly = 2u,
  /// Both names are stored because they are unrelated: the demangled name
  /// cannot be derived from the mangled one. This happens for names that are
  /// not truly mangled but that we still want to look up by name and type in
  /// the symbol table, e.g. Objective C symbols like "OBJC_CLASS_$_NSValue",
  /// where the mangled name is "OBJC_CLASS_$_NSValue" and the demangled name
  /// is manually set to "NSValue". Demangling "OBJC_CLASS_$_NSValue" would
  /// fail, yet both names must be preserved.
  MangledAndDemangled = 3u
};
407 
408 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
409                      const StringTableReader &strtab) {
410   m_mangled.Clear();
411   m_demangled.Clear();
412   MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
413   switch (encoding) {
414     case Empty:
415       return true;
416 
417     case DemangledOnly:
418       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
419       return true;
420 
421     case MangledOnly:
422       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
423       return true;
424 
425     case MangledAndDemangled:
426       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
427       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
428       return true;
429   }
430   return false;
431 }
432 /// The encoding format for the Mangled object is as follows:
433 ///
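/// uint8_t encoding;
/// uint32_t str1; (all encodings except Empty)
/// uint32_t str2; (only if MangledAndDemangled)
///
/// str1 and str2 are not inline character data: each is the 32-bit value
/// returned by ConstStringTable::Add() for the string, which Decode() passes
/// to StringTableReader::Get() to recover it. For MangledAndDemangled, str1 is
/// the mangled name and str2 is the demangled name. str1 and str2 are only
/// saved if we need them based on the encoding.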
440 ///
441 /// Some mangled names have a mangled name that can be demangled by the built
442 /// in demanglers. These kinds of mangled objects know when the mangled and
443 /// demangled names are the counterparts for each other. This is done because
444 /// demangling is very expensive and avoiding demangling the same name twice
445 /// saves us a lot of compute time. For these kinds of names we only need to
446 /// save the mangled name and have the encoding set to "MangledOnly".
447 ///
/// If a mangled object has only a demangled name, then we save only that string
/// and have the encoding set to "DemangledOnly".
450 ///
/// Some mangled objects have both mangled and demangled names, but the
/// demangled name cannot be computed from the mangled name. This is often used
/// for runtime names, like Objective C runtime V2 and V3 names. Both these
/// names must be saved and the encoding is set to "MangledAndDemangled".
455 ///
456 /// For a Mangled object with no names, we only need to set the encoding to
457 /// "Empty" and not store any string values.
458 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
459   MangledEncoding encoding = Empty;
460   if (m_mangled) {
461     encoding = MangledOnly;
462     if (m_demangled) {
463       // We have both mangled and demangled names. If the demangled name is the
464       // counterpart of the mangled name, then we only need to save the mangled
465       // named. If they are different, we need to save both.
466       ConstString s;
467       if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
468         encoding = MangledAndDemangled;
469     }
470   } else if (m_demangled) {
471     encoding = DemangledOnly;
472   }
473   file.AppendU8(encoding);
474   switch (encoding) {
475     case Empty:
476       break;
477     case DemangledOnly:
478       file.AppendU32(strtab.Add(m_demangled));
479       break;
480     case MangledOnly:
481       file.AppendU32(strtab.Add(m_mangled));
482       break;
483     case MangledAndDemangled:
484       file.AppendU32(strtab.Add(m_mangled));
485       file.AppendU32(strtab.Add(m_demangled));
486       break;
487   }
488 }
489