1 //===-- Mangled.cpp -------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Mangled.h" 10 11 #include "lldb/Core/DataFileCache.h" 12 #include "lldb/Core/RichManglingContext.h" 13 #include "lldb/Target/Language.h" 14 #include "lldb/Utility/ConstString.h" 15 #include "lldb/Utility/DataEncoder.h" 16 #include "lldb/Utility/Log.h" 17 #include "lldb/Utility/Logging.h" 18 #include "lldb/Utility/RegularExpression.h" 19 #include "lldb/Utility/Stream.h" 20 #include "lldb/lldb-enumerations.h" 21 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Demangle/Demangle.h" 24 #include "llvm/Support/Compiler.h" 25 26 #include <mutex> 27 #include <string> 28 #include <utility> 29 30 #include <cstdlib> 31 #include <cstring> 32 using namespace lldb_private; 33 34 static inline bool cstring_is_mangled(llvm::StringRef s) { 35 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone; 36 } 37 38 #pragma mark Mangled 39 40 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { 41 if (name.empty()) 42 return Mangled::eManglingSchemeNone; 43 44 if (name.startswith("?")) 45 return Mangled::eManglingSchemeMSVC; 46 47 if (name.startswith("_R")) 48 return Mangled::eManglingSchemeRustV0; 49 50 if (name.startswith("_D")) 51 return Mangled::eManglingSchemeD; 52 53 if (name.startswith("_Z")) 54 return Mangled::eManglingSchemeItanium; 55 56 // ___Z is a clang extension of block invocations 57 if (name.startswith("___Z")) 58 return Mangled::eManglingSchemeItanium; 59 60 return Mangled::eManglingSchemeNone; 61 } 62 63 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { 64 if (s) 65 SetValue(s); 66 } 67 68 Mangled::Mangled(llvm::StringRef name) { 69 if (!name.empty()) 70 SetValue(ConstString(name)); 71 } 72 73 // Convert to bool operator. This allows code to check any Mangled objects 74 // to see if they contain anything valid using code such as: 75 // 76 // Mangled mangled(...); 77 // if (mangled) 78 // { ... 79 Mangled::operator bool() const { return m_mangled || m_demangled; } 80 81 // Clear the mangled and demangled values. 82 void Mangled::Clear() { 83 m_mangled.Clear(); 84 m_demangled.Clear(); 85 } 86 87 // Compare the string values. 88 int Mangled::Compare(const Mangled &a, const Mangled &b) { 89 return ConstString::Compare(a.GetName(ePreferMangled), 90 b.GetName(ePreferMangled)); 91 } 92 93 // Set the string value in this objects. If "mangled" is true, then the mangled 94 // named is set with the new value in "s", else the demangled name is set. 95 void Mangled::SetValue(ConstString s, bool mangled) { 96 if (s) { 97 if (mangled) { 98 m_demangled.Clear(); 99 m_mangled = s; 100 } else { 101 m_demangled = s; 102 m_mangled.Clear(); 103 } 104 } else { 105 m_demangled.Clear(); 106 m_mangled.Clear(); 107 } 108 } 109 110 void Mangled::SetValue(ConstString name) { 111 if (name) { 112 if (cstring_is_mangled(name.GetStringRef())) { 113 m_demangled.Clear(); 114 m_mangled = name; 115 } else { 116 m_demangled = name; 117 m_mangled.Clear(); 118 } 119 } else { 120 m_demangled.Clear(); 121 m_mangled.Clear(); 122 } 123 } 124 125 // Local helpers for different demangling implementations. 126 static char *GetMSVCDemangledStr(const char *M) { 127 char *demangled_cstr = llvm::microsoftDemangle( 128 M, nullptr, nullptr, nullptr, nullptr, 129 llvm::MSDemangleFlags( 130 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | 131 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); 132 133 if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) { 134 if (demangled_cstr && demangled_cstr[0]) 135 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M, demangled_cstr); 136 else 137 LLDB_LOGF(log, "demangled msvc: %s -> error", M); 138 } 139 140 return demangled_cstr; 141 } 142 143 static char *GetItaniumDemangledStr(const char *M) { 144 char *demangled_cstr = nullptr; 145 146 llvm::ItaniumPartialDemangler ipd; 147 bool err = ipd.partialDemangle(M); 148 if (!err) { 149 // Default buffer and size (will realloc in case it's too small). 150 size_t demangled_size = 80; 151 demangled_cstr = static_cast<char *>(std::malloc(demangled_size)); 152 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size); 153 154 assert(demangled_cstr && 155 "finishDemangle must always succeed if partialDemangle did"); 156 assert(demangled_cstr[demangled_size - 1] == '\0' && 157 "Expected demangled_size to return length including trailing null"); 158 } 159 160 if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) { 161 if (demangled_cstr) 162 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr); 163 else 164 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M); 165 } 166 167 return demangled_cstr; 168 } 169 170 static char *GetRustV0DemangledStr(const char *M) { 171 char *demangled_cstr = llvm::rustDemangle(M, nullptr, nullptr, nullptr); 172 173 if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) { 174 if (demangled_cstr && demangled_cstr[0]) 175 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr); 176 else 177 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M); 178 } 179 180 return demangled_cstr; 181 } 182 183 static char *GetDLangDemangledStr(const char *M) { 184 char *demangled_cstr = llvm::dlangDemangle(M); 185 186 if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) { 187 if (demangled_cstr && demangled_cstr[0]) 188 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr); 189 else 190 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M); 191 } 192 193 return demangled_cstr; 194 } 195 196 // Explicit demangling for scheduled requests during batch processing. This 197 // makes use of ItaniumPartialDemangler's rich demangle info 198 bool Mangled::GetRichManglingInfo(RichManglingContext &context, 199 SkipMangledNameFn *skip_mangled_name) { 200 // Others are not meant to arrive here. ObjC names or C's main() for example 201 // have their names stored in m_demangled, while m_mangled is empty. 202 assert(m_mangled); 203 204 // Check whether or not we are interested in this name at all. 205 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef()); 206 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) 207 return false; 208 209 switch (scheme) { 210 case eManglingSchemeNone: 211 // The current mangled_name_filter would allow llvm_unreachable here. 212 return false; 213 214 case eManglingSchemeItanium: 215 // We want the rich mangling info here, so we don't care whether or not 216 // there is a demangled string in the pool already. 217 return context.FromItaniumName(m_mangled); 218 219 case eManglingSchemeMSVC: { 220 // We have no rich mangling for MSVC-mangled names yet, so first try to 221 // demangle it if necessary. 222 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) { 223 if (char *d = GetMSVCDemangledStr(m_mangled.GetCString())) { 224 // Without the rich mangling info we have to demangle the full name. 225 // Copy it to string pool and connect the counterparts to accelerate 226 // later access in GetDemangledName(). 227 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d), 228 m_mangled); 229 ::free(d); 230 } else { 231 m_demangled.SetCString(""); 232 } 233 } 234 235 if (m_demangled.IsEmpty()) { 236 // Cannot demangle it, so don't try parsing. 237 return false; 238 } else { 239 // Demangled successfully, we can try and parse it with 240 // CPlusPlusLanguage::MethodName. 241 return context.FromCxxMethodName(m_demangled); 242 } 243 } 244 245 case eManglingSchemeRustV0: 246 case eManglingSchemeD: 247 // Rich demangling scheme is not supported 248 return false; 249 } 250 llvm_unreachable("Fully covered switch above!"); 251 } 252 253 // Generate the demangled name on demand using this accessor. Code in this 254 // class will need to use this accessor if it wishes to decode the demangled 255 // name. The result is cached and will be kept until a new string value is 256 // supplied to this object, or until the end of the object's lifetime. 257 ConstString Mangled::GetDemangledName() const { 258 // Check to make sure we have a valid mangled name and that we haven't 259 // already decoded our mangled name. 260 if (m_mangled && m_demangled.IsNull()) { 261 // Don't bother running anything that isn't mangled 262 const char *mangled_name = m_mangled.GetCString(); 263 ManglingScheme mangling_scheme = 264 GetManglingScheme(m_mangled.GetStringRef()); 265 if (mangling_scheme != eManglingSchemeNone && 266 !m_mangled.GetMangledCounterpart(m_demangled)) { 267 // We didn't already mangle this name, demangle it and if all goes well 268 // add it to our map. 269 char *demangled_name = nullptr; 270 switch (mangling_scheme) { 271 case eManglingSchemeMSVC: 272 demangled_name = GetMSVCDemangledStr(mangled_name); 273 break; 274 case eManglingSchemeItanium: { 275 demangled_name = GetItaniumDemangledStr(mangled_name); 276 break; 277 } 278 case eManglingSchemeRustV0: 279 demangled_name = GetRustV0DemangledStr(mangled_name); 280 break; 281 case eManglingSchemeD: 282 demangled_name = GetDLangDemangledStr(mangled_name); 283 break; 284 case eManglingSchemeNone: 285 llvm_unreachable("eManglingSchemeNone was handled already"); 286 } 287 if (demangled_name) { 288 m_demangled.SetStringWithMangledCounterpart( 289 llvm::StringRef(demangled_name), m_mangled); 290 free(demangled_name); 291 } 292 } 293 if (m_demangled.IsNull()) { 294 // Set the demangled string to the empty string to indicate we tried to 295 // parse it once and failed. 296 m_demangled.SetCString(""); 297 } 298 } 299 300 return m_demangled; 301 } 302 303 ConstString Mangled::GetDisplayDemangledName() const { 304 return GetDemangledName(); 305 } 306 307 bool Mangled::NameMatches(const RegularExpression ®ex) const { 308 if (m_mangled && regex.Execute(m_mangled.GetStringRef())) 309 return true; 310 311 ConstString demangled = GetDemangledName(); 312 return demangled && regex.Execute(demangled.GetStringRef()); 313 } 314 315 // Get the demangled name if there is one, else return the mangled name. 316 ConstString Mangled::GetName(Mangled::NamePreference preference) const { 317 if (preference == ePreferMangled && m_mangled) 318 return m_mangled; 319 320 // Call the accessor to make sure we get a demangled name in case it hasn't 321 // been demangled yet... 322 ConstString demangled = GetDemangledName(); 323 324 if (preference == ePreferDemangledWithoutArguments) { 325 if (Language *lang = Language::FindPlugin(GuessLanguage())) { 326 return lang->GetDemangledFunctionNameWithoutArguments(*this); 327 } 328 } 329 if (preference == ePreferDemangled) { 330 if (demangled) 331 return demangled; 332 return m_mangled; 333 } 334 return demangled; 335 } 336 337 // Dump a Mangled object to stream "s". We don't force our demangled name to be 338 // computed currently (we don't use the accessor). 339 void Mangled::Dump(Stream *s) const { 340 if (m_mangled) { 341 *s << ", mangled = " << m_mangled; 342 } 343 if (m_demangled) { 344 const char *demangled = m_demangled.AsCString(); 345 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>"); 346 } 347 } 348 349 // Dumps a debug version of this string with extra object and state information 350 // to stream "s". 351 void Mangled::DumpDebug(Stream *s) const { 352 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2), 353 static_cast<const void *>(this)); 354 m_mangled.DumpDebug(s); 355 s->Printf(", demangled = "); 356 m_demangled.DumpDebug(s); 357 } 358 359 // Return the size in byte that this object takes in memory. The size includes 360 // the size of the objects it owns, and not the strings that it references 361 // because they are shared strings. 362 size_t Mangled::MemorySize() const { 363 return m_mangled.MemorySize() + m_demangled.MemorySize(); 364 } 365 366 // We "guess" the language because we can't determine a symbol's language from 367 // it's name. For example, a Pascal symbol can be mangled using the C++ 368 // Itanium scheme, and defined in a compilation unit within the same module as 369 // other C++ units. In addition, different targets could have different ways 370 // of mangling names from a given language, likewise the compilation units 371 // within those targets. 372 lldb::LanguageType Mangled::GuessLanguage() const { 373 lldb::LanguageType result = lldb::eLanguageTypeUnknown; 374 // Ask each language plugin to check if the mangled name belongs to it. 375 Language::ForEach([this, &result](Language *l) { 376 if (l->SymbolNameFitsToLanguage(*this)) { 377 result = l->GetLanguageType(); 378 return false; 379 } 380 return true; 381 }); 382 return result; 383 } 384 385 // Dump OBJ to the supplied stream S. 386 Stream &operator<<(Stream &s, const Mangled &obj) { 387 if (obj.GetMangledName()) 388 s << "mangled = '" << obj.GetMangledName() << "'"; 389 390 ConstString demangled = obj.GetDemangledName(); 391 if (demangled) 392 s << ", demangled = '" << demangled << '\''; 393 else 394 s << ", demangled = <error>"; 395 return s; 396 } 397 398 // When encoding Mangled objects we can get away with encoding as little 399 // information as is required. The enumeration below helps us to efficiently 400 // encode Mangled objects. 401 enum MangledEncoding { 402 /// If the Mangled object has neither a mangled name or demangled name we can 403 /// encode the object with one zero byte using the Empty enumeration. 404 Empty = 0u, 405 /// If the Mangled object has only a demangled name and no mangled named, we 406 /// can encode only the demangled name. 407 DemangledOnly = 1u, 408 /// If the mangle name can calculate the demangled name (it is the 409 /// mangled/demangled counterpart), then we only need to encode the mangled 410 /// name as the demangled name can be recomputed. 411 MangledOnly = 2u, 412 /// If we have a Mangled object with two different names that are not related 413 /// then we need to save both strings. This can happen if we have a name that 414 /// isn't a true mangled name, but we want to be able to lookup a symbol by 415 /// name and type in the symbol table. We do this for Objective C symbols like 416 /// "OBJC_CLASS_$_NSValue" where the mangled named will be set to 417 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to 418 /// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it 419 /// would fail, but in these cases we want these unrelated names to be 420 /// preserved. 421 MangledAndDemangled = 3u 422 }; 423 424 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, 425 const StringTableReader &strtab) { 426 m_mangled.Clear(); 427 m_demangled.Clear(); 428 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); 429 switch (encoding) { 430 case Empty: 431 return true; 432 433 case DemangledOnly: 434 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 435 return true; 436 437 case MangledOnly: 438 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 439 return true; 440 441 case MangledAndDemangled: 442 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 443 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 444 return true; 445 } 446 return false; 447 } 448 /// The encoding format for the Mangled object is as follows: 449 /// 450 /// uint8_t encoding; 451 /// char str1[]; (only if DemangledOnly, MangledOnly) 452 /// char str2[]; (only if MangledAndDemangled) 453 /// 454 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 455 /// are only saved if we need them based on the encoding. 456 /// 457 /// Some mangled names have a mangled name that can be demangled by the built 458 /// in demanglers. These kinds of mangled objects know when the mangled and 459 /// demangled names are the counterparts for each other. This is done because 460 /// demangling is very expensive and avoiding demangling the same name twice 461 /// saves us a lot of compute time. For these kinds of names we only need to 462 /// save the mangled name and have the encoding set to "MangledOnly". 463 /// 464 /// If a mangled obejct has only a demangled name, then we save only that string 465 /// and have the encoding set to "DemangledOnly". 466 /// 467 /// Some mangled objects have both mangled and demangled names, but the 468 /// demangled name can not be computed from the mangled name. This is often used 469 /// for runtime named, like Objective C runtime V2 and V3 names. Both these 470 /// names must be saved and the encoding is set to "MangledAndDemangled". 471 /// 472 /// For a Mangled object with no names, we only need to set the encoding to 473 /// "Empty" and not store any string values. 474 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { 475 MangledEncoding encoding = Empty; 476 if (m_mangled) { 477 encoding = MangledOnly; 478 if (m_demangled) { 479 // We have both mangled and demangled names. If the demangled name is the 480 // counterpart of the mangled name, then we only need to save the mangled 481 // named. If they are different, we need to save both. 482 ConstString s; 483 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled)) 484 encoding = MangledAndDemangled; 485 } 486 } else if (m_demangled) { 487 encoding = DemangledOnly; 488 } 489 file.AppendU8(encoding); 490 switch (encoding) { 491 case Empty: 492 break; 493 case DemangledOnly: 494 file.AppendU32(strtab.Add(m_demangled)); 495 break; 496 case MangledOnly: 497 file.AppendU32(strtab.Add(m_mangled)); 498 break; 499 case MangledAndDemangled: 500 file.AppendU32(strtab.Add(m_mangled)); 501 file.AppendU32(strtab.Add(m_demangled)); 502 break; 503 } 504 } 505