1 //===-- Mangled.cpp -------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Mangled.h" 10 11 #include "lldb/Core/DataFileCache.h" 12 #include "lldb/Core/RichManglingContext.h" 13 #include "lldb/Target/Language.h" 14 #include "lldb/Utility/ConstString.h" 15 #include "lldb/Utility/DataEncoder.h" 16 #include "lldb/Utility/LLDBLog.h" 17 #include "lldb/Utility/Log.h" 18 #include "lldb/Utility/RegularExpression.h" 19 #include "lldb/Utility/Stream.h" 20 #include "lldb/lldb-enumerations.h" 21 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Demangle/Demangle.h" 24 #include "llvm/Support/Compiler.h" 25 26 #include <mutex> 27 #include <string> 28 #include <string_view> 29 #include <utility> 30 31 #include <cstdlib> 32 #include <cstring> 33 using namespace lldb_private; 34 35 static inline bool cstring_is_mangled(llvm::StringRef s) { 36 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone; 37 } 38 39 #pragma mark Mangled 40 41 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { 42 if (name.empty()) 43 return Mangled::eManglingSchemeNone; 44 45 if (name.starts_with("?")) 46 return Mangled::eManglingSchemeMSVC; 47 48 if (name.starts_with("_R")) 49 return Mangled::eManglingSchemeRustV0; 50 51 if (name.starts_with("_D")) 52 return Mangled::eManglingSchemeD; 53 54 if (name.starts_with("_Z")) 55 return Mangled::eManglingSchemeItanium; 56 57 // ___Z is a clang extension of block invocations 58 if (name.starts_with("___Z")) 59 return Mangled::eManglingSchemeItanium; 60 61 // Swift's older style of mangling used "_T" as a mangling prefix. This can 62 // lead to false positives with other symbols that just so happen to start 63 // with "_T". To minimize the chance of that happening, we only return true 64 // for select old-style swift mangled names. The known cases are ObjC classes 65 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC". 66 // Protocols are prefixed with "_TtP". 67 if (name.starts_with("_TtC") || name.starts_with("_TtGC") || 68 name.starts_with("_TtP")) 69 return Mangled::eManglingSchemeSwift; 70 71 // Swift 4.2 used "$S" and "_$S". 72 // Swift 5 and onward uses "$s" and "_$s". 73 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames. 74 if (name.starts_with("$S") || name.starts_with("_$S") || 75 name.starts_with("$s") || name.starts_with("_$s") || 76 name.starts_with("@__swiftmacro_")) 77 return Mangled::eManglingSchemeSwift; 78 79 return Mangled::eManglingSchemeNone; 80 } 81 82 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { 83 if (s) 84 SetValue(s); 85 } 86 87 Mangled::Mangled(llvm::StringRef name) { 88 if (!name.empty()) 89 SetValue(ConstString(name)); 90 } 91 92 // Convert to bool operator. This allows code to check any Mangled objects 93 // to see if they contain anything valid using code such as: 94 // 95 // Mangled mangled(...); 96 // if (mangled) 97 // { ... 98 Mangled::operator bool() const { return m_mangled || m_demangled; } 99 100 // Clear the mangled and demangled values. 101 void Mangled::Clear() { 102 m_mangled.Clear(); 103 m_demangled.Clear(); 104 } 105 106 // Compare the string values. 107 int Mangled::Compare(const Mangled &a, const Mangled &b) { 108 return ConstString::Compare(a.GetName(ePreferMangled), 109 b.GetName(ePreferMangled)); 110 } 111 112 void Mangled::SetValue(ConstString name) { 113 if (name) { 114 if (cstring_is_mangled(name.GetStringRef())) { 115 m_demangled.Clear(); 116 m_mangled = name; 117 } else { 118 m_demangled = name; 119 m_mangled.Clear(); 120 } 121 } else { 122 m_demangled.Clear(); 123 m_mangled.Clear(); 124 } 125 } 126 127 // Local helpers for different demangling implementations. 128 static char *GetMSVCDemangledStr(std::string_view M) { 129 char *demangled_cstr = llvm::microsoftDemangle( 130 M, nullptr, nullptr, 131 llvm::MSDemangleFlags( 132 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | 133 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); 134 135 if (Log *log = GetLog(LLDBLog::Demangle)) { 136 if (demangled_cstr && demangled_cstr[0]) 137 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr); 138 else 139 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data()); 140 } 141 142 return demangled_cstr; 143 } 144 145 static char *GetItaniumDemangledStr(const char *M) { 146 char *demangled_cstr = nullptr; 147 148 llvm::ItaniumPartialDemangler ipd; 149 bool err = ipd.partialDemangle(M); 150 if (!err) { 151 // Default buffer and size (will realloc in case it's too small). 152 size_t demangled_size = 80; 153 demangled_cstr = static_cast<char *>(std::malloc(demangled_size)); 154 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size); 155 156 assert(demangled_cstr && 157 "finishDemangle must always succeed if partialDemangle did"); 158 assert(demangled_cstr[demangled_size - 1] == '\0' && 159 "Expected demangled_size to return length including trailing null"); 160 } 161 162 if (Log *log = GetLog(LLDBLog::Demangle)) { 163 if (demangled_cstr) 164 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr); 165 else 166 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M); 167 } 168 169 return demangled_cstr; 170 } 171 172 static char *GetRustV0DemangledStr(std::string_view M) { 173 char *demangled_cstr = llvm::rustDemangle(M); 174 175 if (Log *log = GetLog(LLDBLog::Demangle)) { 176 if (demangled_cstr && demangled_cstr[0]) 177 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr); 178 else 179 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M); 180 } 181 182 return demangled_cstr; 183 } 184 185 static char *GetDLangDemangledStr(std::string_view M) { 186 char *demangled_cstr = llvm::dlangDemangle(M); 187 188 if (Log *log = GetLog(LLDBLog::Demangle)) { 189 if (demangled_cstr && demangled_cstr[0]) 190 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr); 191 else 192 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M); 193 } 194 195 return demangled_cstr; 196 } 197 198 // Explicit demangling for scheduled requests during batch processing. This 199 // makes use of ItaniumPartialDemangler's rich demangle info 200 bool Mangled::GetRichManglingInfo(RichManglingContext &context, 201 SkipMangledNameFn *skip_mangled_name) { 202 // Others are not meant to arrive here. ObjC names or C's main() for example 203 // have their names stored in m_demangled, while m_mangled is empty. 204 assert(m_mangled); 205 206 // Check whether or not we are interested in this name at all. 207 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef()); 208 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) 209 return false; 210 211 switch (scheme) { 212 case eManglingSchemeNone: 213 // The current mangled_name_filter would allow llvm_unreachable here. 214 return false; 215 216 case eManglingSchemeItanium: 217 // We want the rich mangling info here, so we don't care whether or not 218 // there is a demangled string in the pool already. 219 return context.FromItaniumName(m_mangled); 220 221 case eManglingSchemeMSVC: { 222 // We have no rich mangling for MSVC-mangled names yet, so first try to 223 // demangle it if necessary. 224 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) { 225 if (char *d = GetMSVCDemangledStr(m_mangled)) { 226 // Without the rich mangling info we have to demangle the full name. 227 // Copy it to string pool and connect the counterparts to accelerate 228 // later access in GetDemangledName(). 229 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d), 230 m_mangled); 231 ::free(d); 232 } else { 233 m_demangled.SetCString(""); 234 } 235 } 236 237 if (m_demangled.IsEmpty()) { 238 // Cannot demangle it, so don't try parsing. 239 return false; 240 } else { 241 // Demangled successfully, we can try and parse it with 242 // CPlusPlusLanguage::MethodName. 243 return context.FromCxxMethodName(m_demangled); 244 } 245 } 246 247 case eManglingSchemeRustV0: 248 case eManglingSchemeD: 249 case eManglingSchemeSwift: 250 // Rich demangling scheme is not supported 251 return false; 252 } 253 llvm_unreachable("Fully covered switch above!"); 254 } 255 256 // Generate the demangled name on demand using this accessor. Code in this 257 // class will need to use this accessor if it wishes to decode the demangled 258 // name. The result is cached and will be kept until a new string value is 259 // supplied to this object, or until the end of the object's lifetime. 260 ConstString Mangled::GetDemangledName() const { 261 // Check to make sure we have a valid mangled name and that we haven't 262 // already decoded our mangled name. 263 if (m_mangled && m_demangled.IsNull()) { 264 // Don't bother running anything that isn't mangled 265 const char *mangled_name = m_mangled.GetCString(); 266 ManglingScheme mangling_scheme = 267 GetManglingScheme(m_mangled.GetStringRef()); 268 if (mangling_scheme != eManglingSchemeNone && 269 !m_mangled.GetMangledCounterpart(m_demangled)) { 270 // We didn't already mangle this name, demangle it and if all goes well 271 // add it to our map. 272 char *demangled_name = nullptr; 273 switch (mangling_scheme) { 274 case eManglingSchemeMSVC: 275 demangled_name = GetMSVCDemangledStr(mangled_name); 276 break; 277 case eManglingSchemeItanium: { 278 demangled_name = GetItaniumDemangledStr(mangled_name); 279 break; 280 } 281 case eManglingSchemeRustV0: 282 demangled_name = GetRustV0DemangledStr(m_mangled); 283 break; 284 case eManglingSchemeD: 285 demangled_name = GetDLangDemangledStr(m_mangled); 286 break; 287 case eManglingSchemeSwift: 288 // Demangling a swift name requires the swift compiler. This is 289 // explicitly unsupported on llvm.org. 290 break; 291 case eManglingSchemeNone: 292 llvm_unreachable("eManglingSchemeNone was handled already"); 293 } 294 if (demangled_name) { 295 m_demangled.SetStringWithMangledCounterpart( 296 llvm::StringRef(demangled_name), m_mangled); 297 free(demangled_name); 298 } 299 } 300 if (m_demangled.IsNull()) { 301 // Set the demangled string to the empty string to indicate we tried to 302 // parse it once and failed. 303 m_demangled.SetCString(""); 304 } 305 } 306 307 return m_demangled; 308 } 309 310 ConstString Mangled::GetDisplayDemangledName() const { 311 return GetDemangledName(); 312 } 313 314 bool Mangled::NameMatches(const RegularExpression ®ex) const { 315 if (m_mangled && regex.Execute(m_mangled.GetStringRef())) 316 return true; 317 318 ConstString demangled = GetDemangledName(); 319 return demangled && regex.Execute(demangled.GetStringRef()); 320 } 321 322 // Get the demangled name if there is one, else return the mangled name. 323 ConstString Mangled::GetName(Mangled::NamePreference preference) const { 324 if (preference == ePreferMangled && m_mangled) 325 return m_mangled; 326 327 // Call the accessor to make sure we get a demangled name in case it hasn't 328 // been demangled yet... 329 ConstString demangled = GetDemangledName(); 330 331 if (preference == ePreferDemangledWithoutArguments) { 332 if (Language *lang = Language::FindPlugin(GuessLanguage())) { 333 return lang->GetDemangledFunctionNameWithoutArguments(*this); 334 } 335 } 336 if (preference == ePreferDemangled) { 337 if (demangled) 338 return demangled; 339 return m_mangled; 340 } 341 return demangled; 342 } 343 344 // Dump a Mangled object to stream "s". We don't force our demangled name to be 345 // computed currently (we don't use the accessor). 346 void Mangled::Dump(Stream *s) const { 347 if (m_mangled) { 348 *s << ", mangled = " << m_mangled; 349 } 350 if (m_demangled) { 351 const char *demangled = m_demangled.AsCString(); 352 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>"); 353 } 354 } 355 356 // Dumps a debug version of this string with extra object and state information 357 // to stream "s". 358 void Mangled::DumpDebug(Stream *s) const { 359 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2), 360 static_cast<const void *>(this)); 361 m_mangled.DumpDebug(s); 362 s->Printf(", demangled = "); 363 m_demangled.DumpDebug(s); 364 } 365 366 // Return the size in byte that this object takes in memory. The size includes 367 // the size of the objects it owns, and not the strings that it references 368 // because they are shared strings. 369 size_t Mangled::MemorySize() const { 370 return m_mangled.MemorySize() + m_demangled.MemorySize(); 371 } 372 373 // We "guess" the language because we can't determine a symbol's language from 374 // it's name. For example, a Pascal symbol can be mangled using the C++ 375 // Itanium scheme, and defined in a compilation unit within the same module as 376 // other C++ units. In addition, different targets could have different ways 377 // of mangling names from a given language, likewise the compilation units 378 // within those targets. 379 lldb::LanguageType Mangled::GuessLanguage() const { 380 lldb::LanguageType result = lldb::eLanguageTypeUnknown; 381 // Ask each language plugin to check if the mangled name belongs to it. 382 Language::ForEach([this, &result](Language *l) { 383 if (l->SymbolNameFitsToLanguage(*this)) { 384 result = l->GetLanguageType(); 385 return false; 386 } 387 return true; 388 }); 389 return result; 390 } 391 392 // Dump OBJ to the supplied stream S. 393 Stream &operator<<(Stream &s, const Mangled &obj) { 394 if (obj.GetMangledName()) 395 s << "mangled = '" << obj.GetMangledName() << "'"; 396 397 ConstString demangled = obj.GetDemangledName(); 398 if (demangled) 399 s << ", demangled = '" << demangled << '\''; 400 else 401 s << ", demangled = <error>"; 402 return s; 403 } 404 405 // When encoding Mangled objects we can get away with encoding as little 406 // information as is required. The enumeration below helps us to efficiently 407 // encode Mangled objects. 408 enum MangledEncoding { 409 /// If the Mangled object has neither a mangled name or demangled name we can 410 /// encode the object with one zero byte using the Empty enumeration. 411 Empty = 0u, 412 /// If the Mangled object has only a demangled name and no mangled named, we 413 /// can encode only the demangled name. 414 DemangledOnly = 1u, 415 /// If the mangle name can calculate the demangled name (it is the 416 /// mangled/demangled counterpart), then we only need to encode the mangled 417 /// name as the demangled name can be recomputed. 418 MangledOnly = 2u, 419 /// If we have a Mangled object with two different names that are not related 420 /// then we need to save both strings. This can happen if we have a name that 421 /// isn't a true mangled name, but we want to be able to lookup a symbol by 422 /// name and type in the symbol table. We do this for Objective C symbols like 423 /// "OBJC_CLASS_$_NSValue" where the mangled named will be set to 424 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to 425 /// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it 426 /// would fail, but in these cases we want these unrelated names to be 427 /// preserved. 428 MangledAndDemangled = 3u 429 }; 430 431 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, 432 const StringTableReader &strtab) { 433 m_mangled.Clear(); 434 m_demangled.Clear(); 435 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); 436 switch (encoding) { 437 case Empty: 438 return true; 439 440 case DemangledOnly: 441 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 442 return true; 443 444 case MangledOnly: 445 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 446 return true; 447 448 case MangledAndDemangled: 449 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 450 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 451 return true; 452 } 453 return false; 454 } 455 /// The encoding format for the Mangled object is as follows: 456 /// 457 /// uint8_t encoding; 458 /// char str1[]; (only if DemangledOnly, MangledOnly) 459 /// char str2[]; (only if MangledAndDemangled) 460 /// 461 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 462 /// are only saved if we need them based on the encoding. 463 /// 464 /// Some mangled names have a mangled name that can be demangled by the built 465 /// in demanglers. These kinds of mangled objects know when the mangled and 466 /// demangled names are the counterparts for each other. This is done because 467 /// demangling is very expensive and avoiding demangling the same name twice 468 /// saves us a lot of compute time. For these kinds of names we only need to 469 /// save the mangled name and have the encoding set to "MangledOnly". 470 /// 471 /// If a mangled obejct has only a demangled name, then we save only that string 472 /// and have the encoding set to "DemangledOnly". 473 /// 474 /// Some mangled objects have both mangled and demangled names, but the 475 /// demangled name can not be computed from the mangled name. This is often used 476 /// for runtime named, like Objective C runtime V2 and V3 names. Both these 477 /// names must be saved and the encoding is set to "MangledAndDemangled". 478 /// 479 /// For a Mangled object with no names, we only need to set the encoding to 480 /// "Empty" and not store any string values. 481 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { 482 MangledEncoding encoding = Empty; 483 if (m_mangled) { 484 encoding = MangledOnly; 485 if (m_demangled) { 486 // We have both mangled and demangled names. If the demangled name is the 487 // counterpart of the mangled name, then we only need to save the mangled 488 // named. If they are different, we need to save both. 489 ConstString s; 490 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled)) 491 encoding = MangledAndDemangled; 492 } 493 } else if (m_demangled) { 494 encoding = DemangledOnly; 495 } 496 file.AppendU8(encoding); 497 switch (encoding) { 498 case Empty: 499 break; 500 case DemangledOnly: 501 file.AppendU32(strtab.Add(m_demangled)); 502 break; 503 case MangledOnly: 504 file.AppendU32(strtab.Add(m_mangled)); 505 break; 506 case MangledAndDemangled: 507 file.AppendU32(strtab.Add(m_mangled)); 508 file.AppendU32(strtab.Add(m_demangled)); 509 break; 510 } 511 } 512