1 //===-- Mangled.cpp -------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Mangled.h" 10 11 #include "lldb/Core/DataFileCache.h" 12 #include "lldb/Core/RichManglingContext.h" 13 #include "lldb/Target/Language.h" 14 #include "lldb/Utility/ConstString.h" 15 #include "lldb/Utility/DataEncoder.h" 16 #include "lldb/Utility/LLDBLog.h" 17 #include "lldb/Utility/Log.h" 18 #include "lldb/Utility/RegularExpression.h" 19 #include "lldb/Utility/Stream.h" 20 #include "lldb/lldb-enumerations.h" 21 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Demangle/Demangle.h" 24 #include "llvm/Support/Compiler.h" 25 26 #include <mutex> 27 #include <string> 28 #include <string_view> 29 #include <utility> 30 31 #include <cstdlib> 32 #include <cstring> 33 using namespace lldb_private; 34 35 static inline bool cstring_is_mangled(llvm::StringRef s) { 36 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone; 37 } 38 39 #pragma mark Mangled 40 41 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { 42 if (name.empty()) 43 return Mangled::eManglingSchemeNone; 44 45 if (name.startswith("?")) 46 return Mangled::eManglingSchemeMSVC; 47 48 if (name.startswith("_R")) 49 return Mangled::eManglingSchemeRustV0; 50 51 if (name.startswith("_D")) 52 return Mangled::eManglingSchemeD; 53 54 if (name.startswith("_Z")) 55 return Mangled::eManglingSchemeItanium; 56 57 // ___Z is a clang extension of block invocations 58 if (name.startswith("___Z")) 59 return Mangled::eManglingSchemeItanium; 60 61 return Mangled::eManglingSchemeNone; 62 } 63 64 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { 65 if (s) 66 SetValue(s); 67 } 68 69 Mangled::Mangled(llvm::StringRef name) { 70 if (!name.empty()) 71 SetValue(ConstString(name)); 72 } 73 74 // Convert to bool operator. This allows code to check any Mangled objects 75 // to see if they contain anything valid using code such as: 76 // 77 // Mangled mangled(...); 78 // if (mangled) 79 // { ... 80 Mangled::operator bool() const { return m_mangled || m_demangled; } 81 82 // Clear the mangled and demangled values. 83 void Mangled::Clear() { 84 m_mangled.Clear(); 85 m_demangled.Clear(); 86 } 87 88 // Compare the string values. 89 int Mangled::Compare(const Mangled &a, const Mangled &b) { 90 return ConstString::Compare(a.GetName(ePreferMangled), 91 b.GetName(ePreferMangled)); 92 } 93 94 void Mangled::SetValue(ConstString name) { 95 if (name) { 96 if (cstring_is_mangled(name.GetStringRef())) { 97 m_demangled.Clear(); 98 m_mangled = name; 99 } else { 100 m_demangled = name; 101 m_mangled.Clear(); 102 } 103 } else { 104 m_demangled.Clear(); 105 m_mangled.Clear(); 106 } 107 } 108 109 // Local helpers for different demangling implementations. 110 static char *GetMSVCDemangledStr(std::string_view M) { 111 char *demangled_cstr = llvm::microsoftDemangle( 112 M, nullptr, nullptr, 113 llvm::MSDemangleFlags( 114 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | 115 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); 116 117 if (Log *log = GetLog(LLDBLog::Demangle)) { 118 if (demangled_cstr && demangled_cstr[0]) 119 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr); 120 else 121 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data()); 122 } 123 124 return demangled_cstr; 125 } 126 127 static char *GetItaniumDemangledStr(const char *M) { 128 char *demangled_cstr = nullptr; 129 130 llvm::ItaniumPartialDemangler ipd; 131 bool err = ipd.partialDemangle(M); 132 if (!err) { 133 // Default buffer and size (will realloc in case it's too small). 134 size_t demangled_size = 80; 135 demangled_cstr = static_cast<char *>(std::malloc(demangled_size)); 136 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size); 137 138 assert(demangled_cstr && 139 "finishDemangle must always succeed if partialDemangle did"); 140 assert(demangled_cstr[demangled_size - 1] == '\0' && 141 "Expected demangled_size to return length including trailing null"); 142 } 143 144 if (Log *log = GetLog(LLDBLog::Demangle)) { 145 if (demangled_cstr) 146 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr); 147 else 148 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M); 149 } 150 151 return demangled_cstr; 152 } 153 154 static char *GetRustV0DemangledStr(std::string_view M) { 155 char *demangled_cstr = llvm::rustDemangle(M); 156 157 if (Log *log = GetLog(LLDBLog::Demangle)) { 158 if (demangled_cstr && demangled_cstr[0]) 159 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr); 160 else 161 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M); 162 } 163 164 return demangled_cstr; 165 } 166 167 static char *GetDLangDemangledStr(std::string_view M) { 168 char *demangled_cstr = llvm::dlangDemangle(M); 169 170 if (Log *log = GetLog(LLDBLog::Demangle)) { 171 if (demangled_cstr && demangled_cstr[0]) 172 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr); 173 else 174 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M); 175 } 176 177 return demangled_cstr; 178 } 179 180 // Explicit demangling for scheduled requests during batch processing. This 181 // makes use of ItaniumPartialDemangler's rich demangle info 182 bool Mangled::GetRichManglingInfo(RichManglingContext &context, 183 SkipMangledNameFn *skip_mangled_name) { 184 // Others are not meant to arrive here. ObjC names or C's main() for example 185 // have their names stored in m_demangled, while m_mangled is empty. 186 assert(m_mangled); 187 188 // Check whether or not we are interested in this name at all. 189 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef()); 190 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) 191 return false; 192 193 switch (scheme) { 194 case eManglingSchemeNone: 195 // The current mangled_name_filter would allow llvm_unreachable here. 196 return false; 197 198 case eManglingSchemeItanium: 199 // We want the rich mangling info here, so we don't care whether or not 200 // there is a demangled string in the pool already. 201 return context.FromItaniumName(m_mangled); 202 203 case eManglingSchemeMSVC: { 204 // We have no rich mangling for MSVC-mangled names yet, so first try to 205 // demangle it if necessary. 206 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) { 207 if (char *d = GetMSVCDemangledStr(m_mangled)) { 208 // Without the rich mangling info we have to demangle the full name. 209 // Copy it to string pool and connect the counterparts to accelerate 210 // later access in GetDemangledName(). 211 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d), 212 m_mangled); 213 ::free(d); 214 } else { 215 m_demangled.SetCString(""); 216 } 217 } 218 219 if (m_demangled.IsEmpty()) { 220 // Cannot demangle it, so don't try parsing. 221 return false; 222 } else { 223 // Demangled successfully, we can try and parse it with 224 // CPlusPlusLanguage::MethodName. 225 return context.FromCxxMethodName(m_demangled); 226 } 227 } 228 229 case eManglingSchemeRustV0: 230 case eManglingSchemeD: 231 // Rich demangling scheme is not supported 232 return false; 233 } 234 llvm_unreachable("Fully covered switch above!"); 235 } 236 237 // Generate the demangled name on demand using this accessor. Code in this 238 // class will need to use this accessor if it wishes to decode the demangled 239 // name. The result is cached and will be kept until a new string value is 240 // supplied to this object, or until the end of the object's lifetime. 241 ConstString Mangled::GetDemangledName() const { 242 // Check to make sure we have a valid mangled name and that we haven't 243 // already decoded our mangled name. 244 if (m_mangled && m_demangled.IsNull()) { 245 // Don't bother running anything that isn't mangled 246 const char *mangled_name = m_mangled.GetCString(); 247 ManglingScheme mangling_scheme = 248 GetManglingScheme(m_mangled.GetStringRef()); 249 if (mangling_scheme != eManglingSchemeNone && 250 !m_mangled.GetMangledCounterpart(m_demangled)) { 251 // We didn't already mangle this name, demangle it and if all goes well 252 // add it to our map. 253 char *demangled_name = nullptr; 254 switch (mangling_scheme) { 255 case eManglingSchemeMSVC: 256 demangled_name = GetMSVCDemangledStr(mangled_name); 257 break; 258 case eManglingSchemeItanium: { 259 demangled_name = GetItaniumDemangledStr(mangled_name); 260 break; 261 } 262 case eManglingSchemeRustV0: 263 demangled_name = GetRustV0DemangledStr(m_mangled); 264 break; 265 case eManglingSchemeD: 266 demangled_name = GetDLangDemangledStr(m_mangled); 267 break; 268 case eManglingSchemeNone: 269 llvm_unreachable("eManglingSchemeNone was handled already"); 270 } 271 if (demangled_name) { 272 m_demangled.SetStringWithMangledCounterpart( 273 llvm::StringRef(demangled_name), m_mangled); 274 free(demangled_name); 275 } 276 } 277 if (m_demangled.IsNull()) { 278 // Set the demangled string to the empty string to indicate we tried to 279 // parse it once and failed. 280 m_demangled.SetCString(""); 281 } 282 } 283 284 return m_demangled; 285 } 286 287 ConstString Mangled::GetDisplayDemangledName() const { 288 return GetDemangledName(); 289 } 290 291 bool Mangled::NameMatches(const RegularExpression ®ex) const { 292 if (m_mangled && regex.Execute(m_mangled.GetStringRef())) 293 return true; 294 295 ConstString demangled = GetDemangledName(); 296 return demangled && regex.Execute(demangled.GetStringRef()); 297 } 298 299 // Get the demangled name if there is one, else return the mangled name. 300 ConstString Mangled::GetName(Mangled::NamePreference preference) const { 301 if (preference == ePreferMangled && m_mangled) 302 return m_mangled; 303 304 // Call the accessor to make sure we get a demangled name in case it hasn't 305 // been demangled yet... 306 ConstString demangled = GetDemangledName(); 307 308 if (preference == ePreferDemangledWithoutArguments) { 309 if (Language *lang = Language::FindPlugin(GuessLanguage())) { 310 return lang->GetDemangledFunctionNameWithoutArguments(*this); 311 } 312 } 313 if (preference == ePreferDemangled) { 314 if (demangled) 315 return demangled; 316 return m_mangled; 317 } 318 return demangled; 319 } 320 321 // Dump a Mangled object to stream "s". We don't force our demangled name to be 322 // computed currently (we don't use the accessor). 323 void Mangled::Dump(Stream *s) const { 324 if (m_mangled) { 325 *s << ", mangled = " << m_mangled; 326 } 327 if (m_demangled) { 328 const char *demangled = m_demangled.AsCString(); 329 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>"); 330 } 331 } 332 333 // Dumps a debug version of this string with extra object and state information 334 // to stream "s". 335 void Mangled::DumpDebug(Stream *s) const { 336 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2), 337 static_cast<const void *>(this)); 338 m_mangled.DumpDebug(s); 339 s->Printf(", demangled = "); 340 m_demangled.DumpDebug(s); 341 } 342 343 // Return the size in byte that this object takes in memory. The size includes 344 // the size of the objects it owns, and not the strings that it references 345 // because they are shared strings. 346 size_t Mangled::MemorySize() const { 347 return m_mangled.MemorySize() + m_demangled.MemorySize(); 348 } 349 350 // We "guess" the language because we can't determine a symbol's language from 351 // it's name. For example, a Pascal symbol can be mangled using the C++ 352 // Itanium scheme, and defined in a compilation unit within the same module as 353 // other C++ units. In addition, different targets could have different ways 354 // of mangling names from a given language, likewise the compilation units 355 // within those targets. 356 lldb::LanguageType Mangled::GuessLanguage() const { 357 lldb::LanguageType result = lldb::eLanguageTypeUnknown; 358 // Ask each language plugin to check if the mangled name belongs to it. 359 Language::ForEach([this, &result](Language *l) { 360 if (l->SymbolNameFitsToLanguage(*this)) { 361 result = l->GetLanguageType(); 362 return false; 363 } 364 return true; 365 }); 366 return result; 367 } 368 369 // Dump OBJ to the supplied stream S. 370 Stream &operator<<(Stream &s, const Mangled &obj) { 371 if (obj.GetMangledName()) 372 s << "mangled = '" << obj.GetMangledName() << "'"; 373 374 ConstString demangled = obj.GetDemangledName(); 375 if (demangled) 376 s << ", demangled = '" << demangled << '\''; 377 else 378 s << ", demangled = <error>"; 379 return s; 380 } 381 382 // When encoding Mangled objects we can get away with encoding as little 383 // information as is required. The enumeration below helps us to efficiently 384 // encode Mangled objects. 385 enum MangledEncoding { 386 /// If the Mangled object has neither a mangled name or demangled name we can 387 /// encode the object with one zero byte using the Empty enumeration. 388 Empty = 0u, 389 /// If the Mangled object has only a demangled name and no mangled named, we 390 /// can encode only the demangled name. 391 DemangledOnly = 1u, 392 /// If the mangle name can calculate the demangled name (it is the 393 /// mangled/demangled counterpart), then we only need to encode the mangled 394 /// name as the demangled name can be recomputed. 395 MangledOnly = 2u, 396 /// If we have a Mangled object with two different names that are not related 397 /// then we need to save both strings. This can happen if we have a name that 398 /// isn't a true mangled name, but we want to be able to lookup a symbol by 399 /// name and type in the symbol table. We do this for Objective C symbols like 400 /// "OBJC_CLASS_$_NSValue" where the mangled named will be set to 401 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to 402 /// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it 403 /// would fail, but in these cases we want these unrelated names to be 404 /// preserved. 405 MangledAndDemangled = 3u 406 }; 407 408 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, 409 const StringTableReader &strtab) { 410 m_mangled.Clear(); 411 m_demangled.Clear(); 412 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); 413 switch (encoding) { 414 case Empty: 415 return true; 416 417 case DemangledOnly: 418 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 419 return true; 420 421 case MangledOnly: 422 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 423 return true; 424 425 case MangledAndDemangled: 426 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 427 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 428 return true; 429 } 430 return false; 431 } 432 /// The encoding format for the Mangled object is as follows: 433 /// 434 /// uint8_t encoding; 435 /// char str1[]; (only if DemangledOnly, MangledOnly) 436 /// char str2[]; (only if MangledAndDemangled) 437 /// 438 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 439 /// are only saved if we need them based on the encoding. 440 /// 441 /// Some mangled names have a mangled name that can be demangled by the built 442 /// in demanglers. These kinds of mangled objects know when the mangled and 443 /// demangled names are the counterparts for each other. This is done because 444 /// demangling is very expensive and avoiding demangling the same name twice 445 /// saves us a lot of compute time. For these kinds of names we only need to 446 /// save the mangled name and have the encoding set to "MangledOnly". 447 /// 448 /// If a mangled obejct has only a demangled name, then we save only that string 449 /// and have the encoding set to "DemangledOnly". 450 /// 451 /// Some mangled objects have both mangled and demangled names, but the 452 /// demangled name can not be computed from the mangled name. This is often used 453 /// for runtime named, like Objective C runtime V2 and V3 names. Both these 454 /// names must be saved and the encoding is set to "MangledAndDemangled". 455 /// 456 /// For a Mangled object with no names, we only need to set the encoding to 457 /// "Empty" and not store any string values. 458 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { 459 MangledEncoding encoding = Empty; 460 if (m_mangled) { 461 encoding = MangledOnly; 462 if (m_demangled) { 463 // We have both mangled and demangled names. If the demangled name is the 464 // counterpart of the mangled name, then we only need to save the mangled 465 // named. If they are different, we need to save both. 466 ConstString s; 467 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled)) 468 encoding = MangledAndDemangled; 469 } 470 } else if (m_demangled) { 471 encoding = DemangledOnly; 472 } 473 file.AppendU8(encoding); 474 switch (encoding) { 475 case Empty: 476 break; 477 case DemangledOnly: 478 file.AppendU32(strtab.Add(m_demangled)); 479 break; 480 case MangledOnly: 481 file.AppendU32(strtab.Add(m_mangled)); 482 break; 483 case MangledAndDemangled: 484 file.AppendU32(strtab.Add(m_mangled)); 485 file.AppendU32(strtab.Add(m_demangled)); 486 break; 487 } 488 } 489