1 //==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file contains support for writing accelerator tables. 10 /// 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_CODEGEN_ACCELTABLE_H 14 #define LLVM_CODEGEN_ACCELTABLE_H 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/MapVector.h" 18 #include "llvm/ADT/STLFunctionalExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/BinaryFormat/Dwarf.h" 21 #include "llvm/CodeGen/DIE.h" 22 #include "llvm/CodeGen/DwarfStringPoolEntry.h" 23 #include "llvm/Support/Allocator.h" 24 #include "llvm/Support/DJB.h" 25 #include "llvm/Support/Debug.h" 26 #include <cstdint> 27 #include <variant> 28 #include <vector> 29 30 /// \file 31 /// The DWARF and Apple accelerator tables are an indirect hash table optimized 32 /// for null lookup rather than access to known data. The Apple accelerator 33 /// tables are a precursor of the newer DWARF v5 accelerator tables. Both 34 /// formats share common design ideas. 35 /// 36 /// The Apple accelerator table are output into an on-disk format that looks 37 /// like this: 38 /// 39 /// .------------------. 40 /// | HEADER | 41 /// |------------------| 42 /// | BUCKETS | 43 /// |------------------| 44 /// | HASHES | 45 /// |------------------| 46 /// | OFFSETS | 47 /// |------------------| 48 /// | DATA | 49 /// `------------------' 50 /// 51 /// The header contains a magic number, version, type of hash function, 52 /// the number of buckets, total number of hashes, and room for a special struct 53 /// of data and the length of that struct. 54 /// 55 /// The buckets contain an index (e.g. 6) into the hashes array. The hashes 56 /// section contains all of the 32-bit hash values in contiguous memory, and the 57 /// offsets contain the offset into the data area for the particular hash. 58 /// 59 /// For a lookup example, we could hash a function name and take it modulo the 60 /// number of buckets giving us our bucket. From there we take the bucket value 61 /// as an index into the hashes table and look at each successive hash as long 62 /// as the hash value is still the same modulo result (bucket value) as earlier. 63 /// If we have a match we look at that same entry in the offsets table and grab 64 /// the offset in the data for our final match. 65 /// 66 /// The DWARF v5 accelerator table consists of zero or more name indices that 67 /// are output into an on-disk format that looks like this: 68 /// 69 /// .------------------. 70 /// | HEADER | 71 /// |------------------| 72 /// | CU LIST | 73 /// |------------------| 74 /// | LOCAL TU LIST | 75 /// |------------------| 76 /// | FOREIGN TU LIST | 77 /// |------------------| 78 /// | HASH TABLE | 79 /// |------------------| 80 /// | NAME TABLE | 81 /// |------------------| 82 /// | ABBREV TABLE | 83 /// |------------------| 84 /// | ENTRY POOL | 85 /// `------------------' 86 /// 87 /// For the full documentation please refer to the DWARF 5 standard. 88 /// 89 /// 90 /// This file defines the class template AccelTable, which is represents an 91 /// abstract view of an Accelerator table, without any notion of an on-disk 92 /// layout. This class is parameterized by an entry type, which should derive 93 /// from AccelTableData. This is the type of individual entries in the table, 94 /// and it should store the data necessary to emit them. AppleAccelTableData is 95 /// the base class for Apple Accelerator Table entries, which have a uniform 96 /// structure based on a sequence of Atoms. There are different sub-classes 97 /// derived from AppleAccelTable, which differ in the set of Atoms and how they 98 /// obtain their values. 99 /// 100 /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable 101 /// function. 102 103 namespace llvm { 104 105 class AsmPrinter; 106 class DwarfDebug; 107 class DwarfTypeUnit; 108 class MCSymbol; 109 class raw_ostream; 110 111 /// Interface which the different types of accelerator table data have to 112 /// conform. It serves as a base class for different values of the template 113 /// argument of the AccelTable class template. 114 class AccelTableData { 115 public: 116 virtual ~AccelTableData() = default; 117 118 bool operator<(const AccelTableData &Other) const { 119 return order() < Other.order(); 120 } 121 122 // Subclasses should implement: 123 // static uint32_t hash(StringRef Name); 124 125 #ifndef NDEBUG 126 virtual void print(raw_ostream &OS) const = 0; 127 #endif 128 protected: 129 virtual uint64_t order() const = 0; 130 }; 131 132 /// A base class holding non-template-dependant functionality of the AccelTable 133 /// class. Clients should not use this class directly but rather instantiate 134 /// AccelTable with a type derived from AccelTableData. 135 class AccelTableBase { 136 public: 137 using HashFn = uint32_t(StringRef); 138 139 /// Represents a group of entries with identical name (and hence, hash value). 140 struct HashData { 141 DwarfStringPoolEntryRef Name; 142 uint32_t HashValue; 143 std::vector<AccelTableData *> Values; 144 MCSymbol *Sym; 145 146 #ifndef NDEBUG 147 void print(raw_ostream &OS) const; 148 void dump() const { print(dbgs()); } 149 #endif 150 }; 151 using HashList = std::vector<HashData *>; 152 using BucketList = std::vector<HashList>; 153 154 protected: 155 /// Allocator for HashData and Values. 156 BumpPtrAllocator Allocator; 157 158 using StringEntries = MapVector<StringRef, HashData>; 159 StringEntries Entries; 160 161 HashFn *Hash; 162 uint32_t BucketCount = 0; 163 uint32_t UniqueHashCount = 0; 164 165 HashList Hashes; 166 BucketList Buckets; 167 168 void computeBucketCount(); 169 170 AccelTableBase(HashFn *Hash) : Hash(Hash) {} 171 172 public: 173 void finalize(AsmPrinter *Asm, StringRef Prefix); 174 ArrayRef<HashList> getBuckets() const { return Buckets; } 175 uint32_t getBucketCount() const { return BucketCount; } 176 uint32_t getUniqueHashCount() const { return UniqueHashCount; } 177 uint32_t getUniqueNameCount() const { return Entries.size(); } 178 179 #ifndef NDEBUG 180 void print(raw_ostream &OS) const; 181 void dump() const { print(dbgs()); } 182 #endif 183 184 AccelTableBase(const AccelTableBase &) = delete; 185 void operator=(const AccelTableBase &) = delete; 186 }; 187 188 /// This class holds an abstract representation of an Accelerator Table, 189 /// consisting of a sequence of buckets, each bucket containint a sequence of 190 /// HashData entries. The class is parameterized by the type of entries it 191 /// holds. The type template parameter also defines the hash function to use for 192 /// hashing names. 193 template <typename DataT> class AccelTable : public AccelTableBase { 194 public: 195 AccelTable() : AccelTableBase(DataT::hash) {} 196 197 template <typename... Types> 198 void addName(DwarfStringPoolEntryRef Name, Types &&... Args); 199 void clear() { Entries.clear(); } 200 void addEntries(AccelTable<DataT> &Table); 201 const StringEntries getEntries() const { return Entries; } 202 }; 203 204 template <typename AccelTableDataT> 205 template <typename... Types> 206 void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name, 207 Types &&... Args) { 208 assert(Buckets.empty() && "Already finalized!"); 209 // If the string is in the list already then add this die to the list 210 // otherwise add a new one. 211 auto &It = Entries[Name.getString()]; 212 if (It.Values.empty()) { 213 It.Name = Name; 214 It.HashValue = Hash(Name.getString()); 215 } 216 It.Values.push_back(new (Allocator) 217 AccelTableDataT(std::forward<Types>(Args)...)); 218 } 219 220 /// A base class for different implementations of Data classes for Apple 221 /// Accelerator Tables. The columns in the table are defined by the static Atoms 222 /// variable defined on the subclasses. 223 class AppleAccelTableData : public AccelTableData { 224 public: 225 /// An Atom defines the form of the data in an Apple accelerator table. 226 /// Conceptually it is a column in the accelerator consisting of a type and a 227 /// specification of the form of its data. 228 struct Atom { 229 /// Atom Type. 230 const uint16_t Type; 231 /// DWARF Form. 232 const uint16_t Form; 233 234 constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {} 235 236 #ifndef NDEBUG 237 void print(raw_ostream &OS) const; 238 void dump() const { print(dbgs()); } 239 #endif 240 }; 241 // Subclasses should define: 242 // static constexpr Atom Atoms[]; 243 244 virtual void emit(AsmPrinter *Asm) const = 0; 245 246 static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); } 247 }; 248 249 /// The Data class implementation for DWARF v5 accelerator table. Unlike the 250 /// Apple Data classes, this class is just a DIE wrapper, and does not know to 251 /// serialize itself. The complete serialization logic is in the 252 /// emitDWARF5AccelTable function. 253 class DWARF5AccelTableData : public AccelTableData { 254 public: 255 struct AttributeEncoding { 256 dwarf::Index Index; 257 dwarf::Form Form; 258 }; 259 260 static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); } 261 262 DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID, 263 const bool IsTU = false); 264 DWARF5AccelTableData(const uint64_t DieOffset, const unsigned DieTag, 265 const unsigned UnitID, const bool IsTU = false) 266 : OffsetVal(DieOffset), DieTag(DieTag), UnitID(UnitID), IsTU(IsTU) {} 267 268 #ifndef NDEBUG 269 void print(raw_ostream &OS) const override; 270 #endif 271 272 uint64_t getDieOffset() const { 273 assert(isNormalized() && "Accessing DIE Offset before normalizing."); 274 return std::get<uint64_t>(OffsetVal); 275 } 276 unsigned getDieTag() const { return DieTag; } 277 unsigned getUnitID() const { return UnitID; } 278 bool isTU() const { return IsTU; } 279 void normalizeDIEToOffset() { 280 assert(!isNormalized() && "Accessing offset after normalizing."); 281 OffsetVal = std::get<const DIE *>(OffsetVal)->getOffset(); 282 } 283 bool isNormalized() const { 284 return std::holds_alternative<uint64_t>(OffsetVal); 285 } 286 287 protected: 288 std::variant<const DIE *, uint64_t> OffsetVal; 289 uint32_t DieTag : 16; 290 uint32_t UnitID : 15; 291 uint32_t IsTU : 1; 292 293 uint64_t order() const override { return getDieOffset(); } 294 }; 295 296 struct TypeUnitMetaInfo { 297 // Symbol for start of the TU section or signature if this is SplitDwarf. 298 std::variant<MCSymbol *, uint64_t> LabelOrSignature; 299 // Unique ID of Type Unit. 300 unsigned UniqueID; 301 }; 302 using TUVectorTy = SmallVector<TypeUnitMetaInfo, 1>; 303 class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> { 304 // Symbols to start of all the TU sections that were generated. 305 TUVectorTy TUSymbolsOrHashes; 306 307 public: 308 struct UnitIndexAndEncoding { 309 unsigned Index; 310 DWARF5AccelTableData::AttributeEncoding Encoding; 311 }; 312 /// Returns type units that were constructed. 313 const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; } 314 /// Add a type unit start symbol. 315 void addTypeUnitSymbol(DwarfTypeUnit &U); 316 /// Add a type unit Signature. 317 void addTypeUnitSignature(DwarfTypeUnit &U); 318 /// Convert DIE entries to explicit offset. 319 /// Needs to be called after DIE offsets are computed. 320 void convertDieToOffset() { 321 for (auto &Entry : Entries) { 322 for (AccelTableData *Value : Entry.second.Values) { 323 DWARF5AccelTableData *Data = static_cast<DWARF5AccelTableData *>(Value); 324 // For TU we normalize as each Unit is emitted. 325 // So when this is invoked after CU construction we will be in mixed 326 // state. 327 if (!Data->isNormalized()) 328 Data->normalizeDIEToOffset(); 329 } 330 } 331 } 332 333 void addTypeEntries(DWARF5AccelTable &Table) { 334 for (auto &Entry : Table.getEntries()) { 335 for (AccelTableData *Value : Entry.second.Values) { 336 DWARF5AccelTableData *Data = static_cast<DWARF5AccelTableData *>(Value); 337 addName(Entry.second.Name, Data->getDieOffset(), Data->getDieTag(), 338 Data->getUnitID(), true); 339 } 340 } 341 } 342 }; 343 344 void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, 345 StringRef Prefix, const MCSymbol *SecBegin, 346 ArrayRef<AppleAccelTableData::Atom> Atoms); 347 348 /// Emit an Apple Accelerator Table consisting of entries in the specified 349 /// AccelTable. The DataT template parameter should be derived from 350 /// AppleAccelTableData. 351 template <typename DataT> 352 void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents, 353 StringRef Prefix, const MCSymbol *SecBegin) { 354 static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value); 355 emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms); 356 } 357 358 void emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents, 359 const DwarfDebug &DD, 360 ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs); 361 362 /// Emit a DWARFv5 Accelerator Table consisting of entries in the specified 363 /// AccelTable. The \p CUs contains either symbols keeping offsets to the 364 /// start of compilation unit, either offsets to the start of compilation 365 /// unit themselves. 366 void emitDWARF5AccelTable( 367 AsmPrinter *Asm, DWARF5AccelTable &Contents, 368 ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs, 369 llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>( 370 const DWARF5AccelTableData &)> 371 getIndexForEntry); 372 373 /// Accelerator table data implementation for simple Apple accelerator tables 374 /// with just a DIE reference. 375 class AppleAccelTableOffsetData : public AppleAccelTableData { 376 public: 377 AppleAccelTableOffsetData(const DIE &D) : Die(D) {} 378 379 void emit(AsmPrinter *Asm) const override; 380 381 static constexpr Atom Atoms[] = { 382 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; 383 384 #ifndef NDEBUG 385 void print(raw_ostream &OS) const override; 386 #endif 387 protected: 388 uint64_t order() const override { return Die.getOffset(); } 389 390 const DIE &Die; 391 }; 392 393 /// Accelerator table data implementation for Apple type accelerator tables. 394 class AppleAccelTableTypeData : public AppleAccelTableOffsetData { 395 public: 396 AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {} 397 398 void emit(AsmPrinter *Asm) const override; 399 400 static constexpr Atom Atoms[] = { 401 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), 402 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), 403 Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; 404 405 #ifndef NDEBUG 406 void print(raw_ostream &OS) const override; 407 #endif 408 }; 409 410 /// Accelerator table data implementation for simple Apple accelerator tables 411 /// with a DIE offset but no actual DIE pointer. 412 class AppleAccelTableStaticOffsetData : public AppleAccelTableData { 413 public: 414 AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {} 415 416 void emit(AsmPrinter *Asm) const override; 417 418 static constexpr Atom Atoms[] = { 419 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; 420 421 #ifndef NDEBUG 422 void print(raw_ostream &OS) const override; 423 #endif 424 protected: 425 uint64_t order() const override { return Offset; } 426 427 uint32_t Offset; 428 }; 429 430 /// Accelerator table data implementation for type accelerator tables with 431 /// a DIE offset but no actual DIE pointer. 432 class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData { 433 public: 434 AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag, 435 bool ObjCClassIsImplementation, 436 uint32_t QualifiedNameHash) 437 : AppleAccelTableStaticOffsetData(Offset), 438 QualifiedNameHash(QualifiedNameHash), Tag(Tag), 439 ObjCClassIsImplementation(ObjCClassIsImplementation) {} 440 441 void emit(AsmPrinter *Asm) const override; 442 443 static constexpr Atom Atoms[] = { 444 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), 445 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), 446 Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)}; 447 448 #ifndef NDEBUG 449 void print(raw_ostream &OS) const override; 450 #endif 451 protected: 452 uint64_t order() const override { return Offset; } 453 454 uint32_t QualifiedNameHash; 455 uint16_t Tag; 456 bool ObjCClassIsImplementation; 457 }; 458 459 } // end namespace llvm 460 461 #endif // LLVM_CODEGEN_ACCELTABLE_H 462