1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 10 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/ADT/Hashing.h" 14 #include "llvm/ADT/StringRef.h" 15 16 #include "llvm/DebugInfo/CodeView/CVRecord.h" 17 #include "llvm/DebugInfo/CodeView/TypeCollection.h" 18 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 19 20 #include "llvm/Support/FormatProviders.h" 21 22 #include <type_traits> 23 24 namespace llvm { 25 class raw_ostream; 26 namespace codeview { 27 28 /// A locally hashed type represents a straightforward hash code of a serialized 29 /// record. The record is simply serialized, and then the bytes are hashed by 30 /// a standard algorithm. This is sufficient for the case of de-duplicating 31 /// records within a single sequence of types, because if two records both have 32 /// a back-reference to the same type in the same stream, they will both have 33 /// the same numeric value for the TypeIndex of the back reference. 34 struct LocallyHashedType { 35 hash_code Hash; 36 ArrayRef<uint8_t> RecordData; 37 38 /// Given a type, compute its local hash. 39 static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData); 40 41 /// Given a sequence of types, compute all of the local hashes. 42 template <typename Range> 43 static std::vector<LocallyHashedType> hashTypes(Range &&Records) { 44 std::vector<LocallyHashedType> Hashes; 45 Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); 46 for (const auto &R : Records) 47 Hashes.push_back(hashType(R)); 48 49 return Hashes; 50 } 51 52 static std::vector<LocallyHashedType> 53 hashTypeCollection(TypeCollection &Types) { 54 std::vector<LocallyHashedType> Hashes; 55 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 56 Hashes.push_back(hashType(Type.RecordData)); 57 }); 58 return Hashes; 59 } 60 }; 61 62 enum class GlobalTypeHashAlg : uint16_t { 63 SHA1 = 0, // standard 20-byte SHA1 hash 64 SHA1_8, // last 8-bytes of standard SHA1 hash 65 BLAKE3, // truncated 8-bytes BLAKE3 66 }; 67 68 /// A globally hashed type represents a hash value that is sufficient to 69 /// uniquely identify a record across multiple type streams or type sequences. 70 /// This works by, for any given record A which references B, replacing the 71 /// TypeIndex that refers to B with a previously-computed global hash for B. As 72 /// this is a recursive algorithm (e.g. the global hash of B also depends on the 73 /// global hashes of the types that B refers to), a global hash can uniquely 74 /// identify that A occurs in another stream that has a completely 75 /// different graph structure. Although the hash itself is slower to compute, 76 /// probing is much faster with a globally hashed type, because the hash itself 77 /// is considered "as good as" the original type. Since type records can be 78 /// quite large, this makes the equality comparison of the hash much faster than 79 /// equality comparison of a full record. 80 struct GloballyHashedType { 81 GloballyHashedType() = default; 82 GloballyHashedType(StringRef H) 83 : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {} 84 GloballyHashedType(ArrayRef<uint8_t> H) { 85 assert(H.size() == 8); 86 ::memcpy(Hash.data(), H.data(), 8); 87 } 88 std::array<uint8_t, 8> Hash; 89 90 bool empty() const { return *(const uint64_t*)Hash.data() == 0; } 91 92 friend inline bool operator==(const GloballyHashedType &L, 93 const GloballyHashedType &R) { 94 return L.Hash == R.Hash; 95 } 96 97 friend inline bool operator!=(const GloballyHashedType &L, 98 const GloballyHashedType &R) { 99 return !(L.Hash == R.Hash); 100 } 101 102 /// Given a sequence of bytes representing a record, compute a global hash for 103 /// this record. Due to the nature of global hashes incorporating the hashes 104 /// of referenced records, this function requires a list of types and ids 105 /// that RecordData might reference, indexable by TypeIndex. 106 static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData, 107 ArrayRef<GloballyHashedType> PreviousTypes, 108 ArrayRef<GloballyHashedType> PreviousIds); 109 110 /// Given a sequence of bytes representing a record, compute a global hash for 111 /// this record. Due to the nature of global hashes incorporating the hashes 112 /// of referenced records, this function requires a list of types and ids 113 /// that RecordData might reference, indexable by TypeIndex. 114 static GloballyHashedType hashType(CVType Type, 115 ArrayRef<GloballyHashedType> PreviousTypes, 116 ArrayRef<GloballyHashedType> PreviousIds) { 117 return hashType(Type.RecordData, PreviousTypes, PreviousIds); 118 } 119 120 /// Given a sequence of combined type and ID records, compute global hashes 121 /// for each of them, returning the results in a vector of hashed types. 122 template <typename Range> 123 static std::vector<GloballyHashedType> hashTypes(Range &&Records) { 124 std::vector<GloballyHashedType> Hashes; 125 bool UnresolvedRecords = false; 126 for (const auto &R : Records) { 127 GloballyHashedType H = hashType(R, Hashes, Hashes); 128 if (H.empty()) 129 UnresolvedRecords = true; 130 Hashes.push_back(H); 131 } 132 133 // In some rare cases, there might be records with forward references in the 134 // stream. Several passes might be needed to fully hash each record in the 135 // Type stream. However this occurs on very small OBJs generated by MASM, 136 // with a dozen records at most. Therefore this codepath isn't 137 // time-critical, as it isn't taken in 99% of cases. 138 while (UnresolvedRecords) { 139 UnresolvedRecords = false; 140 auto HashIt = Hashes.begin(); 141 for (const auto &R : Records) { 142 if (HashIt->empty()) { 143 GloballyHashedType H = hashType(R, Hashes, Hashes); 144 if (H.empty()) 145 UnresolvedRecords = true; 146 else 147 *HashIt = H; 148 } 149 ++HashIt; 150 } 151 } 152 153 return Hashes; 154 } 155 156 /// Given a sequence of combined type and ID records, compute global hashes 157 /// for each of them, returning the results in a vector of hashed types. 158 template <typename Range> 159 static std::vector<GloballyHashedType> 160 hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) { 161 std::vector<GloballyHashedType> IdHashes; 162 for (const auto &R : Records) 163 IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); 164 165 return IdHashes; 166 } 167 168 static std::vector<GloballyHashedType> 169 hashTypeCollection(TypeCollection &Types) { 170 std::vector<GloballyHashedType> Hashes; 171 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 172 Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes)); 173 }); 174 return Hashes; 175 } 176 }; 177 static_assert(std::is_trivially_copyable<GloballyHashedType>::value, 178 "GloballyHashedType must be trivially copyable so that we can " 179 "reinterpret_cast arrays of hash data to arrays of " 180 "GloballyHashedType"); 181 } // namespace codeview 182 183 template <> struct DenseMapInfo<codeview::LocallyHashedType> { 184 static codeview::LocallyHashedType Empty; 185 static codeview::LocallyHashedType Tombstone; 186 187 static codeview::LocallyHashedType getEmptyKey() { return Empty; } 188 189 static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } 190 191 static unsigned getHashValue(codeview::LocallyHashedType Val) { 192 return Val.Hash; 193 } 194 195 static bool isEqual(codeview::LocallyHashedType LHS, 196 codeview::LocallyHashedType RHS) { 197 if (LHS.Hash != RHS.Hash) 198 return false; 199 return LHS.RecordData == RHS.RecordData; 200 } 201 }; 202 203 template <> struct DenseMapInfo<codeview::GloballyHashedType> { 204 static codeview::GloballyHashedType Empty; 205 static codeview::GloballyHashedType Tombstone; 206 207 static codeview::GloballyHashedType getEmptyKey() { return Empty; } 208 209 static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } 210 211 static unsigned getHashValue(codeview::GloballyHashedType Val) { 212 return *reinterpret_cast<const unsigned *>(Val.Hash.data()); 213 } 214 215 static bool isEqual(codeview::GloballyHashedType LHS, 216 codeview::GloballyHashedType RHS) { 217 return LHS == RHS; 218 } 219 }; 220 221 template <> struct format_provider<codeview::LocallyHashedType> { 222 public: 223 static void format(const codeview::LocallyHashedType &V, 224 llvm::raw_ostream &Stream, StringRef Style) { 225 write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8); 226 } 227 }; 228 229 template <> struct format_provider<codeview::GloballyHashedType> { 230 public: 231 static void format(const codeview::GloballyHashedType &V, 232 llvm::raw_ostream &Stream, StringRef Style) { 233 for (uint8_t B : V.Hash) { 234 write_hex(Stream, B, HexPrintStyle::Upper, 2); 235 } 236 } 237 }; 238 239 } // namespace llvm 240 241 #endif 242