1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 10 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/ADT/Hashing.h" 14 #include "llvm/ADT/StringRef.h" 15 16 #include "llvm/DebugInfo/CodeView/CVRecord.h" 17 #include "llvm/DebugInfo/CodeView/TypeCollection.h" 18 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 19 20 #include "llvm/Support/FormatProviders.h" 21 22 #include <type_traits> 23 24 namespace llvm { 25 class raw_ostream; 26 namespace codeview { 27 28 /// A locally hashed type represents a straightforward hash code of a serialized 29 /// record. The record is simply serialized, and then the bytes are hashed by 30 /// a standard algorithm. This is sufficient for the case of de-duplicating 31 /// records within a single sequence of types, because if two records both have 32 /// a back-reference to the same type in the same stream, they will both have 33 /// the same numeric value for the TypeIndex of the back reference. 34 struct LocallyHashedType { 35 hash_code Hash; 36 ArrayRef<uint8_t> RecordData; 37 38 /// Given a type, compute its local hash. 39 static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData); 40 41 /// Given a sequence of types, compute all of the local hashes. 42 template <typename Range> 43 static std::vector<LocallyHashedType> hashTypes(Range &&Records) { 44 std::vector<LocallyHashedType> Hashes; 45 Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); 46 for (const auto &R : Records) 47 Hashes.push_back(hashType(R)); 48 49 return Hashes; 50 } 51 52 static std::vector<LocallyHashedType> 53 hashTypeCollection(TypeCollection &Types) { 54 std::vector<LocallyHashedType> Hashes; 55 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 56 Hashes.push_back(hashType(Type.RecordData)); 57 }); 58 return Hashes; 59 } 60 }; 61 62 enum class GlobalTypeHashAlg : uint16_t { 63 SHA1 = 0, // standard 20-byte SHA1 hash 64 SHA1_8 // last 8-bytes of standard SHA1 hash 65 }; 66 67 /// A globally hashed type represents a hash value that is sufficient to 68 /// uniquely identify a record across multiple type streams or type sequences. 69 /// This works by, for any given record A which references B, replacing the 70 /// TypeIndex that refers to B with a previously-computed global hash for B. As 71 /// this is a recursive algorithm (e.g. the global hash of B also depends on the 72 /// global hashes of the types that B refers to), a global hash can uniquely 73 /// identify identify that A occurs in another stream that has a completely 74 /// different graph structure. Although the hash itself is slower to compute, 75 /// probing is much faster with a globally hashed type, because the hash itself 76 /// is considered "as good as" the original type. Since type records can be 77 /// quite large, this makes the equality comparison of the hash much faster than 78 /// equality comparison of a full record. 79 struct GloballyHashedType { 80 GloballyHashedType() = default; 81 GloballyHashedType(StringRef H) 82 : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {} 83 GloballyHashedType(ArrayRef<uint8_t> H) { 84 assert(H.size() == 8); 85 ::memcpy(Hash.data(), H.data(), 8); 86 } 87 std::array<uint8_t, 8> Hash; 88 89 bool empty() const { return *(const uint64_t*)Hash.data() == 0; } 90 91 friend inline bool operator==(const GloballyHashedType &L, 92 const GloballyHashedType &R) { 93 return L.Hash == R.Hash; 94 } 95 96 friend inline bool operator!=(const GloballyHashedType &L, 97 const GloballyHashedType &R) { 98 return !(L.Hash == R.Hash); 99 } 100 101 /// Given a sequence of bytes representing a record, compute a global hash for 102 /// this record. Due to the nature of global hashes incorporating the hashes 103 /// of referenced records, this function requires a list of types and ids 104 /// that RecordData might reference, indexable by TypeIndex. 105 static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData, 106 ArrayRef<GloballyHashedType> PreviousTypes, 107 ArrayRef<GloballyHashedType> PreviousIds); 108 109 /// Given a sequence of bytes representing a record, compute a global hash for 110 /// this record. Due to the nature of global hashes incorporating the hashes 111 /// of referenced records, this function requires a list of types and ids 112 /// that RecordData might reference, indexable by TypeIndex. 113 static GloballyHashedType hashType(CVType Type, 114 ArrayRef<GloballyHashedType> PreviousTypes, 115 ArrayRef<GloballyHashedType> PreviousIds) { 116 return hashType(Type.RecordData, PreviousTypes, PreviousIds); 117 } 118 119 /// Given a sequence of combined type and ID records, compute global hashes 120 /// for each of them, returning the results in a vector of hashed types. 121 template <typename Range> 122 static std::vector<GloballyHashedType> hashTypes(Range &&Records) { 123 std::vector<GloballyHashedType> Hashes; 124 bool UnresolvedRecords = false; 125 for (const auto &R : Records) { 126 GloballyHashedType H = hashType(R, Hashes, Hashes); 127 if (H.empty()) 128 UnresolvedRecords = true; 129 Hashes.push_back(H); 130 } 131 132 // In some rare cases, there might be records with forward references in the 133 // stream. Several passes might be needed to fully hash each record in the 134 // Type stream. However this occurs on very small OBJs generated by MASM, 135 // with a dozen records at most. Therefore this codepath isn't 136 // time-critical, as it isn't taken in 99% of cases. 137 while (UnresolvedRecords) { 138 UnresolvedRecords = false; 139 auto HashIt = Hashes.begin(); 140 for (const auto &R : Records) { 141 if (HashIt->empty()) { 142 GloballyHashedType H = hashType(R, Hashes, Hashes); 143 if (H.empty()) 144 UnresolvedRecords = true; 145 else 146 *HashIt = H; 147 } 148 ++HashIt; 149 } 150 } 151 152 return Hashes; 153 } 154 155 /// Given a sequence of combined type and ID records, compute global hashes 156 /// for each of them, returning the results in a vector of hashed types. 157 template <typename Range> 158 static std::vector<GloballyHashedType> 159 hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) { 160 std::vector<GloballyHashedType> IdHashes; 161 for (const auto &R : Records) 162 IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); 163 164 return IdHashes; 165 } 166 167 static std::vector<GloballyHashedType> 168 hashTypeCollection(TypeCollection &Types) { 169 std::vector<GloballyHashedType> Hashes; 170 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 171 Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes)); 172 }); 173 return Hashes; 174 } 175 }; 176 static_assert(std::is_trivially_copyable<GloballyHashedType>::value, 177 "GloballyHashedType must be trivially copyable so that we can " 178 "reinterpret_cast arrays of hash data to arrays of " 179 "GloballyHashedType"); 180 } // namespace codeview 181 182 template <> struct DenseMapInfo<codeview::LocallyHashedType> { 183 static codeview::LocallyHashedType Empty; 184 static codeview::LocallyHashedType Tombstone; 185 186 static codeview::LocallyHashedType getEmptyKey() { return Empty; } 187 188 static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } 189 190 static unsigned getHashValue(codeview::LocallyHashedType Val) { 191 return Val.Hash; 192 } 193 194 static bool isEqual(codeview::LocallyHashedType LHS, 195 codeview::LocallyHashedType RHS) { 196 if (LHS.Hash != RHS.Hash) 197 return false; 198 return LHS.RecordData == RHS.RecordData; 199 } 200 }; 201 202 template <> struct DenseMapInfo<codeview::GloballyHashedType> { 203 static codeview::GloballyHashedType Empty; 204 static codeview::GloballyHashedType Tombstone; 205 206 static codeview::GloballyHashedType getEmptyKey() { return Empty; } 207 208 static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } 209 210 static unsigned getHashValue(codeview::GloballyHashedType Val) { 211 return *reinterpret_cast<const unsigned *>(Val.Hash.data()); 212 } 213 214 static bool isEqual(codeview::GloballyHashedType LHS, 215 codeview::GloballyHashedType RHS) { 216 return LHS == RHS; 217 } 218 }; 219 220 template <> struct format_provider<codeview::LocallyHashedType> { 221 public: 222 static void format(const codeview::LocallyHashedType &V, 223 llvm::raw_ostream &Stream, StringRef Style) { 224 write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8); 225 } 226 }; 227 228 template <> struct format_provider<codeview::GloballyHashedType> { 229 public: 230 static void format(const codeview::GloballyHashedType &V, 231 llvm::raw_ostream &Stream, StringRef Style) { 232 for (uint8_t B : V.Hash) { 233 write_hex(Stream, B, HexPrintStyle::Upper, 2); 234 } 235 } 236 }; 237 238 } // namespace llvm 239 240 #endif 241