1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
10 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
11 
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/ADT/Hashing.h"
14 #include "llvm/ADT/StringRef.h"
15 
16 #include "llvm/DebugInfo/CodeView/CVRecord.h"
17 #include "llvm/DebugInfo/CodeView/TypeCollection.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
19 
20 #include "llvm/Support/FormatProviders.h"
21 
22 #include <type_traits>
23 
24 namespace llvm {
25 class raw_ostream;
26 namespace codeview {
27 
28 /// A locally hashed type represents a straightforward hash code of a serialized
29 /// record.  The record is simply serialized, and then the bytes are hashed by
30 /// a standard algorithm.  This is sufficient for the case of de-duplicating
31 /// records within a single sequence of types, because if two records both have
32 /// a back-reference to the same type in the same stream, they will both have
33 /// the same numeric value for the TypeIndex of the back reference.
34 struct LocallyHashedType {
35   hash_code Hash;
36   ArrayRef<uint8_t> RecordData;
37 
38   /// Given a type, compute its local hash.
39   static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);
40 
41   /// Given a sequence of types, compute all of the local hashes.
42   template <typename Range>
43   static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
44     std::vector<LocallyHashedType> Hashes;
45     Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
46     for (const auto &R : Records)
47       Hashes.push_back(hashType(R));
48 
49     return Hashes;
50   }
51 
52   static std::vector<LocallyHashedType>
53   hashTypeCollection(TypeCollection &Types) {
54     std::vector<LocallyHashedType> Hashes;
55     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
56       Hashes.push_back(hashType(Type.RecordData));
57     });
58     return Hashes;
59   }
60 };
61 
62 enum class GlobalTypeHashAlg : uint16_t {
63   SHA1 = 0, // standard 20-byte SHA1 hash
64   SHA1_8,   // last 8-bytes of standard SHA1 hash
65   BLAKE3,   // truncated 8-bytes BLAKE3
66 };
67 
68 /// A globally hashed type represents a hash value that is sufficient to
69 /// uniquely identify a record across multiple type streams or type sequences.
70 /// This works by, for any given record A which references B, replacing the
71 /// TypeIndex that refers to B with a previously-computed global hash for B.  As
72 /// this is a recursive algorithm (e.g. the global hash of B also depends on the
73 /// global hashes of the types that B refers to), a global hash can uniquely
74 /// identify that A occurs in another stream that has a completely
75 /// different graph structure.  Although the hash itself is slower to compute,
76 /// probing is much faster with a globally hashed type, because the hash itself
77 /// is considered "as good as" the original type.  Since type records can be
78 /// quite large, this makes the equality comparison of the hash much faster than
79 /// equality comparison of a full record.
80 struct GloballyHashedType {
81   GloballyHashedType() = default;
82   GloballyHashedType(StringRef H)
83       : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
84   GloballyHashedType(ArrayRef<uint8_t> H) {
85     assert(H.size() == 8);
86     ::memcpy(Hash.data(), H.data(), 8);
87   }
88   std::array<uint8_t, 8> Hash;
89 
90   bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
91 
92   friend inline bool operator==(const GloballyHashedType &L,
93                                 const GloballyHashedType &R) {
94     return L.Hash == R.Hash;
95   }
96 
97   friend inline bool operator!=(const GloballyHashedType &L,
98                                 const GloballyHashedType &R) {
99     return !(L.Hash == R.Hash);
100   }
101 
102   /// Given a sequence of bytes representing a record, compute a global hash for
103   /// this record.  Due to the nature of global hashes incorporating the hashes
104   /// of referenced records, this function requires a list of types and ids
105   /// that RecordData might reference, indexable by TypeIndex.
106   static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
107                                      ArrayRef<GloballyHashedType> PreviousTypes,
108                                      ArrayRef<GloballyHashedType> PreviousIds);
109 
110   /// Given a sequence of bytes representing a record, compute a global hash for
111   /// this record.  Due to the nature of global hashes incorporating the hashes
112   /// of referenced records, this function requires a list of types and ids
113   /// that RecordData might reference, indexable by TypeIndex.
114   static GloballyHashedType hashType(CVType Type,
115                                      ArrayRef<GloballyHashedType> PreviousTypes,
116                                      ArrayRef<GloballyHashedType> PreviousIds) {
117     return hashType(Type.RecordData, PreviousTypes, PreviousIds);
118   }
119 
120   /// Given a sequence of combined type and ID records, compute global hashes
121   /// for each of them, returning the results in a vector of hashed types.
122   template <typename Range>
123   static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
124     std::vector<GloballyHashedType> Hashes;
125     bool UnresolvedRecords = false;
126     for (const auto &R : Records) {
127       GloballyHashedType H = hashType(R, Hashes, Hashes);
128       if (H.empty())
129         UnresolvedRecords = true;
130       Hashes.push_back(H);
131     }
132 
133     // In some rare cases, there might be records with forward references in the
134     // stream. Several passes might be needed to fully hash each record in the
135     // Type stream. However this occurs on very small OBJs generated by MASM,
136     // with a dozen records at most. Therefore this codepath isn't
137     // time-critical, as it isn't taken in 99% of cases.
138     while (UnresolvedRecords) {
139       UnresolvedRecords = false;
140       auto HashIt = Hashes.begin();
141       for (const auto &R : Records) {
142         if (HashIt->empty()) {
143           GloballyHashedType H = hashType(R, Hashes, Hashes);
144           if (H.empty())
145             UnresolvedRecords = true;
146           else
147             *HashIt = H;
148         }
149         ++HashIt;
150       }
151     }
152 
153     return Hashes;
154   }
155 
156   /// Given a sequence of combined type and ID records, compute global hashes
157   /// for each of them, returning the results in a vector of hashed types.
158   template <typename Range>
159   static std::vector<GloballyHashedType>
160   hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
161     std::vector<GloballyHashedType> IdHashes;
162     for (const auto &R : Records)
163       IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
164 
165     return IdHashes;
166   }
167 
168   static std::vector<GloballyHashedType>
169   hashTypeCollection(TypeCollection &Types) {
170     std::vector<GloballyHashedType> Hashes;
171     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
172       Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
173     });
174     return Hashes;
175   }
176 };
177 static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
178               "GloballyHashedType must be trivially copyable so that we can "
179               "reinterpret_cast arrays of hash data to arrays of "
180               "GloballyHashedType");
181 } // namespace codeview
182 
183 template <> struct DenseMapInfo<codeview::LocallyHashedType> {
184   static codeview::LocallyHashedType Empty;
185   static codeview::LocallyHashedType Tombstone;
186 
187   static codeview::LocallyHashedType getEmptyKey() { return Empty; }
188 
189   static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
190 
191   static unsigned getHashValue(codeview::LocallyHashedType Val) {
192     return Val.Hash;
193   }
194 
195   static bool isEqual(codeview::LocallyHashedType LHS,
196                       codeview::LocallyHashedType RHS) {
197     if (LHS.Hash != RHS.Hash)
198       return false;
199     return LHS.RecordData == RHS.RecordData;
200   }
201 };
202 
203 template <> struct DenseMapInfo<codeview::GloballyHashedType> {
204   static codeview::GloballyHashedType Empty;
205   static codeview::GloballyHashedType Tombstone;
206 
207   static codeview::GloballyHashedType getEmptyKey() { return Empty; }
208 
209   static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
210 
211   static unsigned getHashValue(codeview::GloballyHashedType Val) {
212     return *reinterpret_cast<const unsigned *>(Val.Hash.data());
213   }
214 
215   static bool isEqual(codeview::GloballyHashedType LHS,
216                       codeview::GloballyHashedType RHS) {
217     return LHS == RHS;
218   }
219 };
220 
221 template <> struct format_provider<codeview::LocallyHashedType> {
222 public:
223   static void format(const codeview::LocallyHashedType &V,
224                      llvm::raw_ostream &Stream, StringRef Style) {
225     write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
226   }
227 };
228 
229 template <> struct format_provider<codeview::GloballyHashedType> {
230 public:
231   static void format(const codeview::GloballyHashedType &V,
232                      llvm::raw_ostream &Stream, StringRef Style) {
233     for (uint8_t B : V.Hash) {
234       write_hex(Stream, B, HexPrintStyle::Upper, 2);
235     }
236   }
237 };
238 
239 } // namespace llvm
240 
241 #endif
242