1 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements an interface allowing to conveniently build hashes of
10 // various data types, without relying on the underlying hasher type to know
11 // about hashed data types.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_SUPPORT_HASHBUILDER_H
16 #define LLVM_SUPPORT_HASHBUILDER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/type_traits.h"
24 
25 #include <iterator>
26 #include <optional>
27 #include <utility>
28 
29 namespace llvm {
30 
31 namespace hashbuilder_detail {
32 /// Trait to indicate whether a type's bits can be hashed directly (after
33 /// endianness correction).
34 template <typename U>
35 struct IsHashableData
36     : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
37 
38 } // namespace hashbuilder_detail
39 
40 /// Declares the hasher member, and functions forwarding directly to the hasher.
41 template <typename HasherT> class HashBuilderBase {
42 public:
43   template <typename HasherT_ = HasherT>
44   using HashResultTy = decltype(std::declval<HasherT_ &>().final());
45 
getHasher()46   HasherT &getHasher() { return Hasher; }
47 
48   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
49   ///
50   /// This may not take the size of `Data` into account.
51   /// Users of this function should pay attention to respect endianness
52   /// contraints.
update(ArrayRef<uint8_t> Data)53   void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
54 
55   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
56   ///
57   /// This may not take the size of `Data` into account.
58   /// Users of this function should pay attention to respect endianness
59   /// contraints.
update(StringRef Data)60   void update(StringRef Data) {
61     update(
62         ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()));
63   }
64 
65   /// Forward to `HasherT::final()` if available.
final()66   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
67     return this->getHasher().final();
68   }
69 
70   /// Forward to `HasherT::result()` if available.
result()71   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() {
72     return this->getHasher().result();
73   }
74 
75 protected:
HashBuilderBase(HasherT & Hasher)76   explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
77 
78   template <typename... ArgTypes>
HashBuilderBase(ArgTypes &&...Args)79   explicit HashBuilderBase(ArgTypes &&...Args)
80       : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...),
81         Hasher(*OptionalHasher) {}
82 
83 private:
84   std::optional<HasherT> OptionalHasher;
85   HasherT &Hasher;
86 };
87 
88 /// Interface to help hash various types through a hasher type.
89 ///
90 /// Via provided specializations of `add`, `addRange`, and `addRangeElements`
91 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
92 /// without requiring any knowledge of hashed types from the hasher type.
93 ///
94 /// The only method expected from the templated hasher type `HasherT` is:
95 /// * void update(ArrayRef<uint8_t> Data)
96 ///
97 /// Additionally, the following methods will be forwarded to the hasher type:
98 /// * decltype(std::declval<HasherT &>().final()) final()
99 /// * decltype(std::declval<HasherT &>().result()) result()
100 ///
101 /// From a user point of view, the interface provides the following:
102 /// * `template<typename T> add(const T &Value)`
103 ///   The `add` function implements hashing of various types.
104 /// * `template <typename ItT> void addRange(ItT First, ItT Last)`
105 ///   The `addRange` function is designed to aid hashing a range of values.
106 ///   It explicitly adds the size of the range in the hash.
107 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
108 ///   The `addRangeElements` function is also designed to aid hashing a range of
109 ///   values. In contrast to `addRange`, it **ignores** the size of the range,
110 ///   behaving as if elements were added one at a time with `add`.
111 ///
112 /// User-defined `struct` types can participate in this interface by providing
113 /// an `addHash` templated function. See the associated template specialization
114 /// for details.
115 ///
116 /// This interface does not impose requirements on the hasher
117 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
118 /// variable-size types; for example for
119 /// ```
120 /// builder.add({1});
121 /// builder.add({2, 3});
122 /// ```
123 /// and
124 /// ```
125 /// builder.add({1, 2});
126 /// builder.add({3});
127 /// ```
128 /// . Thus, specializations of `add` and `addHash` for variable-size types must
129 /// not assume that the hasher type considers the size as part of the hash; they
130 /// must explicitly add the size to the hash. See for example specializations
131 /// for `ArrayRef` and `StringRef`.
132 ///
133 /// Additionally, since types are eventually forwarded to the hasher's
134 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
135 /// computation (for example when computing `add((int)123)`).
136 /// Specifiying a non-`native` `Endianness` template parameter allows to compute
137 /// stable hash across platforms with different endianness.
138 template <typename HasherT, llvm::endianness Endianness>
139 class HashBuilder : public HashBuilderBase<HasherT> {
140 public:
HashBuilder(HasherT & Hasher)141   explicit HashBuilder(HasherT &Hasher) : HashBuilderBase<HasherT>(Hasher) {}
142   template <typename... ArgTypes>
HashBuilder(ArgTypes &&...Args)143   explicit HashBuilder(ArgTypes &&...Args)
144       : HashBuilderBase<HasherT>(Args...) {}
145 
146   /// Implement hashing for hashable data types, e.g. integral or enum values.
147   template <typename T>
148   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, HashBuilder &>
add(T Value)149   add(T Value) {
150     return adjustForEndiannessAndAdd(Value);
151   }
152 
153   /// Support hashing `ArrayRef`.
154   ///
155   /// `Value.size()` is taken into account to ensure cases like
156   /// ```
157   /// builder.add({1});
158   /// builder.add({2, 3});
159   /// ```
160   /// and
161   /// ```
162   /// builder.add({1, 2});
163   /// builder.add({3});
164   /// ```
165   /// do not collide.
add(ArrayRef<T> Value)166   template <typename T> HashBuilder &add(ArrayRef<T> Value) {
167     // As of implementation time, simply calling `addRange(Value)` would also go
168     // through the `update` fast path. But that would rely on the implementation
169     // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
170     // `update` to guarantee the fast path.
171     add(Value.size());
172     if (hashbuilder_detail::IsHashableData<T>::value &&
173         Endianness == llvm::endianness::native) {
174       this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
175                             Value.size() * sizeof(T)));
176     } else {
177       for (auto &V : Value)
178         add(V);
179     }
180     return *this;
181   }
182 
183   /// Support hashing `StringRef`.
184   ///
185   /// `Value.size()` is taken into account to ensure cases like
186   /// ```
187   /// builder.add("a");
188   /// builder.add("bc");
189   /// ```
190   /// and
191   /// ```
192   /// builder.add("ab");
193   /// builder.add("c");
194   /// ```
195   /// do not collide.
add(StringRef Value)196   HashBuilder &add(StringRef Value) {
197     // As of implementation time, simply calling `addRange(Value)` would also go
198     // through `update`. But that would rely on the implementation of
199     // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
200     // guarantee the fast path.
201     add(Value.size());
202     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
203                           Value.size()));
204     return *this;
205   }
206 
207   template <typename T>
208   using HasAddHashT =
209       decltype(addHash(std::declval<HashBuilder &>(), std::declval<T &>()));
210   /// Implement hashing for user-defined `struct`s.
211   ///
212   /// Any user-define `struct` can participate in hashing via `HashBuilder` by
213   /// providing a `addHash` templated function.
214   ///
215   /// ```
216   /// template <typename HasherT, llvm::endianness Endianness>
217   /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
218   ///              const UserDefinedStruct &Value);
219   /// ```
220   ///
221   /// For example:
222   /// ```
223   /// struct SimpleStruct {
224   ///   char c;
225   ///   int i;
226   /// };
227   ///
228   /// template <typename HasherT, llvm::endianness Endianness>
229   /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
230   ///              const SimpleStruct &Value) {
231   ///   HBuilder.add(Value.c);
232   ///   HBuilder.add(Value.i);
233   /// }
234   /// ```
235   ///
236   /// To avoid endianness issues, specializations of `addHash` should
237   /// generally rely on exising `add`, `addRange`, and `addRangeElements`
238   /// functions. If directly using `update`, an implementation must correctly
239   /// handle endianness.
240   ///
241   /// ```
242   /// struct __attribute__ ((packed)) StructWithFastHash {
243   ///   int I;
244   ///   char C;
245   ///
246   ///   // If possible, we want to hash both `I` and `C` in a single
247   ///   // `update` call for performance concerns.
248   ///   template <typename HasherT, llvm::endianness Endianness>
249   ///   friend void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
250   ///                       const StructWithFastHash &Value) {
251   ///     if (Endianness == llvm::endianness::native) {
252   ///       HBuilder.update(ArrayRef(
253   ///           reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
254   ///     } else {
255   ///       // Rely on existing `add` methods to handle endianness.
256   ///       HBuilder.add(Value.I);
257   ///       HBuilder.add(Value.C);
258   ///     }
259   ///   }
260   /// };
261   /// ```
262   ///
263   /// To avoid collisions, specialization of `addHash` for variable-size
264   /// types must take the size into account.
265   ///
266   /// For example:
267   /// ```
268   /// struct CustomContainer {
269   /// private:
270   ///   size_t Size;
271   ///   int Elements[100];
272   ///
273   /// public:
274   ///   CustomContainer(size_t Size) : Size(Size) {
275   ///     for (size_t I = 0; I != Size; ++I)
276   ///       Elements[I] = I;
277   ///   }
278   ///   template <typename HasherT, llvm::endianness Endianness>
279   ///   friend void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
280   ///                       const CustomContainer &Value) {
281   ///     if (Endianness == llvm::endianness::native) {
282   ///       HBuilder.update(ArrayRef(
283   ///           reinterpret_cast<const uint8_t *>(&Value.Size),
284   ///           sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
285   ///     } else {
286   ///       // `addRange` will take care of encoding the size.
287   ///       HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
288   ///       Value.Size);
289   ///     }
290   ///   }
291   /// };
292   /// ```
293   template <typename T>
294   std::enable_if_t<is_detected<HasAddHashT, T>::value &&
295                        !hashbuilder_detail::IsHashableData<T>::value,
296                    HashBuilder &>
add(const T & Value)297   add(const T &Value) {
298     addHash(*this, Value);
299     return *this;
300   }
301 
302   template <typename T1, typename T2>
add(const std::pair<T1,T2> & Value)303   HashBuilder &add(const std::pair<T1, T2> &Value) {
304     return add(Value.first, Value.second);
305   }
306 
add(const std::tuple<Ts...> & Arg)307   template <typename... Ts> HashBuilder &add(const std::tuple<Ts...> &Arg) {
308     std::apply([this](const auto &...Args) { this->add(Args...); }, Arg);
309     return *this;
310   }
311 
312   /// A convenenience variadic helper.
313   /// It simply iterates over its arguments, in order.
314   /// ```
315   /// add(Arg1, Arg2);
316   /// ```
317   /// is equivalent to
318   /// ```
319   /// add(Arg1)
320   /// add(Arg2)
321   /// ```
322   template <typename... Ts>
add(const Ts &...Args)323   std::enable_if_t<(sizeof...(Ts) > 1), HashBuilder &> add(const Ts &...Args) {
324     return (add(Args), ...);
325   }
326 
327   template <typename ForwardIteratorT>
addRange(ForwardIteratorT First,ForwardIteratorT Last)328   HashBuilder &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
329     add(std::distance(First, Last));
330     return addRangeElements(First, Last);
331   }
332 
addRange(const RangeT & Range)333   template <typename RangeT> HashBuilder &addRange(const RangeT &Range) {
334     return addRange(adl_begin(Range), adl_end(Range));
335   }
336 
337   template <typename ForwardIteratorT>
addRangeElements(ForwardIteratorT First,ForwardIteratorT Last)338   HashBuilder &addRangeElements(ForwardIteratorT First, ForwardIteratorT Last) {
339     return addRangeElementsImpl(
340         First, Last,
341         typename std::iterator_traits<ForwardIteratorT>::iterator_category());
342   }
343 
344   template <typename RangeT>
addRangeElements(const RangeT & Range)345   HashBuilder &addRangeElements(const RangeT &Range) {
346     return addRangeElements(adl_begin(Range), adl_end(Range));
347   }
348 
349   template <typename T>
350   using HasByteSwapT = decltype(support::endian::byte_swap(
351       std::declval<T &>(), llvm::endianness::little));
352   /// Adjust `Value` for the target endianness and add it to the hash.
353   template <typename T>
354   std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilder &>
adjustForEndiannessAndAdd(const T & Value)355   adjustForEndiannessAndAdd(const T &Value) {
356     T SwappedValue = support::endian::byte_swap(Value, Endianness);
357     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
358                           sizeof(SwappedValue)));
359     return *this;
360   }
361 
362 private:
363   // FIXME: Once available, specialize this function for `contiguous_iterator`s,
364   // and use it for `ArrayRef` and `StringRef`.
365   template <typename ForwardIteratorT>
addRangeElementsImpl(ForwardIteratorT First,ForwardIteratorT Last,std::forward_iterator_tag)366   HashBuilder &addRangeElementsImpl(ForwardIteratorT First,
367                                     ForwardIteratorT Last,
368                                     std::forward_iterator_tag) {
369     for (auto It = First; It != Last; ++It)
370       add(*It);
371     return *this;
372   }
373 
374   template <typename T>
375   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
376                        Endianness == llvm::endianness::native,
377                    HashBuilder &>
addRangeElementsImpl(T * First,T * Last,std::forward_iterator_tag)378   addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
379     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First),
380                           (Last - First) * sizeof(T)));
381     return *this;
382   }
383 };
384 
385 namespace hashbuilder_detail {
386 class HashCodeHasher {
387 public:
HashCodeHasher()388   HashCodeHasher() : Code(0) {}
update(ArrayRef<uint8_t> Data)389   void update(ArrayRef<uint8_t> Data) {
390     hash_code DataCode = hash_value(Data);
391     Code = hash_combine(Code, DataCode);
392   }
393   hash_code Code;
394 };
395 
396 using HashCodeHashBuilder =
397     HashBuilder<hashbuilder_detail::HashCodeHasher, llvm::endianness::native>;
398 } // namespace hashbuilder_detail
399 
400 /// Provide a default implementation of `hash_value` when `addHash(const T &)`
401 /// is supported.
402 template <typename T>
403 std::enable_if_t<
404     is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
405     hash_code>
hash_value(const T & Value)406 hash_value(const T &Value) {
407   hashbuilder_detail::HashCodeHashBuilder HBuilder;
408   HBuilder.add(Value);
409   return HBuilder.getHasher().Code;
410 }
411 } // end namespace llvm
412 
413 #endif // LLVM_SUPPORT_HASHBUILDER_H
414