1 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements an interface allowing to conveniently build hashes of
10 // various data types, without relying on the underlying hasher type to know
11 // about hashed data types.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_SUPPORT_HASHBUILDER_H
16 #define LLVM_SUPPORT_HASHBUILDER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/type_traits.h"
24 
25 #include <iterator>
26 #include <optional>
27 #include <utility>
28 
29 namespace llvm {
30 
31 namespace hashbuilder_detail {
32 /// Trait to indicate whether a type's bits can be hashed directly (after
33 /// endianness correction).
34 template <typename U>
35 struct IsHashableData
36     : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
37 
38 } // namespace hashbuilder_detail
39 
40 /// Declares the hasher member, and functions forwarding directly to the hasher.
41 template <typename HasherT> class HashBuilderBase {
42 public:
43   template <typename HasherT_ = HasherT>
44   using HashResultTy = decltype(std::declval<HasherT_ &>().final());
45 
46   HasherT &getHasher() { return Hasher; }
47 
48   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
49   ///
50   /// This may not take the size of `Data` into account.
51   /// Users of this function should pay attention to respect endianness
52   /// contraints.
53   void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
54 
55   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
56   ///
57   /// This may not take the size of `Data` into account.
58   /// Users of this function should pay attention to respect endianness
59   /// contraints.
60   void update(StringRef Data) {
61     update(
62         ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()));
63   }
64 
65   /// Forward to `HasherT::final()` if available.
66   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
67     return this->getHasher().final();
68   }
69 
70   /// Forward to `HasherT::result()` if available.
71   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() {
72     return this->getHasher().result();
73   }
74 
75 protected:
76   explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
77 
78   template <typename... ArgTypes>
79   explicit HashBuilderBase(ArgTypes &&...Args)
80       : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...),
81         Hasher(*OptionalHasher) {}
82 
83 private:
84   std::optional<HasherT> OptionalHasher;
85   HasherT &Hasher;
86 };
87 
88 /// Implementation of the `HashBuilder` interface.
89 ///
90 /// `support::endianness::native` is not supported. `HashBuilder` is
91 /// expected to canonicalize `support::endianness::native` to one of
92 /// `support::endianness::big` or `support::endianness::little`.
93 template <typename HasherT, support::endianness Endianness>
94 class HashBuilderImpl : public HashBuilderBase<HasherT> {
95   static_assert(Endianness != support::endianness::native,
96                 "HashBuilder should canonicalize endianness");
97 
98 public:
99   explicit HashBuilderImpl(HasherT &Hasher)
100       : HashBuilderBase<HasherT>(Hasher) {}
101   template <typename... ArgTypes>
102   explicit HashBuilderImpl(ArgTypes &&...Args)
103       : HashBuilderBase<HasherT>(Args...) {}
104 
105   /// Implement hashing for hashable data types, e.g. integral or enum values.
106   template <typename T>
107   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value,
108                    HashBuilderImpl &>
109   add(T Value) {
110     return adjustForEndiannessAndAdd(Value);
111   }
112 
113   /// Support hashing `ArrayRef`.
114   ///
115   /// `Value.size()` is taken into account to ensure cases like
116   /// ```
117   /// builder.add({1});
118   /// builder.add({2, 3});
119   /// ```
120   /// and
121   /// ```
122   /// builder.add({1, 2});
123   /// builder.add({3});
124   /// ```
125   /// do not collide.
126   template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
127     // As of implementation time, simply calling `addRange(Value)` would also go
128     // through the `update` fast path. But that would rely on the implementation
129     // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
130     // `update` to guarantee the fast path.
131     add(Value.size());
132     if (hashbuilder_detail::IsHashableData<T>::value &&
133         Endianness == support::endian::system_endianness()) {
134       this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
135                             Value.size() * sizeof(T)));
136     } else {
137       for (auto &V : Value)
138         add(V);
139     }
140     return *this;
141   }
142 
143   /// Support hashing `StringRef`.
144   ///
145   /// `Value.size()` is taken into account to ensure cases like
146   /// ```
147   /// builder.add("a");
148   /// builder.add("bc");
149   /// ```
150   /// and
151   /// ```
152   /// builder.add("ab");
153   /// builder.add("c");
154   /// ```
155   /// do not collide.
156   HashBuilderImpl &add(StringRef Value) {
157     // As of implementation time, simply calling `addRange(Value)` would also go
158     // through `update`. But that would rely on the implementation of
159     // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
160     // guarantee the fast path.
161     add(Value.size());
162     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
163                           Value.size()));
164     return *this;
165   }
166 
167   template <typename T>
168   using HasAddHashT =
169       decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
170   /// Implement hashing for user-defined `struct`s.
171   ///
172   /// Any user-define `struct` can participate in hashing via `HashBuilder` by
173   /// providing a `addHash` templated function.
174   ///
175   /// ```
176   /// template <typename HasherT, support::endianness Endianness>
177   /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
178   ///              const UserDefinedStruct &Value);
179   /// ```
180   ///
181   /// For example:
182   /// ```
183   /// struct SimpleStruct {
184   ///   char c;
185   ///   int i;
186   /// };
187   ///
188   /// template <typename HasherT, support::endianness Endianness>
189   /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
190   ///              const SimpleStruct &Value) {
191   ///   HBuilder.add(Value.c);
192   ///   HBuilder.add(Value.i);
193   /// }
194   /// ```
195   ///
196   /// To avoid endianness issues, specializations of `addHash` should
197   /// generally rely on exising `add`, `addRange`, and `addRangeElements`
198   /// functions. If directly using `update`, an implementation must correctly
199   /// handle endianness.
200   ///
201   /// ```
202   /// struct __attribute__ ((packed)) StructWithFastHash {
203   ///   int I;
204   ///   char C;
205   ///
206   ///   // If possible, we want to hash both `I` and `C` in a single
207   ///   // `update` call for performance concerns.
208   ///   template <typename HasherT, support::endianness Endianness>
209   ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
210   ///                       const StructWithFastHash &Value) {
211   ///     if (Endianness == support::endian::system_endianness()) {
212   ///       HBuilder.update(ArrayRef(
213   ///           reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
214   ///     } else {
215   ///       // Rely on existing `add` methods to handle endianness.
216   ///       HBuilder.add(Value.I);
217   ///       HBuilder.add(Value.C);
218   ///     }
219   ///   }
220   /// };
221   /// ```
222   ///
223   /// To avoid collisions, specialization of `addHash` for variable-size
224   /// types must take the size into account.
225   ///
226   /// For example:
227   /// ```
228   /// struct CustomContainer {
229   /// private:
230   ///   size_t Size;
231   ///   int Elements[100];
232   ///
233   /// public:
234   ///   CustomContainer(size_t Size) : Size(Size) {
235   ///     for (size_t I = 0; I != Size; ++I)
236   ///       Elements[I] = I;
237   ///   }
238   ///   template <typename HasherT, support::endianness Endianness>
239   ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
240   ///                       const CustomContainer &Value) {
241   ///     if (Endianness == support::endian::system_endianness()) {
242   ///       HBuilder.update(ArrayRef(
243   ///           reinterpret_cast<const uint8_t *>(&Value.Size),
244   ///           sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
245   ///     } else {
246   ///       // `addRange` will take care of encoding the size.
247   ///       HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
248   ///       Value.Size);
249   ///     }
250   ///   }
251   /// };
252   /// ```
253   template <typename T>
254   std::enable_if_t<is_detected<HasAddHashT, T>::value &&
255                        !hashbuilder_detail::IsHashableData<T>::value,
256                    HashBuilderImpl &>
257   add(const T &Value) {
258     addHash(*this, Value);
259     return *this;
260   }
261 
262   template <typename T1, typename T2>
263   HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
264     return add(Value.first, Value.second);
265   }
266 
267   template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
268     std::apply([this](const auto &...Args) { this->add(Args...); }, Arg);
269     return *this;
270   }
271 
272   /// A convenenience variadic helper.
273   /// It simply iterates over its arguments, in order.
274   /// ```
275   /// add(Arg1, Arg2);
276   /// ```
277   /// is equivalent to
278   /// ```
279   /// add(Arg1)
280   /// add(Arg2)
281   /// ```
282   template <typename... Ts>
283   std::enable_if_t<(sizeof...(Ts) > 1), HashBuilderImpl &>
284   add(const Ts &...Args) {
285     return (add(Args), ...);
286   }
287 
288   template <typename ForwardIteratorT>
289   HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
290     add(std::distance(First, Last));
291     return addRangeElements(First, Last);
292   }
293 
294   template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
295     return addRange(adl_begin(Range), adl_end(Range));
296   }
297 
298   template <typename ForwardIteratorT>
299   HashBuilderImpl &addRangeElements(ForwardIteratorT First,
300                                     ForwardIteratorT Last) {
301     return addRangeElementsImpl(
302         First, Last,
303         typename std::iterator_traits<ForwardIteratorT>::iterator_category());
304   }
305 
306   template <typename RangeT>
307   HashBuilderImpl &addRangeElements(const RangeT &Range) {
308     return addRangeElements(adl_begin(Range), adl_end(Range));
309   }
310 
311   template <typename T>
312   using HasByteSwapT = decltype(support::endian::byte_swap(
313       std::declval<T &>(), support::endianness::little));
314   /// Adjust `Value` for the target endianness and add it to the hash.
315   template <typename T>
316   std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
317   adjustForEndiannessAndAdd(const T &Value) {
318     T SwappedValue = support::endian::byte_swap(Value, Endianness);
319     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
320                           sizeof(SwappedValue)));
321     return *this;
322   }
323 
324 private:
325   // FIXME: Once available, specialize this function for `contiguous_iterator`s,
326   // and use it for `ArrayRef` and `StringRef`.
327   template <typename ForwardIteratorT>
328   HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
329                                         ForwardIteratorT Last,
330                                         std::forward_iterator_tag) {
331     for (auto It = First; It != Last; ++It)
332       add(*It);
333     return *this;
334   }
335 
336   template <typename T>
337   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
338                        Endianness == support::endian::system_endianness(),
339                    HashBuilderImpl &>
340   addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
341     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First),
342                           (Last - First) * sizeof(T)));
343     return *this;
344   }
345 };
346 
347 /// Interface to help hash various types through a hasher type.
348 ///
349 /// Via provided specializations of `add`, `addRange`, and `addRangeElements`
350 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
351 /// without requiring any knowledge of hashed types from the hasher type.
352 ///
353 /// The only method expected from the templated hasher type `HasherT` is:
354 /// * void update(ArrayRef<uint8_t> Data)
355 ///
356 /// Additionally, the following methods will be forwarded to the hasher type:
357 /// * decltype(std::declval<HasherT &>().final()) final()
358 /// * decltype(std::declval<HasherT &>().result()) result()
359 ///
360 /// From a user point of view, the interface provides the following:
361 /// * `template<typename T> add(const T &Value)`
362 ///   The `add` function implements hashing of various types.
363 /// * `template <typename ItT> void addRange(ItT First, ItT Last)`
364 ///   The `addRange` function is designed to aid hashing a range of values.
365 ///   It explicitly adds the size of the range in the hash.
366 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
367 ///   The `addRangeElements` function is also designed to aid hashing a range of
368 ///   values. In contrast to `addRange`, it **ignores** the size of the range,
369 ///   behaving as if elements were added one at a time with `add`.
370 ///
371 /// User-defined `struct` types can participate in this interface by providing
372 /// an `addHash` templated function. See the associated template specialization
373 /// for details.
374 ///
375 /// This interface does not impose requirements on the hasher
376 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
377 /// variable-size types; for example for
378 /// ```
379 /// builder.add({1});
380 /// builder.add({2, 3});
381 /// ```
382 /// and
383 /// ```
384 /// builder.add({1, 2});
385 /// builder.add({3});
386 /// ```
387 /// . Thus, specializations of `add` and `addHash` for variable-size types must
388 /// not assume that the hasher type considers the size as part of the hash; they
389 /// must explicitly add the size to the hash. See for example specializations
390 /// for `ArrayRef` and `StringRef`.
391 ///
392 /// Additionally, since types are eventually forwarded to the hasher's
393 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
394 /// computation (for example when computing `add((int)123)`).
395 /// Specifiying a non-`native` `Endianness` template parameter allows to compute
396 /// stable hash across platforms with different endianness.
397 template <class HasherT, support::endianness Endianness>
398 using HashBuilder =
399     HashBuilderImpl<HasherT, (Endianness == support::endianness::native
400                                   ? support::endian::system_endianness()
401                                   : Endianness)>;
402 
403 namespace hashbuilder_detail {
404 class HashCodeHasher {
405 public:
406   HashCodeHasher() : Code(0) {}
407   void update(ArrayRef<uint8_t> Data) {
408     hash_code DataCode = hash_value(Data);
409     Code = hash_combine(Code, DataCode);
410   }
411   hash_code Code;
412 };
413 
414 using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher,
415                                         support::endianness::native>;
416 } // namespace hashbuilder_detail
417 
418 /// Provide a default implementation of `hash_value` when `addHash(const T &)`
419 /// is supported.
420 template <typename T>
421 std::enable_if_t<
422     is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
423     hash_code>
424 hash_value(const T &Value) {
425   hashbuilder_detail::HashCodeHashBuilder HBuilder;
426   HBuilder.add(Value);
427   return HBuilder.getHasher().Code;
428 }
429 } // end namespace llvm
430 
431 #endif // LLVM_SUPPORT_HASHBUILDER_H
432