1 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements an interface allowing to conveniently build hashes of
10 // various data types, without relying on the underlying hasher type to know
11 // about hashed data types.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_SUPPORT_HASHBUILDER_H
16 #define LLVM_SUPPORT_HASHBUILDER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/type_traits.h"
24 
25 #include <iterator>
26 #include <utility>
27 
28 namespace llvm {
29 
30 namespace hashbuilder_detail {
31 /// Trait to indicate whether a type's bits can be hashed directly (after
32 /// endianness correction).
33 template <typename U>
34 struct IsHashableData
35     : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
36 
37 } // namespace hashbuilder_detail
38 
39 /// Declares the hasher member, and functions forwarding directly to the hasher.
40 template <typename HasherT> class HashBuilderBase {
41 public:
42   template <typename HasherT_ = HasherT>
43   using HashResultTy = decltype(std::declval<HasherT_ &>().final());
44 
45   HasherT &getHasher() { return Hasher; }
46 
47   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
48   ///
49   /// This may not take the size of `Data` into account.
50   /// Users of this function should pay attention to respect endianness
51   /// contraints.
52   void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
53 
54   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
55   ///
56   /// This may not take the size of `Data` into account.
57   /// Users of this function should pay attention to respect endianness
58   /// contraints.
59   void update(StringRef Data) {
60     update(makeArrayRef(reinterpret_cast<const uint8_t *>(Data.data()),
61                         Data.size()));
62   }
63 
64   /// Forward to `HasherT::final()` if available.
65   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
66     return this->getHasher().final();
67   }
68 
69   /// Forward to `HasherT::result()` if available.
70   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() {
71     return this->getHasher().result();
72   }
73 
74 protected:
75   explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
76 
77   template <typename... ArgTypes>
78   explicit HashBuilderBase(ArgTypes &&...Args)
79       : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...),
80         Hasher(*OptionalHasher) {}
81 
82 private:
83   Optional<HasherT> OptionalHasher;
84   HasherT &Hasher;
85 };
86 
87 /// Implementation of the `HashBuilder` interface.
88 ///
89 /// `support::endianness::native` is not supported. `HashBuilder` is
90 /// expected to canonicalize `support::endianness::native` to one of
91 /// `support::endianness::big` or `support::endianness::little`.
92 template <typename HasherT, support::endianness Endianness>
93 class HashBuilderImpl : public HashBuilderBase<HasherT> {
94   static_assert(Endianness != support::endianness::native,
95                 "HashBuilder should canonicalize endianness");
96 
97 public:
98   explicit HashBuilderImpl(HasherT &Hasher)
99       : HashBuilderBase<HasherT>(Hasher) {}
100   template <typename... ArgTypes>
101   explicit HashBuilderImpl(ArgTypes &&...Args)
102       : HashBuilderBase<HasherT>(Args...) {}
103 
104   /// Implement hashing for hashable data types, e.g. integral or enum values.
105   template <typename T>
106   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value,
107                    HashBuilderImpl &>
108   add(T Value) {
109     return adjustForEndiannessAndAdd(Value);
110   }
111 
112   /// Support hashing `ArrayRef`.
113   ///
114   /// `Value.size()` is taken into account to ensure cases like
115   /// ```
116   /// builder.add({1});
117   /// builder.add({2, 3});
118   /// ```
119   /// and
120   /// ```
121   /// builder.add({1, 2});
122   /// builder.add({3});
123   /// ```
124   /// do not collide.
125   template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
126     // As of implementation time, simply calling `addRange(Value)` would also go
127     // through the `update` fast path. But that would rely on the implementation
128     // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
129     // `update` to guarantee the fast path.
130     add(Value.size());
131     if (hashbuilder_detail::IsHashableData<T>::value &&
132         Endianness == support::endian::system_endianness()) {
133       this->update(
134           makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
135                        Value.size() * sizeof(T)));
136     } else {
137       for (auto &V : Value)
138         add(V);
139     }
140     return *this;
141   }
142 
143   /// Support hashing `StringRef`.
144   ///
145   /// `Value.size()` is taken into account to ensure cases like
146   /// ```
147   /// builder.add("a");
148   /// builder.add("bc");
149   /// ```
150   /// and
151   /// ```
152   /// builder.add("ab");
153   /// builder.add("c");
154   /// ```
155   /// do not collide.
156   HashBuilderImpl &add(StringRef Value) {
157     // As of implementation time, simply calling `addRange(Value)` would also go
158     // through `update`. But that would rely on the implementation of
159     // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
160     // guarantee the fast path.
161     add(Value.size());
162     this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
163                               Value.size()));
164     return *this;
165   }
166 
167   template <typename T>
168   using HasAddHashT =
169       decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
170   /// Implement hashing for user-defined `struct`s.
171   ///
172   /// Any user-define `struct` can participate in hashing via `HashBuilder` by
173   /// providing a `addHash` templated function.
174   ///
175   /// ```
176   /// template <typename HasherT, support::endianness Endianness>
177   /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
178   ///              const UserDefinedStruct &Value);
179   /// ```
180   ///
181   /// For example:
182   /// ```
183   /// struct SimpleStruct {
184   ///   char c;
185   ///   int i;
186   /// };
187   ///
188   /// template <typename HasherT, support::endianness Endianness>
189   /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
190   ///              const SimpleStruct &Value) {
191   ///   HBuilder.add(Value.c);
192   ///   HBuilder.add(Value.i);
193   /// }
194   /// ```
195   ///
196   /// To avoid endianness issues, specializations of `addHash` should
197   /// generally rely on exising `add`, `addRange`, and `addRangeElements`
198   /// functions. If directly using `update`, an implementation must correctly
199   /// handle endianness.
200   ///
201   /// ```
202   /// struct __attribute__ ((packed)) StructWithFastHash {
203   ///   int I;
204   ///   char C;
205   ///
206   ///   // If possible, we want to hash both `I` and `C` in a single
207   ///   // `update` call for performance concerns.
208   ///   template <typename HasherT, support::endianness Endianness>
209   ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
210   ///                       const StructWithFastHash &Value) {
211   ///     if (Endianness == support::endian::system_endianness()) {
212   ///       HBuilder.update(makeArrayRef(
213   ///           reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
214   ///     } else {
215   ///       // Rely on existing `add` methods to handle endianness.
216   ///       HBuilder.add(Value.I);
217   ///       HBuilder.add(Value.C);
218   ///     }
219   ///   }
220   /// };
221   /// ```
222   ///
223   /// To avoid collisions, specialization of `addHash` for variable-size
224   /// types must take the size into account.
225   ///
226   /// For example:
227   /// ```
228   /// struct CustomContainer {
229   /// private:
230   ///   size_t Size;
231   ///   int Elements[100];
232   ///
233   /// public:
234   ///   CustomContainer(size_t Size) : Size(Size) {
235   ///     for (size_t I = 0; I != Size; ++I)
236   ///       Elements[I] = I;
237   ///   }
238   ///   template <typename HasherT, support::endianness Endianness>
239   ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
240   ///                       const CustomContainer &Value) {
241   ///     if (Endianness == support::endian::system_endianness()) {
242   ///       HBuilder.update(makeArrayRef(
243   ///           reinterpret_cast<const uint8_t *>(&Value.Size),
244   ///           sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
245   ///     } else {
246   ///       // `addRange` will take care of encoding the size.
247   ///       HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
248   ///       Value.Size);
249   ///     }
250   ///   }
251   /// };
252   /// ```
253   template <typename T>
254   std::enable_if_t<is_detected<HasAddHashT, T>::value &&
255                        !hashbuilder_detail::IsHashableData<T>::value,
256                    HashBuilderImpl &>
257   add(const T &Value) {
258     addHash(*this, Value);
259     return *this;
260   }
261 
262   template <typename T1, typename T2>
263   HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
264     add(Value.first);
265     add(Value.second);
266     return *this;
267   }
268 
269   template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
270     return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>());
271   }
272 
273   /// A convenenience variadic helper.
274   /// It simply iterates over its arguments, in order.
275   /// ```
276   /// add(Arg1, Arg2);
277   /// ```
278   /// is equivalent to
279   /// ```
280   /// add(Arg1)
281   /// add(Arg2)
282   /// ```
283   template <typename T, typename... Ts>
284   typename std::enable_if<(sizeof...(Ts) >= 1), HashBuilderImpl &>::type
285   add(const T &FirstArg, const Ts &...Args) {
286     add(FirstArg);
287     add(Args...);
288     return *this;
289   }
290 
291   template <typename ForwardIteratorT>
292   HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
293     add(std::distance(First, Last));
294     return addRangeElements(First, Last);
295   }
296 
297   template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
298     return addRange(adl_begin(Range), adl_end(Range));
299   }
300 
301   template <typename ForwardIteratorT>
302   HashBuilderImpl &addRangeElements(ForwardIteratorT First,
303                                     ForwardIteratorT Last) {
304     return addRangeElementsImpl(
305         First, Last,
306         typename std::iterator_traits<ForwardIteratorT>::iterator_category());
307   }
308 
309   template <typename RangeT>
310   HashBuilderImpl &addRangeElements(const RangeT &Range) {
311     return addRangeElements(adl_begin(Range), adl_end(Range));
312   }
313 
314   template <typename T>
315   using HasByteSwapT = decltype(support::endian::byte_swap(
316       std::declval<T &>(), support::endianness::little));
317   /// Adjust `Value` for the target endianness and add it to the hash.
318   template <typename T>
319   std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
320   adjustForEndiannessAndAdd(const T &Value) {
321     T SwappedValue = support::endian::byte_swap(Value, Endianness);
322     this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
323                               sizeof(SwappedValue)));
324     return *this;
325   }
326 
327 private:
328   template <typename... Ts, std::size_t... Indices>
329   HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg,
330                                   std::index_sequence<Indices...>) {
331     add(std::get<Indices>(Arg)...);
332     return *this;
333   }
334 
335   // FIXME: Once available, specialize this function for `contiguous_iterator`s,
336   // and use it for `ArrayRef` and `StringRef`.
337   template <typename ForwardIteratorT>
338   HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
339                                         ForwardIteratorT Last,
340                                         std::forward_iterator_tag) {
341     for (auto It = First; It != Last; ++It)
342       add(*It);
343     return *this;
344   }
345 
346   template <typename T>
347   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
348                        Endianness == support::endian::system_endianness(),
349                    HashBuilderImpl &>
350   addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
351     this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First),
352                               (Last - First) * sizeof(T)));
353     return *this;
354   }
355 };
356 
357 /// Interface to help hash various types through a hasher type.
358 ///
359 /// Via provided specializations of `add`, `addRange`, and `addRangeElements`
360 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
361 /// without requiring any knowledge of hashed types from the hasher type.
362 ///
363 /// The only method expected from the templated hasher type `HasherT` is:
364 /// * void update(ArrayRef<uint8_t> Data)
365 ///
366 /// Additionally, the following methods will be forwarded to the hasher type:
367 /// * decltype(std::declval<HasherT &>().final()) final()
368 /// * decltype(std::declval<HasherT &>().result()) result()
369 ///
370 /// From a user point of view, the interface provides the following:
371 /// * `template<typename T> add(const T &Value)`
372 ///   The `add` function implements hashing of various types.
373 /// * `template <typename ItT> void addRange(ItT First, ItT Last)`
374 ///   The `addRange` function is designed to aid hashing a range of values.
375 ///   It explicitly adds the size of the range in the hash.
376 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
377 ///   The `addRangeElements` function is also designed to aid hashing a range of
378 ///   values. In contrast to `addRange`, it **ignores** the size of the range,
379 ///   behaving as if elements were added one at a time with `add`.
380 ///
381 /// User-defined `struct` types can participate in this interface by providing
382 /// an `addHash` templated function. See the associated template specialization
383 /// for details.
384 ///
385 /// This interface does not impose requirements on the hasher
386 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
387 /// variable-size types; for example for
388 /// ```
389 /// builder.add({1});
390 /// builder.add({2, 3});
391 /// ```
392 /// and
393 /// ```
394 /// builder.add({1, 2});
395 /// builder.add({3});
396 /// ```
397 /// . Thus, specializations of `add` and `addHash` for variable-size types must
398 /// not assume that the hasher type considers the size as part of the hash; they
399 /// must explicitly add the size to the hash. See for example specializations
400 /// for `ArrayRef` and `StringRef`.
401 ///
402 /// Additionally, since types are eventually forwarded to the hasher's
403 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
404 /// computation (for example when computing `add((int)123)`).
405 /// Specifiying a non-`native` `Endianness` template parameter allows to compute
406 /// stable hash across platforms with different endianness.
407 template <class HasherT, support::endianness Endianness>
408 using HashBuilder =
409     HashBuilderImpl<HasherT, (Endianness == support::endianness::native
410                                   ? support::endian::system_endianness()
411                                   : Endianness)>;
412 
413 namespace hashbuilder_detail {
414 class HashCodeHasher {
415 public:
416   HashCodeHasher() : Code(0) {}
417   void update(ArrayRef<uint8_t> Data) {
418     hash_code DataCode = hash_value(Data);
419     Code = hash_combine(Code, DataCode);
420   }
421   hash_code Code;
422 };
423 
424 using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher,
425                                         support::endianness::native>;
426 } // namespace hashbuilder_detail
427 
428 /// Provide a default implementation of `hash_value` when `addHash(const T &)`
429 /// is supported.
430 template <typename T>
431 std::enable_if_t<
432     is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
433     hash_code>
434 hash_value(const T &Value) {
435   hashbuilder_detail::HashCodeHashBuilder HBuilder;
436   HBuilder.add(Value);
437   return HBuilder.getHasher().Code;
438 }
439 } // end namespace llvm
440 
441 #endif // LLVM_SUPPORT_HASHBUILDER_H
442