1 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements an interface allowing to conveniently build hashes of 10 // various data types, without relying on the underlying hasher type to know 11 // about hashed data types. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_SUPPORT_HASHBUILDER_H 16 #define LLVM_SUPPORT_HASHBUILDER_H 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/type_traits.h" 24 25 #include <iterator> 26 #include <optional> 27 #include <utility> 28 29 namespace llvm { 30 31 namespace hashbuilder_detail { 32 /// Trait to indicate whether a type's bits can be hashed directly (after 33 /// endianness correction). 34 template <typename U> 35 struct IsHashableData 36 : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; 37 38 } // namespace hashbuilder_detail 39 40 /// Declares the hasher member, and functions forwarding directly to the hasher. 41 template <typename HasherT> class HashBuilderBase { 42 public: 43 template <typename HasherT_ = HasherT> 44 using HashResultTy = decltype(std::declval<HasherT_ &>().final()); 45 46 HasherT &getHasher() { return Hasher; } 47 48 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. 49 /// 50 /// This may not take the size of `Data` into account. 51 /// Users of this function should pay attention to respect endianness 52 /// contraints. 53 void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } 54 55 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. 56 /// 57 /// This may not take the size of `Data` into account. 58 /// Users of this function should pay attention to respect endianness 59 /// contraints. 60 void update(StringRef Data) { 61 update( 62 ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size())); 63 } 64 65 /// Forward to `HasherT::final()` if available. 66 template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() { 67 return this->getHasher().final(); 68 } 69 70 /// Forward to `HasherT::result()` if available. 71 template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() { 72 return this->getHasher().result(); 73 } 74 75 protected: 76 explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} 77 78 template <typename... ArgTypes> 79 explicit HashBuilderBase(ArgTypes &&...Args) 80 : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...), 81 Hasher(*OptionalHasher) {} 82 83 private: 84 std::optional<HasherT> OptionalHasher; 85 HasherT &Hasher; 86 }; 87 88 /// Implementation of the `HashBuilder` interface. 89 /// 90 /// `support::endianness::native` is not supported. `HashBuilder` is 91 /// expected to canonicalize `support::endianness::native` to one of 92 /// `support::endianness::big` or `support::endianness::little`. 93 template <typename HasherT, support::endianness Endianness> 94 class HashBuilderImpl : public HashBuilderBase<HasherT> { 95 static_assert(Endianness != support::endianness::native, 96 "HashBuilder should canonicalize endianness"); 97 98 public: 99 explicit HashBuilderImpl(HasherT &Hasher) 100 : HashBuilderBase<HasherT>(Hasher) {} 101 template <typename... ArgTypes> 102 explicit HashBuilderImpl(ArgTypes &&...Args) 103 : HashBuilderBase<HasherT>(Args...) {} 104 105 /// Implement hashing for hashable data types, e.g. integral or enum values. 106 template <typename T> 107 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, 108 HashBuilderImpl &> 109 add(T Value) { 110 return adjustForEndiannessAndAdd(Value); 111 } 112 113 /// Support hashing `ArrayRef`. 114 /// 115 /// `Value.size()` is taken into account to ensure cases like 116 /// ``` 117 /// builder.add({1}); 118 /// builder.add({2, 3}); 119 /// ``` 120 /// and 121 /// ``` 122 /// builder.add({1, 2}); 123 /// builder.add({3}); 124 /// ``` 125 /// do not collide. 126 template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) { 127 // As of implementation time, simply calling `addRange(Value)` would also go 128 // through the `update` fast path. But that would rely on the implementation 129 // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call 130 // `update` to guarantee the fast path. 131 add(Value.size()); 132 if (hashbuilder_detail::IsHashableData<T>::value && 133 Endianness == support::endian::system_endianness()) { 134 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), 135 Value.size() * sizeof(T))); 136 } else { 137 for (auto &V : Value) 138 add(V); 139 } 140 return *this; 141 } 142 143 /// Support hashing `StringRef`. 144 /// 145 /// `Value.size()` is taken into account to ensure cases like 146 /// ``` 147 /// builder.add("a"); 148 /// builder.add("bc"); 149 /// ``` 150 /// and 151 /// ``` 152 /// builder.add("ab"); 153 /// builder.add("c"); 154 /// ``` 155 /// do not collide. 156 HashBuilderImpl &add(StringRef Value) { 157 // As of implementation time, simply calling `addRange(Value)` would also go 158 // through `update`. But that would rely on the implementation of 159 // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to 160 // guarantee the fast path. 161 add(Value.size()); 162 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), 163 Value.size())); 164 return *this; 165 } 166 167 template <typename T> 168 using HasAddHashT = 169 decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>())); 170 /// Implement hashing for user-defined `struct`s. 171 /// 172 /// Any user-define `struct` can participate in hashing via `HashBuilder` by 173 /// providing a `addHash` templated function. 174 /// 175 /// ``` 176 /// template <typename HasherT, support::endianness Endianness> 177 /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, 178 /// const UserDefinedStruct &Value); 179 /// ``` 180 /// 181 /// For example: 182 /// ``` 183 /// struct SimpleStruct { 184 /// char c; 185 /// int i; 186 /// }; 187 /// 188 /// template <typename HasherT, support::endianness Endianness> 189 /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 190 /// const SimpleStruct &Value) { 191 /// HBuilder.add(Value.c); 192 /// HBuilder.add(Value.i); 193 /// } 194 /// ``` 195 /// 196 /// To avoid endianness issues, specializations of `addHash` should 197 /// generally rely on exising `add`, `addRange`, and `addRangeElements` 198 /// functions. If directly using `update`, an implementation must correctly 199 /// handle endianness. 200 /// 201 /// ``` 202 /// struct __attribute__ ((packed)) StructWithFastHash { 203 /// int I; 204 /// char C; 205 /// 206 /// // If possible, we want to hash both `I` and `C` in a single 207 /// // `update` call for performance concerns. 208 /// template <typename HasherT, support::endianness Endianness> 209 /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 210 /// const StructWithFastHash &Value) { 211 /// if (Endianness == support::endian::system_endianness()) { 212 /// HBuilder.update(ArrayRef( 213 /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); 214 /// } else { 215 /// // Rely on existing `add` methods to handle endianness. 216 /// HBuilder.add(Value.I); 217 /// HBuilder.add(Value.C); 218 /// } 219 /// } 220 /// }; 221 /// ``` 222 /// 223 /// To avoid collisions, specialization of `addHash` for variable-size 224 /// types must take the size into account. 225 /// 226 /// For example: 227 /// ``` 228 /// struct CustomContainer { 229 /// private: 230 /// size_t Size; 231 /// int Elements[100]; 232 /// 233 /// public: 234 /// CustomContainer(size_t Size) : Size(Size) { 235 /// for (size_t I = 0; I != Size; ++I) 236 /// Elements[I] = I; 237 /// } 238 /// template <typename HasherT, support::endianness Endianness> 239 /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 240 /// const CustomContainer &Value) { 241 /// if (Endianness == support::endian::system_endianness()) { 242 /// HBuilder.update(ArrayRef( 243 /// reinterpret_cast<const uint8_t *>(&Value.Size), 244 /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); 245 /// } else { 246 /// // `addRange` will take care of encoding the size. 247 /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + 248 /// Value.Size); 249 /// } 250 /// } 251 /// }; 252 /// ``` 253 template <typename T> 254 std::enable_if_t<is_detected<HasAddHashT, T>::value && 255 !hashbuilder_detail::IsHashableData<T>::value, 256 HashBuilderImpl &> 257 add(const T &Value) { 258 addHash(*this, Value); 259 return *this; 260 } 261 262 template <typename T1, typename T2> 263 HashBuilderImpl &add(const std::pair<T1, T2> &Value) { 264 return add(Value.first, Value.second); 265 } 266 267 template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) { 268 std::apply([this](const auto &...Args) { this->add(Args...); }, Arg); 269 return *this; 270 } 271 272 /// A convenenience variadic helper. 273 /// It simply iterates over its arguments, in order. 274 /// ``` 275 /// add(Arg1, Arg2); 276 /// ``` 277 /// is equivalent to 278 /// ``` 279 /// add(Arg1) 280 /// add(Arg2) 281 /// ``` 282 template <typename... Ts> 283 std::enable_if_t<(sizeof...(Ts) > 1), HashBuilderImpl &> 284 add(const Ts &...Args) { 285 return (add(Args), ...); 286 } 287 288 template <typename ForwardIteratorT> 289 HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) { 290 add(std::distance(First, Last)); 291 return addRangeElements(First, Last); 292 } 293 294 template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) { 295 return addRange(adl_begin(Range), adl_end(Range)); 296 } 297 298 template <typename ForwardIteratorT> 299 HashBuilderImpl &addRangeElements(ForwardIteratorT First, 300 ForwardIteratorT Last) { 301 return addRangeElementsImpl( 302 First, Last, 303 typename std::iterator_traits<ForwardIteratorT>::iterator_category()); 304 } 305 306 template <typename RangeT> 307 HashBuilderImpl &addRangeElements(const RangeT &Range) { 308 return addRangeElements(adl_begin(Range), adl_end(Range)); 309 } 310 311 template <typename T> 312 using HasByteSwapT = decltype(support::endian::byte_swap( 313 std::declval<T &>(), support::endianness::little)); 314 /// Adjust `Value` for the target endianness and add it to the hash. 315 template <typename T> 316 std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &> 317 adjustForEndiannessAndAdd(const T &Value) { 318 T SwappedValue = support::endian::byte_swap(Value, Endianness); 319 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), 320 sizeof(SwappedValue))); 321 return *this; 322 } 323 324 private: 325 // FIXME: Once available, specialize this function for `contiguous_iterator`s, 326 // and use it for `ArrayRef` and `StringRef`. 327 template <typename ForwardIteratorT> 328 HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First, 329 ForwardIteratorT Last, 330 std::forward_iterator_tag) { 331 for (auto It = First; It != Last; ++It) 332 add(*It); 333 return *this; 334 } 335 336 template <typename T> 337 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && 338 Endianness == support::endian::system_endianness(), 339 HashBuilderImpl &> 340 addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { 341 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First), 342 (Last - First) * sizeof(T))); 343 return *this; 344 } 345 }; 346 347 /// Interface to help hash various types through a hasher type. 348 /// 349 /// Via provided specializations of `add`, `addRange`, and `addRangeElements` 350 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed 351 /// without requiring any knowledge of hashed types from the hasher type. 352 /// 353 /// The only method expected from the templated hasher type `HasherT` is: 354 /// * void update(ArrayRef<uint8_t> Data) 355 /// 356 /// Additionally, the following methods will be forwarded to the hasher type: 357 /// * decltype(std::declval<HasherT &>().final()) final() 358 /// * decltype(std::declval<HasherT &>().result()) result() 359 /// 360 /// From a user point of view, the interface provides the following: 361 /// * `template<typename T> add(const T &Value)` 362 /// The `add` function implements hashing of various types. 363 /// * `template <typename ItT> void addRange(ItT First, ItT Last)` 364 /// The `addRange` function is designed to aid hashing a range of values. 365 /// It explicitly adds the size of the range in the hash. 366 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` 367 /// The `addRangeElements` function is also designed to aid hashing a range of 368 /// values. In contrast to `addRange`, it **ignores** the size of the range, 369 /// behaving as if elements were added one at a time with `add`. 370 /// 371 /// User-defined `struct` types can participate in this interface by providing 372 /// an `addHash` templated function. See the associated template specialization 373 /// for details. 374 /// 375 /// This interface does not impose requirements on the hasher 376 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for 377 /// variable-size types; for example for 378 /// ``` 379 /// builder.add({1}); 380 /// builder.add({2, 3}); 381 /// ``` 382 /// and 383 /// ``` 384 /// builder.add({1, 2}); 385 /// builder.add({3}); 386 /// ``` 387 /// . Thus, specializations of `add` and `addHash` for variable-size types must 388 /// not assume that the hasher type considers the size as part of the hash; they 389 /// must explicitly add the size to the hash. See for example specializations 390 /// for `ArrayRef` and `StringRef`. 391 /// 392 /// Additionally, since types are eventually forwarded to the hasher's 393 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash 394 /// computation (for example when computing `add((int)123)`). 395 /// Specifiying a non-`native` `Endianness` template parameter allows to compute 396 /// stable hash across platforms with different endianness. 397 template <class HasherT, support::endianness Endianness> 398 using HashBuilder = 399 HashBuilderImpl<HasherT, (Endianness == support::endianness::native 400 ? support::endian::system_endianness() 401 : Endianness)>; 402 403 namespace hashbuilder_detail { 404 class HashCodeHasher { 405 public: 406 HashCodeHasher() : Code(0) {} 407 void update(ArrayRef<uint8_t> Data) { 408 hash_code DataCode = hash_value(Data); 409 Code = hash_combine(Code, DataCode); 410 } 411 hash_code Code; 412 }; 413 414 using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher, 415 support::endianness::native>; 416 } // namespace hashbuilder_detail 417 418 /// Provide a default implementation of `hash_value` when `addHash(const T &)` 419 /// is supported. 420 template <typename T> 421 std::enable_if_t< 422 is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, 423 hash_code> 424 hash_value(const T &Value) { 425 hashbuilder_detail::HashCodeHashBuilder HBuilder; 426 HBuilder.add(Value); 427 return HBuilder.getHasher().Code; 428 } 429 } // end namespace llvm 430 431 #endif // LLVM_SUPPORT_HASHBUILDER_H 432