1 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements an interface allowing to conveniently build hashes of 10 // various data types, without relying on the underlying hasher type to know 11 // about hashed data types. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_SUPPORT_HASHBUILDER_H 16 #define LLVM_SUPPORT_HASHBUILDER_H 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/type_traits.h" 24 25 #include <iterator> 26 #include <utility> 27 28 namespace llvm { 29 30 namespace hashbuilder_detail { 31 /// Trait to indicate whether a type's bits can be hashed directly (after 32 /// endianness correction). 33 template <typename U> 34 struct IsHashableData 35 : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; 36 37 } // namespace hashbuilder_detail 38 39 /// Declares the hasher member, and functions forwarding directly to the hasher. 40 template <typename HasherT> class HashBuilderBase { 41 public: 42 template <typename HasherT_ = HasherT> 43 using HashResultTy = decltype(std::declval<HasherT_ &>().final()); 44 45 HasherT &getHasher() { return Hasher; } 46 47 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. 48 /// 49 /// This may not take the size of `Data` into account. 50 /// Users of this function should pay attention to respect endianness 51 /// contraints. 52 void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } 53 54 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. 55 /// 56 /// This may not take the size of `Data` into account. 57 /// Users of this function should pay attention to respect endianness 58 /// contraints. 59 void update(StringRef Data) { 60 update(makeArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), 61 Data.size())); 62 } 63 64 /// Forward to `HasherT::final()` if available. 65 template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() { 66 return this->getHasher().final(); 67 } 68 69 /// Forward to `HasherT::result()` if available. 70 template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() { 71 return this->getHasher().result(); 72 } 73 74 protected: 75 explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} 76 77 template <typename... ArgTypes> 78 explicit HashBuilderBase(ArgTypes &&...Args) 79 : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...), 80 Hasher(*OptionalHasher) {} 81 82 private: 83 Optional<HasherT> OptionalHasher; 84 HasherT &Hasher; 85 }; 86 87 /// Implementation of the `HashBuilder` interface. 88 /// 89 /// `support::endianness::native` is not supported. `HashBuilder` is 90 /// expected to canonicalize `support::endianness::native` to one of 91 /// `support::endianness::big` or `support::endianness::little`. 92 template <typename HasherT, support::endianness Endianness> 93 class HashBuilderImpl : public HashBuilderBase<HasherT> { 94 static_assert(Endianness != support::endianness::native, 95 "HashBuilder should canonicalize endianness"); 96 97 public: 98 explicit HashBuilderImpl(HasherT &Hasher) 99 : HashBuilderBase<HasherT>(Hasher) {} 100 template <typename... ArgTypes> 101 explicit HashBuilderImpl(ArgTypes &&...Args) 102 : HashBuilderBase<HasherT>(Args...) {} 103 104 /// Implement hashing for hashable data types, e.g. integral or enum values. 105 template <typename T> 106 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, 107 HashBuilderImpl &> 108 add(T Value) { 109 return adjustForEndiannessAndAdd(Value); 110 } 111 112 /// Support hashing `ArrayRef`. 113 /// 114 /// `Value.size()` is taken into account to ensure cases like 115 /// ``` 116 /// builder.add({1}); 117 /// builder.add({2, 3}); 118 /// ``` 119 /// and 120 /// ``` 121 /// builder.add({1, 2}); 122 /// builder.add({3}); 123 /// ``` 124 /// do not collide. 125 template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) { 126 // As of implementation time, simply calling `addRange(Value)` would also go 127 // through the `update` fast path. But that would rely on the implementation 128 // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call 129 // `update` to guarantee the fast path. 130 add(Value.size()); 131 if (hashbuilder_detail::IsHashableData<T>::value && 132 Endianness == support::endian::system_endianness()) { 133 this->update( 134 makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), 135 Value.size() * sizeof(T))); 136 } else { 137 for (auto &V : Value) 138 add(V); 139 } 140 return *this; 141 } 142 143 /// Support hashing `StringRef`. 144 /// 145 /// `Value.size()` is taken into account to ensure cases like 146 /// ``` 147 /// builder.add("a"); 148 /// builder.add("bc"); 149 /// ``` 150 /// and 151 /// ``` 152 /// builder.add("ab"); 153 /// builder.add("c"); 154 /// ``` 155 /// do not collide. 156 HashBuilderImpl &add(StringRef Value) { 157 // As of implementation time, simply calling `addRange(Value)` would also go 158 // through `update`. But that would rely on the implementation of 159 // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to 160 // guarantee the fast path. 161 add(Value.size()); 162 this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), 163 Value.size())); 164 return *this; 165 } 166 167 template <typename T> 168 using HasAddHashT = 169 decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>())); 170 /// Implement hashing for user-defined `struct`s. 171 /// 172 /// Any user-define `struct` can participate in hashing via `HashBuilder` by 173 /// providing a `addHash` templated function. 174 /// 175 /// ``` 176 /// template <typename HasherT, support::endianness Endianness> 177 /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, 178 /// const UserDefinedStruct &Value); 179 /// ``` 180 /// 181 /// For example: 182 /// ``` 183 /// struct SimpleStruct { 184 /// char c; 185 /// int i; 186 /// }; 187 /// 188 /// template <typename HasherT, support::endianness Endianness> 189 /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 190 /// const SimpleStruct &Value) { 191 /// HBuilder.add(Value.c); 192 /// HBuilder.add(Value.i); 193 /// } 194 /// ``` 195 /// 196 /// To avoid endianness issues, specializations of `addHash` should 197 /// generally rely on exising `add`, `addRange`, and `addRangeElements` 198 /// functions. If directly using `update`, an implementation must correctly 199 /// handle endianness. 200 /// 201 /// ``` 202 /// struct __attribute__ ((packed)) StructWithFastHash { 203 /// int I; 204 /// char C; 205 /// 206 /// // If possible, we want to hash both `I` and `C` in a single 207 /// // `update` call for performance concerns. 208 /// template <typename HasherT, support::endianness Endianness> 209 /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 210 /// const StructWithFastHash &Value) { 211 /// if (Endianness == support::endian::system_endianness()) { 212 /// HBuilder.update(makeArrayRef( 213 /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); 214 /// } else { 215 /// // Rely on existing `add` methods to handle endianness. 216 /// HBuilder.add(Value.I); 217 /// HBuilder.add(Value.C); 218 /// } 219 /// } 220 /// }; 221 /// ``` 222 /// 223 /// To avoid collisions, specialization of `addHash` for variable-size 224 /// types must take the size into account. 225 /// 226 /// For example: 227 /// ``` 228 /// struct CustomContainer { 229 /// private: 230 /// size_t Size; 231 /// int Elements[100]; 232 /// 233 /// public: 234 /// CustomContainer(size_t Size) : Size(Size) { 235 /// for (size_t I = 0; I != Size; ++I) 236 /// Elements[I] = I; 237 /// } 238 /// template <typename HasherT, support::endianness Endianness> 239 /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, 240 /// const CustomContainer &Value) { 241 /// if (Endianness == support::endian::system_endianness()) { 242 /// HBuilder.update(makeArrayRef( 243 /// reinterpret_cast<const uint8_t *>(&Value.Size), 244 /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); 245 /// } else { 246 /// // `addRange` will take care of encoding the size. 247 /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + 248 /// Value.Size); 249 /// } 250 /// } 251 /// }; 252 /// ``` 253 template <typename T> 254 std::enable_if_t<is_detected<HasAddHashT, T>::value && 255 !hashbuilder_detail::IsHashableData<T>::value, 256 HashBuilderImpl &> 257 add(const T &Value) { 258 addHash(*this, Value); 259 return *this; 260 } 261 262 template <typename T1, typename T2> 263 HashBuilderImpl &add(const std::pair<T1, T2> &Value) { 264 add(Value.first); 265 add(Value.second); 266 return *this; 267 } 268 269 template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) { 270 return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>()); 271 } 272 273 /// A convenenience variadic helper. 274 /// It simply iterates over its arguments, in order. 275 /// ``` 276 /// add(Arg1, Arg2); 277 /// ``` 278 /// is equivalent to 279 /// ``` 280 /// add(Arg1) 281 /// add(Arg2) 282 /// ``` 283 template <typename T, typename... Ts> 284 typename std::enable_if<(sizeof...(Ts) >= 1), HashBuilderImpl &>::type 285 add(const T &FirstArg, const Ts &...Args) { 286 add(FirstArg); 287 add(Args...); 288 return *this; 289 } 290 291 template <typename ForwardIteratorT> 292 HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) { 293 add(std::distance(First, Last)); 294 return addRangeElements(First, Last); 295 } 296 297 template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) { 298 return addRange(adl_begin(Range), adl_end(Range)); 299 } 300 301 template <typename ForwardIteratorT> 302 HashBuilderImpl &addRangeElements(ForwardIteratorT First, 303 ForwardIteratorT Last) { 304 return addRangeElementsImpl( 305 First, Last, 306 typename std::iterator_traits<ForwardIteratorT>::iterator_category()); 307 } 308 309 template <typename RangeT> 310 HashBuilderImpl &addRangeElements(const RangeT &Range) { 311 return addRangeElements(adl_begin(Range), adl_end(Range)); 312 } 313 314 template <typename T> 315 using HasByteSwapT = decltype(support::endian::byte_swap( 316 std::declval<T &>(), support::endianness::little)); 317 /// Adjust `Value` for the target endianness and add it to the hash. 318 template <typename T> 319 std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &> 320 adjustForEndiannessAndAdd(const T &Value) { 321 T SwappedValue = support::endian::byte_swap(Value, Endianness); 322 this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), 323 sizeof(SwappedValue))); 324 return *this; 325 } 326 327 private: 328 template <typename... Ts, std::size_t... Indices> 329 HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg, 330 std::index_sequence<Indices...>) { 331 add(std::get<Indices>(Arg)...); 332 return *this; 333 } 334 335 // FIXME: Once available, specialize this function for `contiguous_iterator`s, 336 // and use it for `ArrayRef` and `StringRef`. 337 template <typename ForwardIteratorT> 338 HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First, 339 ForwardIteratorT Last, 340 std::forward_iterator_tag) { 341 for (auto It = First; It != Last; ++It) 342 add(*It); 343 return *this; 344 } 345 346 template <typename T> 347 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && 348 Endianness == support::endian::system_endianness(), 349 HashBuilderImpl &> 350 addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { 351 this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First), 352 (Last - First) * sizeof(T))); 353 return *this; 354 } 355 }; 356 357 /// Interface to help hash various types through a hasher type. 358 /// 359 /// Via provided specializations of `add`, `addRange`, and `addRangeElements` 360 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed 361 /// without requiring any knowledge of hashed types from the hasher type. 362 /// 363 /// The only method expected from the templated hasher type `HasherT` is: 364 /// * void update(ArrayRef<uint8_t> Data) 365 /// 366 /// Additionally, the following methods will be forwarded to the hasher type: 367 /// * decltype(std::declval<HasherT &>().final()) final() 368 /// * decltype(std::declval<HasherT &>().result()) result() 369 /// 370 /// From a user point of view, the interface provides the following: 371 /// * `template<typename T> add(const T &Value)` 372 /// The `add` function implements hashing of various types. 373 /// * `template <typename ItT> void addRange(ItT First, ItT Last)` 374 /// The `addRange` function is designed to aid hashing a range of values. 375 /// It explicitly adds the size of the range in the hash. 376 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` 377 /// The `addRangeElements` function is also designed to aid hashing a range of 378 /// values. In contrast to `addRange`, it **ignores** the size of the range, 379 /// behaving as if elements were added one at a time with `add`. 380 /// 381 /// User-defined `struct` types can participate in this interface by providing 382 /// an `addHash` templated function. See the associated template specialization 383 /// for details. 384 /// 385 /// This interface does not impose requirements on the hasher 386 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for 387 /// variable-size types; for example for 388 /// ``` 389 /// builder.add({1}); 390 /// builder.add({2, 3}); 391 /// ``` 392 /// and 393 /// ``` 394 /// builder.add({1, 2}); 395 /// builder.add({3}); 396 /// ``` 397 /// . Thus, specializations of `add` and `addHash` for variable-size types must 398 /// not assume that the hasher type considers the size as part of the hash; they 399 /// must explicitly add the size to the hash. See for example specializations 400 /// for `ArrayRef` and `StringRef`. 401 /// 402 /// Additionally, since types are eventually forwarded to the hasher's 403 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash 404 /// computation (for example when computing `add((int)123)`). 405 /// Specifiying a non-`native` `Endianness` template parameter allows to compute 406 /// stable hash across platforms with different endianness. 407 template <class HasherT, support::endianness Endianness> 408 using HashBuilder = 409 HashBuilderImpl<HasherT, (Endianness == support::endianness::native 410 ? support::endian::system_endianness() 411 : Endianness)>; 412 413 namespace hashbuilder_detail { 414 class HashCodeHasher { 415 public: 416 HashCodeHasher() : Code(0) {} 417 void update(ArrayRef<uint8_t> Data) { 418 hash_code DataCode = hash_value(Data); 419 Code = hash_combine(Code, DataCode); 420 } 421 hash_code Code; 422 }; 423 424 using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher, 425 support::endianness::native>; 426 } // namespace hashbuilder_detail 427 428 /// Provide a default implementation of `hash_value` when `addHash(const T &)` 429 /// is supported. 430 template <typename T> 431 std::enable_if_t< 432 is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, 433 hash_code> 434 hash_value(const T &Value) { 435 hashbuilder_detail::HashCodeHashBuilder HBuilder; 436 HBuilder.add(Value); 437 return HBuilder.getHasher().Code; 438 } 439 } // end namespace llvm 440 441 #endif // LLVM_SUPPORT_HASHBUILDER_H 442