1 /* Copyright 2009-2016 Francesco Biscani (bluescarni@gmail.com) 2 3 This file is part of the Piranha library. 4 5 The Piranha library is free software; you can redistribute it and/or modify 6 it under the terms of either: 7 8 * the GNU Lesser General Public License as published by the Free 9 Software Foundation; either version 3 of the License, or (at your 10 option) any later version. 11 12 or 13 14 * the GNU General Public License as published by the Free Software 15 Foundation; either version 3 of the License, or (at your option) any 16 later version. 17 18 or both in parallel, as here. 19 20 The Piranha library is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 23 for more details. 24 25 You should have received copies of the GNU General Public License and the 26 GNU Lesser General Public License along with the Piranha library. If not, 27 see https://www.gnu.org/licenses/. */ 28 29 #ifndef PIRANHA_HASH_SET_HPP 30 #define PIRANHA_HASH_SET_HPP 31 32 #include <boost/iterator/iterator_facade.hpp> 33 #include <boost/numeric/conversion/cast.hpp> 34 #include <cmath> 35 #include <cstddef> 36 #include <cstdint> 37 #include <functional> 38 #include <initializer_list> 39 #include <limits> 40 #include <map> 41 #include <memory> 42 #include <new> 43 #include <stdexcept> 44 #include <string> 45 #include <tuple> 46 #include <type_traits> 47 #include <utility> 48 #include <vector> 49 50 #include "config.hpp" 51 #include "debug_access.hpp" 52 #include "detail/init_data.hpp" 53 #include "exceptions.hpp" 54 #include "s11n.hpp" 55 #include "safe_cast.hpp" 56 #include "thread_pool.hpp" 57 #include "type_traits.hpp" 58 59 namespace piranha 60 { 61 62 /// Hash set. 63 /** 64 * Hash set class with interface similar to \p std::unordered_set. 
The main points of difference with respect to
 * \p std::unordered_set are the following:
 *
 * - the exception safety guarantee is weaker (see below),
 * - iterators and iterator invalidation: after a rehash operation, all iterators will be invalidated and existing
 *   references/pointers to the elements will also be invalid; after an insertion/erase operation, all existing
 *   iterators, pointers and references to the elements in the destination bucket will be invalid,
 * - the complexity of iterator traversal depends on the load factor of the table.
 *
 * The implementation employs a separate chaining strategy consisting of an array of buckets, each one a singly linked
 * list with the first node stored directly within the array (so that the first insertion in a bucket does not require
 * any heap allocation).
 *
 * An additional set of low-level methods is provided: such methods are suitable for use in high-performance and
 * multi-threaded contexts, and, if misused, could lead to data corruption and other unpredictable errors.
 *
 * Note that for performance reasons the implementation employs sizes that are powers of two. Hence, particular care
 * should be taken that the hash function does not exhibit commensurabilities with powers of 2.
 *
 * ## Type requirements ##
 *
 * - \p T must satisfy piranha::is_container_element,
 * - \p Hash must satisfy piranha::is_hash_function_object,
 * - \p Pred must satisfy piranha::is_equality_function_object.
 *
 * ## Exception safety guarantee ##
 *
 * This class provides the strong exception safety guarantee for all operations apart from methods involving insertion,
 * which provide the basic guarantee (after a failed insertion, the set will be left in an unspecified but valid state).
93 * 94 * ## Move semantics ## 95 * 96 * Move construction and move assignment will leave the moved-from object equivalent to an empty set whose hasher and 97 * equality predicate have been moved-from. 98 */ 99 /* Some improvement NOTEs: 100 * - tests for low-level methods 101 * - better increase_size with recycling of dynamically-allocated nodes 102 * - see if we can reduce the number of branches in the find algorithm (e.g., when traversing the list) -> this should be 103 * a general review of the internal linked list 104 * implementation. 105 * - memory handling: the usage of the allocator object should be more standard, i.e., use the pointer and reference 106 * typedefs defined within, replace 107 * positional new with construct even in the list implementation. Then it can be made a template parameter with default = 108 * std::allocator. 109 * - use of new: we should probably replace new with new, in case new is overloaded -> also, check all occurrences of 110 * root new, it is used as well 111 * in static_vector for instance. 112 * - inline the first bucket, with the idea of avoiding memory allocations when the series consist of a single element 113 * (useful for instance 114 * when iterating over the series with the fat iterator). 115 * - optimisation for the begin() iterator, 116 * - check again about the mod implementation, 117 * - in the dtor checks, do we still want the shutdown() logic after we rework symbol? 118 * are we still accessing potentially global variables? 119 * - maybe a bit more enabling for ctor and other template methods, not really essential though. 120 */ 121 template <typename T, typename Hash = std::hash<T>, typename Pred = std::equal_to<T>> 122 class hash_set 123 { 124 PIRANHA_TT_CHECK(is_container_element, T); 125 PIRANHA_TT_CHECK(is_hash_function_object, Hash, T); 126 PIRANHA_TT_CHECK(is_equality_function_object, Pred, T); 127 // Make friend with debug access class. 
128 template <typename U> 129 friend class debug_access; 130 // Node class for bucket element. 131 struct node { 132 typedef typename std::aligned_storage<sizeof(T), alignof(T)>::type storage_type; nodepiranha::hash_set::node133 node() : m_next(nullptr) 134 { 135 } 136 // Erase all other ctors/assignments, we do not want to 137 // copy around this object as m_storage is not what it is 138 // (and often could be uninitialized, which would lead to UB if used). 139 node(const node &) = delete; 140 node(node &&) = delete; 141 node &operator=(const node &) = delete; 142 node &operator=(node &&) = delete; 143 // NOTE: the idea here is that we use these helpers only *after* the object has been constructed, 144 // for constructing the object we must always access m_storage directly. The chain of two casts 145 // seems to be the only standard-conforming way of getting out a pointer to T. 146 // http://stackoverflow.com/questions/1082378/what-is-the-basic-use-of-aligned-storage 147 // http://stackoverflow.com/questions/13466556/aligned-storage-and-strict-aliasing 148 // http://en.cppreference.com/w/cpp/types/aligned_storage 149 // A couple of further notes: 150 // - pointer casting to/from void * is ok (4.10.2 and 5.2.9.13, via static_cast); 151 // - the placement new operator (which we use to construct the object into the storage) 152 // is guaranteed to construct the object at the beginning of the storage, and taking 153 // a void * out of the storage will return the address of the object stored within; 154 // - the lifetime of the original POD storage_type ends when we reuse its storage via 155 // placement new. 
156 // This page contains some more info about storage reuse: 157 // http://en.cppreference.com/w/cpp/language/lifetime#Storage_reuse 158 // See also this link, regarding static_cast vs reinterpret_cast for this very specific 159 // usage: 160 // http://stackoverflow.com/questions/19300142/static-cast-and-reinterpret-cast-for-stdaligned-storage 161 // Apparently, it is equivalent. ptrpiranha::hash_set::node162 const T *ptr() const 163 { 164 piranha_assert(m_next); 165 return static_cast<const T *>(static_cast<const void *>(&m_storage)); 166 } ptrpiranha::hash_set::node167 T *ptr() 168 { 169 piranha_assert(m_next); 170 return static_cast<T *>(static_cast<void *>(&m_storage)); 171 } 172 storage_type m_storage; 173 node *m_next; 174 }; 175 // List constituting the bucket. 176 // NOTE: in this list implementation the m_next pointer is used as a flag to signal if the current node 177 // stores an item: the pointer is not null if it does contain something. The value of m_next pointer in a node is 178 // set to a constant 179 // &terminator node if it is the last node of the list. I.e., the terminator is end() in all cases 180 // except when the list is empty (in that case the m_node member is end()). 181 struct list { 182 // NOTE: re: std::iterator inheritance. It's netiher encouraged not discouraged according to this: 183 // http://stackoverflow.com/questions/6471019/can-should-i-inherit-from-stl-iterator 184 // I think we can just keep on using boost iterator_facade for the time being. 
185 template <typename U> 186 class iterator_impl : public boost::iterator_facade<iterator_impl<U>, U, boost::forward_traversal_tag> 187 { 188 typedef typename std::conditional<std::is_const<U>::value, node const *, node *>::type ptr_type; 189 template <typename V> 190 friend class iterator_impl; 191 192 public: iterator_impl()193 iterator_impl() : m_ptr(nullptr) 194 { 195 } iterator_impl(ptr_type ptr)196 explicit iterator_impl(ptr_type ptr) : m_ptr(ptr) 197 { 198 } 199 // Constructor from other iterator type. 200 template <typename V, 201 enable_if_t<std::is_convertible<typename iterator_impl<V>::ptr_type, ptr_type>::value, int> = 0> iterator_impl(const iterator_impl<V> & other)202 iterator_impl(const iterator_impl<V> &other) : m_ptr(other.m_ptr) 203 { 204 } 205 206 private: 207 friend class boost::iterator_core_access; increment()208 void increment() 209 { 210 piranha_assert(m_ptr && m_ptr->m_next); 211 m_ptr = m_ptr->m_next; 212 } 213 template <typename V> equal(const iterator_impl<V> & other) const214 bool equal(const iterator_impl<V> &other) const 215 { 216 return m_ptr == other.m_ptr; 217 } dereference() const218 U &dereference() const 219 { 220 piranha_assert(m_ptr && m_ptr->m_next); 221 return *m_ptr->ptr(); 222 } 223 224 public: 225 ptr_type m_ptr; 226 }; 227 typedef iterator_impl<T> iterator; 228 typedef iterator_impl<T const> const_iterator; 229 // Static checks on the iterator types. 
230 PIRANHA_TT_CHECK(is_forward_iterator, iterator); 231 PIRANHA_TT_CHECK(is_forward_iterator, const_iterator); listpiranha::hash_set::list232 list() : m_node() 233 { 234 } listpiranha::hash_set::list235 list(list &&other) noexcept : m_node() 236 { 237 steal_from_rvalue(std::move(other)); 238 } listpiranha::hash_set::list239 list(const list &other) : m_node() 240 { 241 try { 242 auto cur = &m_node; 243 auto other_cur = &other.m_node; 244 while (other_cur->m_next) { 245 if (cur->m_next) { 246 // This assert means we are operating on the last element 247 // of the list, as we are doing back-insertions. 248 piranha_assert(cur->m_next == &terminator); 249 // Create a new node with content equal to other_cur 250 // and linking forward to the terminator. 251 std::unique_ptr<node> new_node(::new node()); 252 ::new (static_cast<void *>(&new_node->m_storage)) T(*other_cur->ptr()); 253 new_node->m_next = &terminator; 254 // Link the new node. 255 cur->m_next = new_node.release(); 256 cur = cur->m_next; 257 } else { 258 // This means this is the first node. 259 ::new (static_cast<void *>(&cur->m_storage)) T(*other_cur->ptr()); 260 cur->m_next = &terminator; 261 } 262 other_cur = other_cur->m_next; 263 } 264 } catch (...) { 265 destroy(); 266 throw; 267 } 268 } operator =piranha::hash_set::list269 list &operator=(list &&other) 270 { 271 if (likely(this != &other)) { 272 // Destroy the content of this. 273 destroy(); 274 steal_from_rvalue(std::move(other)); 275 } 276 return *this; 277 } operator =piranha::hash_set::list278 list &operator=(const list &other) 279 { 280 if (likely(this != &other)) { 281 auto tmp = other; 282 *this = std::move(tmp); 283 } 284 return *this; 285 } ~listpiranha::hash_set::list286 ~list() 287 { 288 destroy(); 289 } steal_from_rvaluepiranha::hash_set::list290 void steal_from_rvalue(list &&other) 291 { 292 piranha_assert(empty()); 293 // Do something only if there is content in the other. 
294 if (other.m_node.m_next) { 295 // Move construct current first node with first node of other. 296 ::new (static_cast<void *>(&m_node.m_storage)) T(std::move(*other.m_node.ptr())); 297 // Link remaining content of other into this. 298 m_node.m_next = other.m_node.m_next; 299 // Destroy first node of other. 300 other.m_node.ptr()->~T(); 301 other.m_node.m_next = nullptr; 302 } 303 piranha_assert(other.empty()); 304 } 305 template <typename U, enable_if_t<std::is_same<T, uncvref_t<U>>::value, int> = 0> insertpiranha::hash_set::list306 node *insert(U &&item) 307 { 308 // NOTE: optimize with likely/unlikely? 309 if (m_node.m_next) { 310 // Create the new node and forward-link it to the second node. 311 std::unique_ptr<node> new_node(::new node()); 312 ::new (static_cast<void *>(&new_node->m_storage)) T(std::forward<U>(item)); 313 new_node->m_next = m_node.m_next; 314 // Link first node to the new node. 315 m_node.m_next = new_node.release(); 316 return m_node.m_next; 317 } else { 318 ::new (static_cast<void *>(&m_node.m_storage)) T(std::forward<U>(item)); 319 m_node.m_next = &terminator; 320 return &m_node; 321 } 322 } beginpiranha::hash_set::list323 iterator begin() 324 { 325 return iterator(&m_node); 326 } endpiranha::hash_set::list327 iterator end() 328 { 329 return iterator((m_node.m_next) ? &terminator : &m_node); 330 } beginpiranha::hash_set::list331 const_iterator begin() const 332 { 333 return const_iterator(&m_node); 334 } endpiranha::hash_set::list335 const_iterator end() const 336 { 337 return const_iterator((m_node.m_next) ? &terminator : &m_node); 338 } emptypiranha::hash_set::list339 bool empty() const 340 { 341 return !m_node.m_next; 342 } destroypiranha::hash_set::list343 void destroy() 344 { 345 node *cur = &m_node; 346 while (cur->m_next) { 347 // Store the current value temporarily. 348 auto old = cur; 349 // Assign the next. 350 cur = cur->m_next; 351 // Destroy the old payload and erase connections. 
352 old->ptr()->~T(); 353 old->m_next = nullptr; 354 // If the old node was not the initial one, delete it. 355 if (old != &m_node) { 356 ::delete old; 357 } 358 } 359 // After destruction, the list should be equivalent to a default-constructed one. 360 piranha_assert(empty()); 361 } 362 static node terminator; 363 node m_node; 364 }; 365 // Allocator type. 366 // NOTE: if we move allocator choice in public interface we need to document the exception behaviour of the 367 // allocator. Also, check the validity 368 // of the assumptions on the type returned by allocate(): must it be a pointer or just convertible to pointer? 369 // NOTE: for std::allocator, pointer is guaranteed to be "T *": 370 // http://en.cppreference.com/w/cpp/memory/allocator 371 typedef std::allocator<list> allocator_type; 372 373 public: 374 /// Functor type for the calculation of hash values. 375 using hasher = Hash; 376 /// Functor type for comparing the items in the set. 377 using key_equal = Pred; 378 /// Key type. 379 using key_type = T; 380 /// Size type. 381 /** 382 * Alias for \p std::size_t. 383 */ 384 using size_type = std::size_t; 385 386 private: 387 // The container is a pointer to an array of lists. 388 using ptr_type = list *; 389 // Internal pack type, containing the pointer to the objects, the hash/equal functor 390 // and the allocator. In many cases these are stateless so we can exploit EBCO if implemented 391 // in the tuple type (likely). 392 using pack_type = std::tuple<ptr_type, hasher, key_equal, allocator_type>; 393 // A few handy accessors. 
ptr()394 ptr_type &ptr() 395 { 396 return std::get<0u>(m_pack); 397 } ptr() const398 const ptr_type &ptr() const 399 { 400 return std::get<0u>(m_pack); 401 } hash() const402 const hasher &hash() const 403 { 404 return std::get<1u>(m_pack); 405 } k_equal() const406 const key_equal &k_equal() const 407 { 408 return std::get<2u>(m_pack); 409 } allocator()410 allocator_type &allocator() 411 { 412 return std::get<3u>(m_pack); 413 } allocator() const414 const allocator_type &allocator() const 415 { 416 return std::get<3u>(m_pack); 417 } 418 // Definition of the iterator type for the set. 419 template <typename Key> 420 class iterator_impl : public boost::iterator_facade<iterator_impl<Key>, Key, boost::forward_traversal_tag> 421 { 422 friend class hash_set; 423 typedef typename std::conditional<std::is_const<Key>::value, hash_set const, hash_set>::type set_type; 424 typedef typename std::conditional<std::is_const<Key>::value, typename list::const_iterator, 425 typename list::iterator>::type it_type; 426 427 public: iterator_impl()428 iterator_impl() : m_set(nullptr), m_idx(0u), m_it() 429 { 430 } iterator_impl(set_type * set,const size_type & idx,it_type it)431 explicit iterator_impl(set_type *set, const size_type &idx, it_type it) : m_set(set), m_idx(idx), m_it(it) 432 { 433 } 434 435 private: 436 friend class boost::iterator_core_access; increment()437 void increment() 438 { 439 piranha_assert(m_set); 440 auto &container = m_set->ptr(); 441 // Assert that the current iterator is valid. 
442 piranha_assert(m_idx < m_set->bucket_count()); 443 piranha_assert(!container[m_idx].empty()); 444 piranha_assert(m_it != container[m_idx].end()); 445 ++m_it; 446 if (m_it == container[m_idx].end()) { 447 const size_type container_size = m_set->bucket_count(); 448 while (true) { 449 ++m_idx; 450 if (m_idx == container_size) { 451 m_it = it_type{}; 452 return; 453 } else if (!container[m_idx].empty()) { 454 m_it = container[m_idx].begin(); 455 return; 456 } 457 } 458 } 459 } equal(const iterator_impl & other) const460 bool equal(const iterator_impl &other) const 461 { 462 // NOTE: comparing iterators from different containers is UB 463 // in the standard. 464 piranha_assert(m_set && other.m_set); 465 return (m_idx == other.m_idx && m_it == other.m_it); 466 } dereference() const467 Key &dereference() const 468 { 469 piranha_assert(m_set && m_idx < m_set->bucket_count() && m_it != m_set->ptr()[m_idx].end()); 470 return *m_it; 471 } 472 473 private: 474 set_type *m_set; 475 size_type m_idx; 476 it_type m_it; 477 }; init_from_n_buckets(const size_type & n_buckets,unsigned n_threads)478 void init_from_n_buckets(const size_type &n_buckets, unsigned n_threads) 479 { 480 piranha_assert(!ptr() && !m_log2_size && !m_n_elements); 481 if (unlikely(!n_threads)) { 482 piranha_throw(std::invalid_argument, "the number of threads must be strictly positive"); 483 } 484 // Proceed to actual construction only if the requested number of buckets is nonzero. 485 if (!n_buckets) { 486 return; 487 } 488 const size_type log2_size = get_log2_from_hint(n_buckets); 489 const size_type size = size_type(1u) << log2_size; 490 auto new_ptr = allocator().allocate(size); 491 if (unlikely(!new_ptr)) { 492 piranha_throw(std::bad_alloc, ); 493 } 494 if (n_threads == 1u) { 495 // Default-construct the elements of the array. 496 // NOTE: this is a noexcept operation, no need to account for rolling back. 
497 for (size_type i = 0u; i < size; ++i) { 498 allocator().construct(&new_ptr[i]); 499 } 500 } else { 501 // Sync variables. 502 using crs_type = std::vector<std::pair<size_type, size_type>>; 503 crs_type constructed_ranges(static_cast<typename crs_type::size_type>(n_threads), 504 std::make_pair(size_type(0u), size_type(0u))); 505 if (unlikely(constructed_ranges.size() != n_threads)) { 506 piranha_throw(std::bad_alloc, ); 507 } 508 // Thread function. 509 auto thread_function = [this, new_ptr, &constructed_ranges](const size_type &start, const size_type &end, 510 const unsigned &thread_idx) { 511 for (size_type i = start; i != end; ++i) { 512 this->allocator().construct(&new_ptr[i]); 513 } 514 constructed_ranges[thread_idx] = std::make_pair(start, end); 515 }; 516 // Work per thread. 517 const auto wpt = size / n_threads; 518 future_list<decltype(thread_function(0u, 0u, 0u))> f_list; 519 try { 520 for (unsigned i = 0u; i < n_threads; ++i) { 521 const auto start = static_cast<size_type>(wpt * i), 522 end = static_cast<size_type>((i == n_threads - 1u) ? size : wpt * (i + 1u)); 523 f_list.push_back(thread_pool::enqueue(i, thread_function, start, end, i)); 524 } 525 f_list.wait_all(); 526 // NOTE: no need to get_all() here, as we know no exceptions will be generated inside thread_func. 527 } catch (...) { 528 // NOTE: everything in thread_func is noexcept, if we are here the exception was thrown by enqueue or 529 // push_back. 530 // Wait for everything to wind down. 531 f_list.wait_all(); 532 // Destroy what was constructed. 533 for (const auto &r : constructed_ranges) { 534 for (size_type i = r.first; i != r.second; ++i) { 535 allocator().destroy(&new_ptr[i]); 536 } 537 } 538 // Deallocate before re-throwing. 539 allocator().deallocate(new_ptr, size); 540 throw; 541 } 542 } 543 // Assign the members. 544 ptr() = new_ptr; 545 m_log2_size = log2_size; 546 } 547 // Destroy all elements and deallocate ptr(). 
destroy_and_deallocate()548 void destroy_and_deallocate() 549 { 550 // Proceed to destroy all elements and deallocate only if the set is actually storing something. 551 if (ptr()) { 552 const size_type size = size_type(1u) << m_log2_size; 553 for (size_type i = 0u; i < size; ++i) { 554 allocator().destroy(&ptr()[i]); 555 } 556 allocator().deallocate(ptr(), size); 557 } else { 558 piranha_assert(!m_log2_size && !m_n_elements); 559 } 560 } 561 // Serialization support. 562 friend class boost::serialization::access; 563 template <class Archive> save(Archive & ar,unsigned) const564 void save(Archive &ar, unsigned) const 565 { 566 // Size. 567 boost_save(ar, size()); 568 // Serialize the items. 569 boost_save_range(ar, begin(), end()); 570 } 571 template <class Archive> load(Archive & ar,unsigned)572 void load(Archive &ar, unsigned) 573 { 574 // Reset this. 575 *this = hash_set{}; 576 // Recover the size. 577 size_type size; 578 boost_load(ar, size); 579 // Prepare an adequate number of buckets. 580 rehash(boost::numeric_cast<size_type>(std::ceil(static_cast<double>(size) / max_load_factor()))); 581 for (size_type i = 0; i < size; ++i) { 582 T tmp; 583 boost_load(ar, tmp); 584 const auto p = insert(std::move(tmp)); 585 if (unlikely(!p.second)) { 586 piranha_throw(std::invalid_argument, "while deserializing a hash_set from a Boost archive " 587 "a duplicate value was encountered"); 588 } 589 } 590 } 591 BOOST_SERIALIZATION_SPLIT_MEMBER() 592 // Enabler for insert(). 593 template <typename U> 594 using insert_enabler = enable_if_t<std::is_same<key_type, uncvref_t<U>>::value, int>; 595 // Run a consistency check on the set, will return false if something is wrong. sanity_check() const596 bool sanity_check() const 597 { 598 // Ignore sanity checks on shutdown. 
599 if (shutdown()) { 600 return true; 601 } 602 size_type count = 0u; 603 for (size_type i = 0u; i < bucket_count(); ++i) { 604 for (auto it = ptr()[i].begin(); it != ptr()[i].end(); ++it) { 605 if (_bucket(*it) != i) { 606 return false; 607 } 608 ++count; 609 } 610 } 611 if (count != m_n_elements) { 612 return false; 613 } 614 // m_log2_size must not be equal to or greater than the number of bits of size_type. 615 if (m_log2_size >= unsigned(std::numeric_limits<size_type>::digits)) { 616 return false; 617 } 618 // The container pointer must be consistent with the other members. 619 if (!ptr() && (m_log2_size || m_n_elements)) { 620 return false; 621 } 622 // Check size is consistent with number of iterator traversals. 623 count = 0u; 624 for (auto it = begin(); it != end(); ++it, ++count) { 625 } 626 if (count != m_n_elements) { 627 return false; 628 } 629 return true; 630 } 631 // The number of available nonzero sizes will be the number of bits in the size type. Possible nonzero sizes will be 632 // in 633 // the [2 ** 0, 2 ** (n-1)] range. 634 static const size_type m_n_nonzero_sizes = static_cast<size_type>(std::numeric_limits<size_type>::digits); 635 // Get log2 of set size at least equal to hint. To be used only when hint is not zero. get_log2_from_hint(const size_type & hint)636 static size_type get_log2_from_hint(const size_type &hint) 637 { 638 piranha_assert(hint); 639 for (size_type i = 0u; i < m_n_nonzero_sizes; ++i) { 640 if ((size_type(1u) << i) >= hint) { 641 return i; 642 } 643 } 644 piranha_throw(std::bad_alloc, ); 645 } 646 647 public: 648 /// Iterator type. 649 /** 650 * A read-only forward iterator. 651 */ 652 using iterator = iterator_impl<key_type const>; 653 654 private: 655 // Static checks on the iterator type. 656 PIRANHA_TT_CHECK(is_forward_iterator, iterator); 657 658 public: 659 /// Const iterator type. 660 /** 661 * Equivalent to the iterator type. 662 */ 663 using const_iterator = iterator; 664 /// Local iterator. 
665 /** 666 * Const iterator that can be used to iterate through a single bucket. 667 */ 668 using local_iterator = typename list::const_iterator; 669 /// Default constructor. 670 /** 671 * If not specified, it will default-initialise the hasher and the equality predicate. The resulting 672 * hash set will be empty. 673 * 674 * @param h hasher functor. 675 * @param k equality predicate. 676 * 677 * @throws unspecified any exception thrown by the copy constructors of <tt>Hash</tt> or <tt>Pred</tt>. 678 */ hash_set(const hasher & h=hasher{},const key_equal & k=key_equal{})679 hash_set(const hasher &h = hasher{}, const key_equal &k = key_equal{}) 680 : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u) 681 { 682 } 683 /// Constructor from number of buckets. 684 /** 685 * Will construct a set whose number of buckets is at least equal to \p n_buckets. If \p n_threads is not 1, 686 * then the first \p n_threads threads from piranha::thread_pool will be used concurrently for the initialisation 687 * of the set. 688 * 689 * @param n_buckets desired number of buckets. 690 * @param h hasher functor. 691 * @param k equality predicate. 692 * @param n_threads number of threads to use during initialisation. 693 * 694 * @throws std::bad_alloc if the desired number of buckets is greater than an implementation-defined maximum, or in 695 * case 696 * of memory errors. 697 * @throws std::invalid_argument if \p n_threads is zero. 698 * @throws unspecified any exception thrown by: 699 * - the copy constructors of <tt>Hash</tt> or <tt>Pred</tt>, 700 * - piranha::thread_pool::enqueue() or piranha::future_list::push_back(), if \p n_threads is not 1. 
701 */ hash_set(const size_type & n_buckets,const hasher & h=hasher{},const key_equal & k=key_equal{},unsigned n_threads=1u)702 explicit hash_set(const size_type &n_buckets, const hasher &h = hasher{}, const key_equal &k = key_equal{}, 703 unsigned n_threads = 1u) 704 : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u) 705 { 706 init_from_n_buckets(n_buckets, n_threads); 707 } 708 /// Copy constructor. 709 /** 710 * The hasher, the equality comparator and the allocator will also be copied. 711 * 712 * @param other piranha::hash_set that will be copied into \p this. 713 * 714 * @throws unspecified any exception thrown by memory allocation errors, 715 * the copy constructor of the stored type, <tt>Hash</tt> or <tt>Pred</tt>. 716 */ hash_set(const hash_set & other)717 hash_set(const hash_set &other) 718 : m_pack(nullptr, other.hash(), other.k_equal(), other.allocator()), m_log2_size(0u), m_n_elements(0u) 719 { 720 // Proceed to actual copy only if other has some content. 721 if (other.ptr()) { 722 const size_type size = size_type(1u) << other.m_log2_size; 723 auto new_ptr = allocator().allocate(size); 724 if (unlikely(!new_ptr)) { 725 piranha_throw(std::bad_alloc, ); 726 } 727 size_type i = 0u; 728 try { 729 // Copy-construct the elements of the array. 730 for (; i < size; ++i) { 731 allocator().construct(&new_ptr[i], other.ptr()[i]); 732 } 733 } catch (...) { 734 // Unwind the construction and deallocate, before re-throwing. 735 for (size_type j = 0u; j < i; ++j) { 736 allocator().destroy(&new_ptr[j]); 737 } 738 allocator().deallocate(new_ptr, size); 739 throw; 740 } 741 // Assign the members. 742 ptr() = new_ptr; 743 m_log2_size = other.m_log2_size; 744 m_n_elements = other.m_n_elements; 745 } else { 746 piranha_assert(!other.m_log2_size && !other.m_n_elements); 747 } 748 } 749 /// Move constructor. 
750 /** 751 * After the move, \p other will have zero buckets and zero elements, and its hasher and equality predicate 752 * will have been used to move-construct their counterparts in \p this. 753 * 754 * @param other set to be moved. 755 */ hash_set(hash_set && other)756 hash_set(hash_set &&other) noexcept 757 : m_pack(std::move(other.m_pack)), m_log2_size(other.m_log2_size), m_n_elements(other.m_n_elements) 758 { 759 // Clear out the other one. 760 other.ptr() = nullptr; 761 other.m_log2_size = 0u; 762 other.m_n_elements = 0u; 763 } 764 /// Constructor from range. 765 /** 766 * Create a set with a copy of a range. 767 * 768 * @param begin begin of range. 769 * @param end end of range. 770 * @param n_buckets number of initial buckets. 771 * @param h hash functor. 772 * @param k key equality predicate. 773 * 774 * @throws std::bad_alloc if the desired number of buckets is greater than an implementation-defined maximum. 775 * @throws unspecified any exception thrown by the copy constructors of <tt>Hash</tt> or <tt>Pred</tt>, or arising 776 * from 777 * calling insert() on the elements of the range. 778 */ 779 template <typename InputIterator> hash_set(const InputIterator & begin,const InputIterator & end,const size_type & n_buckets=0u,const hasher & h=hasher{},const key_equal & k=key_equal{})780 explicit hash_set(const InputIterator &begin, const InputIterator &end, const size_type &n_buckets = 0u, 781 const hasher &h = hasher{}, const key_equal &k = key_equal{}) 782 : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u) 783 { 784 init_from_n_buckets(n_buckets, 1u); 785 for (auto it = begin; it != end; ++it) { 786 insert(*it); 787 } 788 } 789 /// Constructor from initializer list. 790 /** 791 * Will insert() all the elements of the initializer list, ignoring the return value of the operation. 792 * Hash functor and equality predicate will be default-constructed. 793 * 794 * @param list initializer list of elements to be inserted. 
795 * 796 * @throws std::bad_alloc if the desired number of buckets is greater than an implementation-defined maximum. 797 * @throws unspecified any exception thrown by either insert() or of the default constructor of <tt>Hash</tt> or 798 * <tt>Pred</tt>. 799 */ 800 template <typename U> hash_set(std::initializer_list<U> list)801 explicit hash_set(std::initializer_list<U> list) 802 : m_pack(nullptr, hasher{}, key_equal{}, allocator_type{}), m_log2_size(0u), m_n_elements(0u) 803 { 804 // We do not care here for possible truncation of list.size(), as this is only an optimization. 805 init_from_n_buckets(static_cast<size_type>(list.size()), 1u); 806 for (const auto &x : list) { 807 insert(x); 808 } 809 } 810 /// Destructor. 811 /** 812 * No side effects. 813 */ ~hash_set()814 ~hash_set() 815 { 816 piranha_assert(sanity_check()); 817 destroy_and_deallocate(); 818 } 819 /// Copy assignment operator. 820 /** 821 * @param other assignment argument. 822 * 823 * @return reference to \p this. 824 * 825 * @throws unspecified any exception thrown by the copy constructor. 826 */ operator =(const hash_set & other)827 hash_set &operator=(const hash_set &other) 828 { 829 if (likely(this != &other)) { 830 hash_set tmp(other); 831 *this = std::move(tmp); 832 } 833 return *this; 834 } 835 /// Move assignment operator. 836 /** 837 * @param other set to be moved into \p this. 838 * 839 * @return reference to \p this. 840 */ operator =(hash_set && other)841 hash_set &operator=(hash_set &&other) noexcept 842 { 843 if (likely(this != &other)) { 844 destroy_and_deallocate(); 845 m_pack = std::move(other.m_pack); 846 m_log2_size = other.m_log2_size; 847 m_n_elements = other.m_n_elements; 848 // Zero out other. 849 other.ptr() = nullptr; 850 other.m_log2_size = 0u; 851 other.m_n_elements = 0u; 852 } 853 return *this; 854 } 855 /// Const begin iterator. 856 /** 857 * @return hash_set::const_iterator to the first element of the set, or end() if the set is empty. 
858 */ begin() const859 const_iterator begin() const 860 { 861 // NOTE: this could take a while in case of an empty set with lots of buckets. Take a shortcut 862 // taking into account the number of elements in the set - if zero, go directly to end()? 863 const_iterator retval; 864 retval.m_set = this; 865 size_type idx = 0u; 866 const auto b_count = bucket_count(); 867 for (; idx < b_count; ++idx) { 868 if (!ptr()[idx].empty()) { 869 break; 870 } 871 } 872 retval.m_idx = idx; 873 // If we are not at the end, assign proper iterator. 874 if (idx != b_count) { 875 retval.m_it = ptr()[idx].begin(); 876 } 877 return retval; 878 } 879 /// Const end iterator. 880 /** 881 * @return hash_set::const_iterator to the position past the last element of the set. 882 */ end() const883 const_iterator end() const 884 { 885 return const_iterator(this, bucket_count(), local_iterator{}); 886 } 887 /// Begin iterator. 888 /** 889 * @return hash_set::iterator to the first element of the set, or end() if the set is empty. 890 */ begin()891 iterator begin() 892 { 893 return static_cast<hash_set const *>(this)->begin(); 894 } 895 /// End iterator. 896 /** 897 * @return hash_set::iterator to the position past the last element of the set. 898 */ end()899 iterator end() 900 { 901 return static_cast<hash_set const *>(this)->end(); 902 } 903 /// Number of elements contained in the set. 904 /** 905 * @return number of elements in the set. 906 */ size() const907 size_type size() const 908 { 909 return m_n_elements; 910 } 911 /// Test for empty set. 912 /** 913 * @return \p true if size() returns 0, \p false otherwise. 914 */ empty() const915 bool empty() const 916 { 917 return !size(); 918 } 919 /// Number of buckets. 920 /** 921 * @return number of buckets in the set. 922 */ bucket_count() const923 size_type bucket_count() const 924 { 925 return (ptr()) ? (size_type(1u) << m_log2_size) : size_type(0u); 926 } 927 /// Load factor. 
928 /** 929 * @return <tt>(double)size() / bucket_count()</tt>, or 0 if the set is empty. 930 */ load_factor() const931 double load_factor() const 932 { 933 const auto b_count = bucket_count(); 934 return (b_count) ? static_cast<double>(size()) / static_cast<double>(b_count) : 0.; 935 } 936 /// Index of destination bucket. 937 /** 938 * Index to which \p k would belong, were it to be inserted into the set. The index of the 939 * destination bucket is the hash value reduced modulo the bucket count. 940 * 941 * @param k input argument. 942 * 943 * @return index of the destination bucket for \p k. 944 * 945 * @throws piranha::zero_division_error if bucket_count() returns zero. 946 * @throws unspecified any exception thrown by _bucket(). 947 */ bucket(const key_type & k) const948 size_type bucket(const key_type &k) const 949 { 950 if (unlikely(!bucket_count())) { 951 piranha_throw(zero_division_error, "cannot calculate bucket index in an empty set"); 952 } 953 return _bucket(k); 954 } 955 /// Find element. 956 /** 957 * @param k element to be located. 958 * 959 * @return hash_set::const_iterator to <tt>k</tt>'s position in the set, or end() if \p k is not in the set. 960 * 961 * @throws unspecified any exception thrown by _find() or by _bucket(). 962 */ find(const key_type & k) const963 const_iterator find(const key_type &k) const 964 { 965 if (unlikely(!bucket_count())) { 966 return end(); 967 } 968 return _find(k, _bucket(k)); 969 } 970 /// Find element. 971 /** 972 * @param k element to be located. 973 * 974 * @return hash_set::iterator to <tt>k</tt>'s position in the set, or end() if \p k is not in the set. 975 * 976 * @throws unspecified any exception thrown by _find(). 977 */ find(const key_type & k)978 iterator find(const key_type &k) 979 { 980 return static_cast<const hash_set *>(this)->find(k); 981 } 982 /// Maximum load factor. 983 /** 984 * @return the maximum load factor allowed before a resize. 
985 */ max_load_factor() const986 double max_load_factor() const 987 { 988 // Maximum load factor hard-coded to 1. 989 // NOTE: if this is ever made configurable, it should never be allowed to go to zero. 990 return 1.; 991 } 992 /// Insert element. 993 /** 994 * \note 995 * This template method is activated only if \p T and \p U are the same type, aside from cv qualifications and 996 * references. 997 * 998 * If no other key equivalent to \p k exists in the set, the insertion is successful and returns the 999 * <tt>(it,true)</tt> 1000 * pair - where \p it is the position in the set into which the object has been inserted. Otherwise, the return 1001 * value 1002 * will be <tt>(it,false)</tt> - where \p it is the position of the existing equivalent object. 1003 * 1004 * @param k object that will be inserted into the set. 1005 * 1006 * @return <tt>(hash_set::iterator,bool)</tt> pair containing an iterator to the newly-inserted object (or its 1007 * existing 1008 * equivalent) and the result of the operation. 1009 * 1010 * @throws unspecified any exception thrown by: 1011 * - hash_set::key_type's copy constructor, 1012 * - _find(), 1013 * - _bucket(). 1014 * @throws std::overflow_error if a successful insertion would result in size() exceeding the maximum 1015 * value representable by type piranha::hash_set::size_type. 1016 * @throws std::bad_alloc if the operation results in a resize of the set past an implementation-defined 1017 * maximum number of buckets. 1018 */ 1019 template <typename U, insert_enabler<U> = 0> insert(U && k)1020 std::pair<iterator, bool> insert(U &&k) 1021 { 1022 auto b_count = bucket_count(); 1023 // Handle the case of a set with no buckets. 1024 if (unlikely(!b_count)) { 1025 _increase_size(); 1026 // Update the bucket count. 1027 b_count = 1u; 1028 } 1029 // Try to locate the element. 1030 auto bucket_idx = _bucket(k); 1031 const auto it = _find(k, bucket_idx); 1032 if (it != end()) { 1033 // Item already present, exit. 
1034 return std::make_pair(it, false); 1035 } 1036 if (unlikely(m_n_elements == std::numeric_limits<size_type>::max())) { 1037 piranha_throw(std::overflow_error, "maximum number of elements reached"); 1038 } 1039 // Item is new. Handle the case in which we need to rehash because of load factor. 1040 if (unlikely(static_cast<double>(m_n_elements + size_type(1u)) / static_cast<double>(b_count) 1041 > max_load_factor())) { 1042 _increase_size(); 1043 // We need a new bucket index in case of a rehash. 1044 bucket_idx = _bucket(k); 1045 } 1046 const auto it_retval = _unique_insert(std::forward<U>(k), bucket_idx); 1047 ++m_n_elements; 1048 return std::make_pair(it_retval, true); 1049 } 1050 /// Erase element. 1051 /** 1052 * Erase the element to which \p it points. \p it must be a valid iterator 1053 * pointing to an element of the set. 1054 * 1055 * Erasing an element invalidates all iterators pointing to elements in the same bucket 1056 * as the erased element. 1057 * 1058 * After the operation has taken place, the size() of the set will be decreased by one. 1059 * 1060 * @param it iterator to the element of the set to be removed. 1061 * 1062 * @return iterator pointing to the element following \p it prior to the element being erased, or end() if 1063 * no such element exists. 1064 */ erase(const_iterator it)1065 iterator erase(const_iterator it) 1066 { 1067 piranha_assert(!empty()); 1068 const auto b_it = _erase(it); 1069 iterator retval; 1070 retval.m_set = this; 1071 const auto b_count = bucket_count(); 1072 if (b_it == ptr()[it.m_idx].end()) { 1073 // Travel to the next iterator if the deleted element was 1074 // the last one in the bucket. 1075 auto idx = static_cast<size_type>(it.m_idx + 1u); 1076 // Advance to the first non-empty bucket if necessary, 1077 // without going past the end of the set. 
1078 for (; idx < b_count; ++idx) { 1079 if (!ptr()[idx].empty()) { 1080 break; 1081 } 1082 } 1083 retval.m_idx = idx; 1084 // If we are not at the end, assign proper iterator. 1085 if (idx != b_count) { 1086 retval.m_it = ptr()[idx].begin(); 1087 } 1088 // NOTE: in case we reached the end of the container, the end() iterator should be: 1089 // {this,bucket_count,local_iterator{}} 1090 // this has been set above already, bucket_count is set by retval.m_idx = idx 1091 // and the default local_iterator ctor is called by the def ctor of iterator. 1092 } else { 1093 // Otherwise, just copy over the iterator returned by _erase(). 1094 retval.m_idx = it.m_idx; 1095 retval.m_it = b_it; 1096 } 1097 piranha_assert(m_n_elements); 1098 // Update the number of elements. 1099 m_n_elements = static_cast<size_type>(m_n_elements - 1u); 1100 return retval; 1101 } 1102 /// Remove all elements. 1103 /** 1104 * After this call, size() and bucket_count() will both return zero. 1105 */ clear()1106 void clear() 1107 { 1108 destroy_and_deallocate(); 1109 // Reset the members. 1110 ptr() = nullptr; 1111 m_log2_size = 0u; 1112 m_n_elements = 0u; 1113 } 1114 /// Swap content. 1115 /** 1116 * Will use \p std::swap to swap hasher and equality predicate. 1117 * 1118 * @param other swap argument. 1119 * 1120 * @throws unspecified any exception thrown by swapping hasher or equality predicate via \p std::swap. 1121 */ swap(hash_set & other)1122 void swap(hash_set &other) 1123 { 1124 std::swap(m_pack, other.m_pack); 1125 std::swap(m_log2_size, other.m_log2_size); 1126 std::swap(m_n_elements, other.m_n_elements); 1127 } 1128 /// Rehash set. 1129 /** 1130 * Change the number of buckets in the set to at least \p new_size. No rehash is performed 1131 * if rehashing would lead to exceeding the maximum load factor. If \p n_threads is not 1, 1132 * then the first \p n_threads threads from piranha::thread_pool will be used concurrently during 1133 * the rehash operation. 
1134 * 1135 * @param new_size new desired number of buckets. 1136 * @param n_threads number of threads to use. 1137 * 1138 * @throws std::invalid_argument if \p n_threads is zero. 1139 * @throws unspecified any exception thrown by the constructor from number of buckets, 1140 * _unique_insert() or _bucket(). 1141 */ rehash(const size_type & new_size,unsigned n_threads=1u)1142 void rehash(const size_type &new_size, unsigned n_threads = 1u) 1143 { 1144 if (unlikely(!n_threads)) { 1145 piranha_throw(std::invalid_argument, "the number of threads must be strictly positive"); 1146 } 1147 // If rehash is requested to zero, do something only if there are no items stored in the set. 1148 if (!new_size) { 1149 if (!size()) { 1150 clear(); 1151 } 1152 return; 1153 } 1154 // Do nothing if rehashing to the new size would lead to exceeding the max load factor. 1155 if (static_cast<double>(size()) / static_cast<double>(new_size) > max_load_factor()) { 1156 return; 1157 } 1158 // Create a new set with needed amount of buckets. 1159 hash_set new_set(new_size, hash(), k_equal(), n_threads); 1160 try { 1161 const auto it_f = _m_end(); 1162 for (auto it = _m_begin(); it != it_f; ++it) { 1163 const auto new_idx = new_set._bucket(*it); 1164 new_set._unique_insert(std::move(*it), new_idx); 1165 } 1166 } catch (...) { 1167 // Clear up both this and the new set upon any kind of error. 1168 clear(); 1169 new_set.clear(); 1170 throw; 1171 } 1172 // Retain the number of elements. 1173 new_set.m_n_elements = m_n_elements; 1174 // Clear the old set. 1175 clear(); 1176 // Assign the new set. 1177 *this = std::move(new_set); 1178 } 1179 /// Get information on the sparsity of the set. 1180 /** 1181 * @return an <tt>std::map<size_type,size_type></tt> in which the key is the number of elements 1182 * stored in a bucket and the mapped type the number of buckets containing those many elements. 1183 * 1184 * @throws unspecified any exception thrown by memory errors in standard containers. 
1185 */ evaluate_sparsity() const1186 std::map<size_type, size_type> evaluate_sparsity() const 1187 { 1188 const auto it_f = ptr() + bucket_count(); 1189 std::map<size_type, size_type> retval; 1190 size_type counter; 1191 for (auto it = ptr(); it != it_f; ++it) { 1192 counter = 0u; 1193 for (auto l_it = it->begin(); l_it != it->end(); ++l_it) { 1194 ++counter; 1195 } 1196 ++retval[counter]; 1197 } 1198 return retval; 1199 } 1200 /** @name Low-level interface 1201 * Low-level methods and types. 1202 */ 1203 //@{ 1204 /// Mutable iterator. 1205 /** 1206 * This iterator type provides non-const access to the elements of the set. Please note that modifications 1207 * to an existing element of the set might invalidate the relation between the element and its position in the set. 1208 * After such modifications of one or more elements, the only valid operation is hash_set::clear() (destruction of 1209 * the 1210 * set before calling hash_set::clear() will lead to assertion failures in debug mode). 1211 */ 1212 using _m_iterator = iterator_impl<key_type>; 1213 /// Mutable begin iterator. 1214 /** 1215 * @return hash_set::_m_iterator to the beginning of the set. 1216 */ _m_begin()1217 _m_iterator _m_begin() 1218 { 1219 // NOTE: this could take a while in case of an empty set with lots of buckets. Take a shortcut 1220 // taking into account the number of elements in the set - if zero, go directly to end()? 1221 const auto b_count = bucket_count(); 1222 _m_iterator retval; 1223 retval.m_set = this; 1224 size_type idx = 0u; 1225 for (; idx < b_count; ++idx) { 1226 if (!ptr()[idx].empty()) { 1227 break; 1228 } 1229 } 1230 retval.m_idx = idx; 1231 // If we are not at the end, assign proper iterator. 1232 if (idx != b_count) { 1233 retval.m_it = ptr()[idx].begin(); 1234 } 1235 return retval; 1236 } 1237 /// Mutable end iterator. 1238 /** 1239 * @return hash_set::_m_iterator to the end of the set. 
1240 */ _m_end()1241 _m_iterator _m_end() 1242 { 1243 return _m_iterator(this, bucket_count(), typename list::iterator{}); 1244 } 1245 /// Insert unique element (low-level). 1246 /** 1247 * \note 1248 * This template method is activated only if \p T and \p U are the same type, aside from cv qualifications and 1249 * references. 1250 * 1251 * The parameter \p bucket_idx is the index of the destination bucket for \p k and, for a 1252 * set with a nonzero number of buckets, must be equal to the output 1253 * of bucket() before the insertion. 1254 * 1255 * This method will not check if a key equivalent to \p k already exists in the set, it will not 1256 * update the number of elements present in the set after the insertion, it will not resize 1257 * the set in case the maximum load factor is exceeded, nor it will check 1258 * if the value of \p bucket_idx is correct. 1259 * 1260 * @param k object that will be inserted into the set. 1261 * @param bucket_idx destination bucket for \p k. 1262 * 1263 * @return iterator pointing to the newly-inserted element. 1264 * 1265 * @throws unspecified any exception thrown by the copy constructor of hash_set::key_type or by memory allocation 1266 * errors. 1267 */ 1268 template <typename U, insert_enabler<U> = 0> _unique_insert(U && k,const size_type & bucket_idx)1269 iterator _unique_insert(U &&k, const size_type &bucket_idx) 1270 { 1271 // Assert that key is not present already in the set. 1272 piranha_assert(find(std::forward<U>(k)) == end()); 1273 // Assert bucket index is correct. 1274 piranha_assert(bucket_idx == _bucket(k)); 1275 auto p = ptr()[bucket_idx].insert(std::forward<U>(k)); 1276 return iterator(this, bucket_idx, local_iterator(p)); 1277 } 1278 /// Find element (low-level). 1279 /** 1280 * Locate element in the set. The parameter \p bucket_idx is the index of the destination bucket for \p k and, for 1281 * a set with a nonzero number of buckets, must be equal to the output 1282 * of bucket() before the insertion. 
This method will not check if the value of \p bucket_idx is correct. 1283 * 1284 * @param k element to be located. 1285 * @param bucket_idx index of the destination bucket for \p k. 1286 * 1287 * @return hash_set::iterator to <tt>k</tt>'s position in the set, or end() if \p k is not in the set. 1288 * 1289 * @throws unspecified any exception thrown by calling the equality predicate. 1290 */ _find(const key_type & k,const size_type & bucket_idx) const1291 const_iterator _find(const key_type &k, const size_type &bucket_idx) const 1292 { 1293 // Assert bucket index is correct. 1294 piranha_assert(bucket_idx == _bucket(k) && bucket_idx < bucket_count()); 1295 const auto &b = ptr()[bucket_idx]; 1296 const auto it_f = b.end(); 1297 const_iterator retval(end()); 1298 for (auto it = b.begin(); it != it_f; ++it) { 1299 if (k_equal()(*it, k)) { 1300 retval.m_idx = bucket_idx; 1301 retval.m_it = it; 1302 break; 1303 } 1304 } 1305 return retval; 1306 } 1307 /// Index of destination bucket from hash value. 1308 /** 1309 * Note that this method will not check if the number of buckets is zero. 1310 * 1311 * @param hash input hash value. 1312 * 1313 * @return index of the destination bucket for an object with hash value \p hash. 1314 */ _bucket_from_hash(const std::size_t & hash) const1315 size_type _bucket_from_hash(const std::size_t &hash) const 1316 { 1317 piranha_assert(bucket_count()); 1318 return hash % (size_type(1u) << m_log2_size); 1319 } 1320 /// Index of destination bucket (low-level). 1321 /** 1322 * Equivalent to bucket(), with the exception that this method will not check 1323 * if the number of buckets is zero. 1324 * 1325 * @param k input argument. 1326 * 1327 * @return index of the destination bucket for \p k. 1328 * 1329 * @throws unspecified any exception thrown by the call operator of the hasher. 
1330 */ _bucket(const key_type & k) const1331 size_type _bucket(const key_type &k) const 1332 { 1333 return _bucket_from_hash(hash()(k)); 1334 } 1335 /// Force update of the number of elements. 1336 /** 1337 * After this call, size() will return \p new_size regardless of the true number of elements in the set. 1338 * 1339 * @param new_size new set size. 1340 */ _update_size(const size_type & new_size)1341 void _update_size(const size_type &new_size) 1342 { 1343 m_n_elements = new_size; 1344 } 1345 /// Increase bucket count. 1346 /** 1347 * Increase the number of buckets to the next implementation-defined value. 1348 * 1349 * @throws std::bad_alloc if the operation results in a resize of the set past an implementation-defined 1350 * maximum number of buckets. 1351 * @throws unspecified any exception thrown by rehash(). 1352 */ _increase_size()1353 void _increase_size() 1354 { 1355 if (unlikely(m_log2_size >= m_n_nonzero_sizes - 1u)) { 1356 piranha_throw(std::bad_alloc, ); 1357 } 1358 // We must take care here: if the set has zero buckets, 1359 // the next log2_size is 0u. Otherwise increase current log2_size. 1360 piranha_assert(ptr() || (!ptr() && !m_log2_size)); 1361 const auto new_log2_size = (ptr()) ? (m_log2_size + 1u) : 0u; 1362 // Rehash to the new size. 1363 rehash(size_type(1u) << new_log2_size); 1364 } 1365 /// Const reference to list in bucket. 1366 /** 1367 * @param idx index of the bucket whose list will be returned. 1368 * 1369 * @return a const reference to the list of items contained in the bucket positioned 1370 * at index \p idx. 1371 */ _get_bucket_list(const size_type & idx) const1372 const list &_get_bucket_list(const size_type &idx) const 1373 { 1374 piranha_assert(idx < bucket_count()); 1375 return ptr()[idx]; 1376 } 1377 /// Erase element. 1378 /** 1379 * Erase the element to which \p it points. \p it must be a valid iterator 1380 * pointing to an element of the set. 
1381 * 1382 * Erasing an element invalidates all iterators pointing to elements in the same bucket 1383 * as the erased element. 1384 * 1385 * This method will not update the number of elements in the set, nor it will try to access elements 1386 * outside the bucket to which \p it refers. 1387 * 1388 * @param it iterator to the element of the set to be removed. 1389 * 1390 * @return local iterator pointing to the element following \p it prior to the element being erased, or local end() 1391 * if 1392 * no such element exists. 1393 */ _erase(const_iterator it)1394 local_iterator _erase(const_iterator it) 1395 { 1396 // Verify the iterator is valid. 1397 piranha_assert(it.m_set == this); 1398 piranha_assert(it.m_idx < bucket_count()); 1399 piranha_assert(!ptr()[it.m_idx].empty()); 1400 piranha_assert(it.m_it != ptr()[it.m_idx].end()); 1401 auto &bucket = ptr()[it.m_idx]; 1402 // If the pointed-to element is the first one in the bucket, we need special care. 1403 if (&*it == &*bucket.m_node.ptr()) { 1404 // Destroy the payload. 1405 bucket.m_node.ptr()->~T(); 1406 if (bucket.m_node.m_next == &bucket.terminator) { 1407 // Special handling if this was the only element. 1408 bucket.m_node.m_next = nullptr; 1409 return bucket.end(); 1410 } else { 1411 // Store the link in the second element (this could be the terminator). 1412 auto tmp = bucket.m_node.m_next->m_next; 1413 // Move-construct from the second element, and then destroy it. 1414 ::new (static_cast<void *>(&bucket.m_node.m_storage)) T(std::move(*bucket.m_node.m_next->ptr())); 1415 bucket.m_node.m_next->ptr()->~T(); 1416 ::delete bucket.m_node.m_next; 1417 // Establish the new link. 
1418 bucket.m_node.m_next = tmp; 1419 return bucket.begin(); 1420 } 1421 } else { 1422 auto b_it = bucket.begin(); 1423 auto prev_b_it = b_it; 1424 const auto b_it_f = bucket.end(); 1425 ++b_it; 1426 for (; b_it != b_it_f; ++b_it, ++prev_b_it) { 1427 if (&*b_it == &*it) { 1428 // Assign to the previous element the next link of the current one. 1429 prev_b_it.m_ptr->m_next = b_it.m_ptr->m_next; 1430 // Delete the current one. 1431 b_it.m_ptr->ptr()->~T(); 1432 ::delete b_it.m_ptr; 1433 break; 1434 }; 1435 } 1436 // We never want to go through the whole list, it means the element 1437 // to which 'it' refers is not here: assert that the iterator we just 1438 // erased was not end() - i.e., it was pointing to something. 1439 piranha_assert(b_it.m_ptr); 1440 // Move forward the iterator that originally preceded the erased item. 1441 // It will now point to the item past the erased one or to the local end(). 1442 return ++prev_b_it; 1443 } 1444 } 1445 //@} 1446 private: 1447 pack_type m_pack; 1448 size_type m_log2_size; 1449 size_type m_n_elements; 1450 }; 1451 1452 template <typename T, typename Hash, typename Pred> 1453 typename hash_set<T, Hash, Pred>::node hash_set<T, Hash, Pred>::list::terminator; 1454 1455 template <typename T, typename Hash, typename Pred> 1456 const typename hash_set<T, Hash, Pred>::size_type hash_set<T, Hash, Pred>::m_n_nonzero_sizes; 1457 1458 inline namespace impl 1459 { 1460 1461 // Enablers for boost s11n. 
1462 template <typename Archive, typename T, typename Hash, typename Pred> 1463 using hash_set_boost_save_enabler 1464 = enable_if_t<conjunction<has_boost_save<Archive, T>, 1465 has_boost_save<Archive, typename hash_set<T, Hash, Pred>::size_type>>::value>; 1466 1467 template <typename Archive, typename T, typename Hash, typename Pred> 1468 using hash_set_boost_load_enabler 1469 = enable_if_t<conjunction<has_boost_load<Archive, T>, 1470 has_boost_load<Archive, typename hash_set<T, Hash, Pred>::size_type>>::value>; 1471 } 1472 1473 /// Specialisation of piranha::boost_save() for piranha::hash_set. 1474 /** 1475 * \note 1476 * This specialisation is enabled only if \p T and the size type of piranha::hash_set satisfy 1477 * piranha::has_boost_save. 1478 * 1479 * The hashing functor and the equality predicate are not serialized. 1480 * 1481 * @throws unspecified any exception thrown by piranha::boost_save(). 1482 */ 1483 template <typename Archive, typename T, typename Hash, typename Pred> 1484 struct boost_save_impl<Archive, hash_set<T, Hash, Pred>, hash_set_boost_save_enabler<Archive, T, Hash, Pred>> 1485 : boost_save_via_boost_api<Archive, hash_set<T, Hash, Pred>> { 1486 }; 1487 1488 /// Specialisation of piranha::boost_load() for piranha::hash_set. 1489 /** 1490 * \note 1491 * This specialisation is enabled only if \p T and the size type of piranha::hash_set satisfy 1492 * piranha::has_boost_load. 1493 * 1494 * In case duplicate elements are encountered during deserialization, an exception will be raised. Before performing 1495 * the deserialization, the output piranha::hash_set is reset with a default-constructed instance of piranha::hash_set. 1496 * The hashing functor and the equality predicate are not deserialized. The basic exception safety guarantee is 1497 * provided. 1498 * 1499 * @throws std::invalid_argument if a duplicate element is encountered during deserialization. 
1500 * @throws unspecified any exception thrown by: 1501 * - the public interface of piranha::hash_set, 1502 * - piranha::boost_load(), 1503 * - <tt>boost::numeric_cast()</tt>. 1504 */ 1505 template <typename Archive, typename T, typename Hash, typename Pred> 1506 struct boost_load_impl<Archive, hash_set<T, Hash, Pred>, hash_set_boost_load_enabler<Archive, T, Hash, Pred>> 1507 : boost_load_via_boost_api<Archive, hash_set<T, Hash, Pred>> { 1508 }; 1509 1510 #if defined(PIRANHA_WITH_MSGPACK) 1511 1512 inline namespace impl 1513 { 1514 1515 // Enablers for msgpack s11n. 1516 template <typename Stream, typename T, typename Hash, typename Pred> 1517 using hash_set_msgpack_pack_enabler 1518 = enable_if_t<conjunction<is_msgpack_stream<Stream>, 1519 has_safe_cast<std::uint32_t, typename hash_set<T, Hash, Pred>::size_type>, 1520 has_msgpack_pack<Stream, T>>::value>; 1521 1522 template <typename T> 1523 using hash_set_msgpack_convert_enabler = enable_if_t<has_msgpack_convert<T>::value>; 1524 } 1525 1526 /// Specialisation of piranha::msgpack_pack() for piranha::hash_set. 1527 /** 1528 * \note 1529 * This specialisation is enabled only if 1530 * - \p Stream satisfies piranha::is_msgpack_stream, 1531 * - \p T satisfies piranha::has_msgpack_pack, 1532 * - the size type of piranha::hash_set is safely convertible to \p std::uint32_t. 1533 */ 1534 template <typename Stream, typename T, typename Hash, typename Pred> 1535 struct msgpack_pack_impl<Stream, hash_set<T, Hash, Pred>, hash_set_msgpack_pack_enabler<Stream, T, Hash, Pred>> { 1536 /// Call operator. 1537 /** 1538 * This method will serialize \p h into \p p using the format \p f. The hashing functor and the equality predicate 1539 * are not serialized. The msgpack representation of a piranha::hash_set consists of an array containing the 1540 * items in the set. 1541 * 1542 * @param p the target packer. 1543 * @param h the piranha::hash_set that will be serialized. 1544 * @param f the desired piranha::msgpack_format. 
1545 * 1546 * @throws unspecified any exception thrown by: 1547 * - the public interface of <tt>msgpack::packer</tt>, 1548 * - piranha::safe_cast(), 1549 * - piranha::msgpack_pack(). 1550 */ operator ()piranha::msgpack_pack_impl1551 void operator()(msgpack::packer<Stream> &p, const hash_set<T, Hash, Pred> &h, msgpack_format f) const 1552 { 1553 msgpack_pack_range(p, h.begin(), h.end(), h.size(), f); 1554 } 1555 }; 1556 1557 /// Specialisation of piranha::msgpack_convert() for piranha::hash_set. 1558 /** 1559 * \note 1560 * This specialisation is enabled only if \p T satisfies piranha::has_msgpack_convert. 1561 */ 1562 template <typename T, typename Hash, typename Pred> 1563 struct msgpack_convert_impl<hash_set<T, Hash, Pred>, hash_set_msgpack_convert_enabler<T>> { 1564 /// Call operator. 1565 /** 1566 * This method will convert the input object \p o into \p h using the format \p f. In case duplicate elements 1567 * are encountered during deserialization, an exception will be raised. Before performing the deserialization, 1568 * \p h is reset with a default-constructed instance of piranha::hash_set. The hashing functor and the equality 1569 * predicate are not deserialized. This method provides the basic exception safety guarantee. 1570 * 1571 * @param h the target piranha::hash_set. 1572 * @param o the source <tt>msgpack::object</tt>. 1573 * @param f the desired piranha::msgpack_format. 1574 * 1575 * @throws std::invalid_argument if a duplicate element is encountered during deserialization. 1576 * @throws unspecified any exception thrown by: 1577 * - the public interface of piranha::hash_set and <tt>msgpack::object</tt>, 1578 * - <tt>boost::numeric_cast()</tt>, 1579 * - piranha::msgpack_convert(). 1580 */ operator ()piranha::msgpack_convert_impl1581 void operator()(hash_set<T, Hash, Pred> &h, const msgpack::object &o, msgpack_format f) const 1582 { 1583 // Clear out the retval. 
1584 h = hash_set<T, Hash, Pred>{}; 1585 // Extract the array of items as a vector of objects. 1586 std::vector<msgpack::object> items; 1587 o.convert(items); 1588 // Prepare the number of buckets. 1589 h.rehash(boost::numeric_cast<typename hash_set<T, Hash, Pred>::size_type>( 1590 std::ceil(static_cast<double>(items.size()) / h.max_load_factor()))); 1591 // Deserialize the items. 1592 for (const auto &obj : items) { 1593 T tmp; 1594 msgpack_convert(tmp, obj, f); 1595 const auto p = h.insert(std::move(tmp)); 1596 if (unlikely(!p.second)) { 1597 piranha_throw(std::invalid_argument, "while deserializing a hash_set from a msgpack object " 1598 "a duplicate value was encountered"); 1599 } 1600 } 1601 } 1602 }; 1603 1604 #endif 1605 } 1606 1607 #endif 1608