1 /* Copyright 2009-2016 Francesco Biscani (bluescarni@gmail.com)
2 
3 This file is part of the Piranha library.
4 
5 The Piranha library is free software; you can redistribute it and/or modify
6 it under the terms of either:
7 
8   * the GNU Lesser General Public License as published by the Free
9     Software Foundation; either version 3 of the License, or (at your
10     option) any later version.
11 
12 or
13 
14   * the GNU General Public License as published by the Free Software
15     Foundation; either version 3 of the License, or (at your option) any
16     later version.
17 
18 or both in parallel, as here.
19 
20 The Piranha library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
23 for more details.
24 
25 You should have received copies of the GNU General Public License and the
26 GNU Lesser General Public License along with the Piranha library.  If not,
27 see https://www.gnu.org/licenses/. */
28 
29 #ifndef PIRANHA_HASH_SET_HPP
30 #define PIRANHA_HASH_SET_HPP
31 
32 #include <boost/iterator/iterator_facade.hpp>
33 #include <boost/numeric/conversion/cast.hpp>
34 #include <cmath>
35 #include <cstddef>
36 #include <cstdint>
37 #include <functional>
38 #include <initializer_list>
39 #include <limits>
40 #include <map>
41 #include <memory>
42 #include <new>
43 #include <stdexcept>
44 #include <string>
45 #include <tuple>
46 #include <type_traits>
47 #include <utility>
48 #include <vector>
49 
50 #include "config.hpp"
51 #include "debug_access.hpp"
52 #include "detail/init_data.hpp"
53 #include "exceptions.hpp"
54 #include "s11n.hpp"
55 #include "safe_cast.hpp"
56 #include "thread_pool.hpp"
57 #include "type_traits.hpp"
58 
59 namespace piranha
60 {
61 
62 /// Hash set.
63 /**
64  * Hash set class with interface similar to \p std::unordered_set. The main points of difference with respect to
65  * \p std::unordered_set are the following:
66  *
67  * - the exception safety guarantee is weaker (see below),
68  * - iterators and iterator invalidation: after a rehash operation, all iterators will be invalidated and existing
69  *   references/pointers to the elements will also be invalid; after an insertion/erase operation, all existing
70  *   iterators, pointers and references to the elements in the destination bucket will be invalid,
71  * - the complexity of iterator traversal depends on the load factor of the table.
72  *
73  * The implementation employs a separate chaining strategy consisting of an array of buckets, each one a singly linked
74  * list with the first node stored directly within the array (so that the first insertion in a bucket does not require
75  * any heap allocation).
76  *
77  * An additional set of low-level methods is provided: such methods are suitable for use in high-performance and
78  * multi-threaded contexts, and, if misused, could lead to data corruption and other unpredictable errors.
79  *
80  * Note that for performance reasons the implementation employs sizes that are powers of two. Hence, particular care
81  * should be taken that the hash function does not exhibit commensurabilities with powers of 2.
82  *
83  * ## Type requirements ##
84  *
85  * - \p T must satisfy piranha::is_container_element,
86  * - \p Hash must satisfy piranha::is_hash_function_object,
87  * - \p Pred must satisfy piranha::is_equality_function_object.
88  *
89  * ## Exception safety guarantee ##
90  *
91  * This class provides the strong exception safety guarantee for all operations apart from methods involving insertion,
92  * which provide the basic guarantee (after a failed insertion, the set will be left in an unspecified but valid state).
93  *
94  * ## Move semantics ##
95  *
96  * Move construction and move assignment will leave the moved-from object equivalent to an empty set whose hasher and
97  * equality predicate have been moved-from.
98  */
99 /* Some improvement NOTEs:
100 * - tests for low-level methods
101 * - better increase_size with recycling of dynamically-allocated nodes
102 * - see if we can reduce the number of branches in the find algorithm (e.g., when traversing the list) -> this should be
103 * a general review of the internal linked list
104 * implementation.
105 * - memory handling: the usage of the allocator object should be more standard, i.e., use the pointer and reference
106 * typedefs defined within, replace
107 * positional new with construct even in the list implementation. Then it can be made a template parameter with default =
108 * std::allocator.
* - use of new: we should probably replace unqualified new with ::new, in case operator new is overloaded -> also,
* check all occurrences of plain new, as it is used in static_vector, for instance,
* as well.
112 * - inline the first bucket, with the idea of avoiding memory allocations when the series consist of a single element
113 * (useful for instance
114 * when iterating over the series with the fat iterator).
115 * - optimisation for the begin() iterator,
116 * - check again about the mod implementation,
117 * - in the dtor checks, do we still want the shutdown() logic after we rework symbol?
118 *   are we still accessing potentially global variables?
119 * - maybe a bit more enabling for ctor and other template methods, not really essential though.
120 */
121 template <typename T, typename Hash = std::hash<T>, typename Pred = std::equal_to<T>>
122 class hash_set
123 {
124     PIRANHA_TT_CHECK(is_container_element, T);
125     PIRANHA_TT_CHECK(is_hash_function_object, Hash, T);
126     PIRANHA_TT_CHECK(is_equality_function_object, Pred, T);
127     // Make friend with debug access class.
128     template <typename U>
129     friend class debug_access;
    // Node class for bucket element: a singly-linked-list node whose payload (a T)
    // lives in raw aligned storage, so that a node can exist without a constructed T.
    struct node {
        // Raw storage large enough and suitably aligned to host a single T.
        typedef typename std::aligned_storage<sizeof(T), alignof(T)>::type storage_type;
        // A default-constructed node is empty: m_next doubles as the "payload present"
        // flag (non-null iff a T has been constructed into m_storage).
        node() : m_next(nullptr)
        {
        }
        // Erase all other ctors/assignments, we do not want to
        // copy around this object as m_storage is not what it is
        // (and often could be uninitialized, which would lead to UB if used).
        node(const node &) = delete;
        node(node &&) = delete;
        node &operator=(const node &) = delete;
        node &operator=(node &&) = delete;
        // NOTE: the idea here is that we use these helpers only *after* the object has been constructed,
        // for constructing the object we must always access m_storage directly. The chain of two casts
        // seems to be the only standard-conforming way of getting out a pointer to T.
        // http://stackoverflow.com/questions/1082378/what-is-the-basic-use-of-aligned-storage
        // http://stackoverflow.com/questions/13466556/aligned-storage-and-strict-aliasing
        // http://en.cppreference.com/w/cpp/types/aligned_storage
        // A couple of further notes:
        // - pointer casting to/from void * is ok (4.10.2 and 5.2.9.13, via static_cast);
        // - the placement new operator (which we use to construct the object into the storage)
        //   is guaranteed to construct the object at the beginning of the storage, and taking
        //   a void * out of the storage will return the address of the object stored within;
        // - the lifetime of the original POD storage_type ends when we reuse its storage via
        //   placement new.
        // This page contains some more info about storage reuse:
        // http://en.cppreference.com/w/cpp/language/lifetime#Storage_reuse
        // See also this link, regarding static_cast vs reinterpret_cast for this very specific
        // usage:
        // http://stackoverflow.com/questions/19300142/static-cast-and-reinterpret-cast-for-stdaligned-storage
        // Apparently, it is equivalent.
        // Typed access to the payload (const overload). Must be called only while a T
        // is alive in m_storage, hence the assert on the occupied flag.
        const T *ptr() const
        {
            piranha_assert(m_next);
            return static_cast<const T *>(static_cast<const void *>(&m_storage));
        }
        // Typed access to the payload (mutable overload).
        T *ptr()
        {
            piranha_assert(m_next);
            return static_cast<T *>(static_cast<void *>(&m_storage));
        }
        // Raw payload storage; a T may or may not currently be constructed in here.
        storage_type m_storage;
        // Next node in the bucket list: nullptr when this node is empty, the address of
        // the list terminator when this is the last node (see the list NOTE below).
        node *m_next;
    };
175     // List constituting the bucket.
176     // NOTE: in this list implementation the m_next pointer is used as a flag to signal if the current node
177     // stores an item: the pointer is not null if it does contain something. The value of m_next pointer in a node is
178     // set to a constant
179     // &terminator node if it is the last node of the list. I.e., the terminator is end() in all cases
180     // except when the list is empty (in that case the m_node member is end()).
181     struct list {
182         // NOTE: re: std::iterator inheritance. It's netiher encouraged not discouraged according to this:
183         // http://stackoverflow.com/questions/6471019/can-should-i-inherit-from-stl-iterator
184         // I think we can just keep on using boost iterator_facade for the time being.
185         template <typename U>
186         class iterator_impl : public boost::iterator_facade<iterator_impl<U>, U, boost::forward_traversal_tag>
187         {
188             typedef typename std::conditional<std::is_const<U>::value, node const *, node *>::type ptr_type;
189             template <typename V>
190             friend class iterator_impl;
191 
192         public:
iterator_impl()193             iterator_impl() : m_ptr(nullptr)
194             {
195             }
iterator_impl(ptr_type ptr)196             explicit iterator_impl(ptr_type ptr) : m_ptr(ptr)
197             {
198             }
199             // Constructor from other iterator type.
200             template <typename V,
201                       enable_if_t<std::is_convertible<typename iterator_impl<V>::ptr_type, ptr_type>::value, int> = 0>
iterator_impl(const iterator_impl<V> & other)202             iterator_impl(const iterator_impl<V> &other) : m_ptr(other.m_ptr)
203             {
204             }
205 
206         private:
207             friend class boost::iterator_core_access;
increment()208             void increment()
209             {
210                 piranha_assert(m_ptr && m_ptr->m_next);
211                 m_ptr = m_ptr->m_next;
212             }
213             template <typename V>
equal(const iterator_impl<V> & other) const214             bool equal(const iterator_impl<V> &other) const
215             {
216                 return m_ptr == other.m_ptr;
217             }
dereference() const218             U &dereference() const
219             {
220                 piranha_assert(m_ptr && m_ptr->m_next);
221                 return *m_ptr->ptr();
222             }
223 
224         public:
225             ptr_type m_ptr;
226         };
227         typedef iterator_impl<T> iterator;
228         typedef iterator_impl<T const> const_iterator;
229         // Static checks on the iterator types.
230         PIRANHA_TT_CHECK(is_forward_iterator, iterator);
231         PIRANHA_TT_CHECK(is_forward_iterator, const_iterator);
listpiranha::hash_set::list232         list() : m_node()
233         {
234         }
listpiranha::hash_set::list235         list(list &&other) noexcept : m_node()
236         {
237             steal_from_rvalue(std::move(other));
238         }
listpiranha::hash_set::list239         list(const list &other) : m_node()
240         {
241             try {
242                 auto cur = &m_node;
243                 auto other_cur = &other.m_node;
244                 while (other_cur->m_next) {
245                     if (cur->m_next) {
246                         // This assert means we are operating on the last element
247                         // of the list, as we are doing back-insertions.
248                         piranha_assert(cur->m_next == &terminator);
249                         // Create a new node with content equal to other_cur
250                         // and linking forward to the terminator.
251                         std::unique_ptr<node> new_node(::new node());
252                         ::new (static_cast<void *>(&new_node->m_storage)) T(*other_cur->ptr());
253                         new_node->m_next = &terminator;
254                         // Link the new node.
255                         cur->m_next = new_node.release();
256                         cur = cur->m_next;
257                     } else {
258                         // This means this is the first node.
259                         ::new (static_cast<void *>(&cur->m_storage)) T(*other_cur->ptr());
260                         cur->m_next = &terminator;
261                     }
262                     other_cur = other_cur->m_next;
263                 }
264             } catch (...) {
265                 destroy();
266                 throw;
267             }
268         }
operator =piranha::hash_set::list269         list &operator=(list &&other)
270         {
271             if (likely(this != &other)) {
272                 // Destroy the content of this.
273                 destroy();
274                 steal_from_rvalue(std::move(other));
275             }
276             return *this;
277         }
operator =piranha::hash_set::list278         list &operator=(const list &other)
279         {
280             if (likely(this != &other)) {
281                 auto tmp = other;
282                 *this = std::move(tmp);
283             }
284             return *this;
285         }
~listpiranha::hash_set::list286         ~list()
287         {
288             destroy();
289         }
steal_from_rvaluepiranha::hash_set::list290         void steal_from_rvalue(list &&other)
291         {
292             piranha_assert(empty());
293             // Do something only if there is content in the other.
294             if (other.m_node.m_next) {
295                 // Move construct current first node with first node of other.
296                 ::new (static_cast<void *>(&m_node.m_storage)) T(std::move(*other.m_node.ptr()));
297                 // Link remaining content of other into this.
298                 m_node.m_next = other.m_node.m_next;
299                 // Destroy first node of other.
300                 other.m_node.ptr()->~T();
301                 other.m_node.m_next = nullptr;
302             }
303             piranha_assert(other.empty());
304         }
305         template <typename U, enable_if_t<std::is_same<T, uncvref_t<U>>::value, int> = 0>
insertpiranha::hash_set::list306         node *insert(U &&item)
307         {
308             // NOTE: optimize with likely/unlikely?
309             if (m_node.m_next) {
310                 // Create the new node and forward-link it to the second node.
311                 std::unique_ptr<node> new_node(::new node());
312                 ::new (static_cast<void *>(&new_node->m_storage)) T(std::forward<U>(item));
313                 new_node->m_next = m_node.m_next;
314                 // Link first node to the new node.
315                 m_node.m_next = new_node.release();
316                 return m_node.m_next;
317             } else {
318                 ::new (static_cast<void *>(&m_node.m_storage)) T(std::forward<U>(item));
319                 m_node.m_next = &terminator;
320                 return &m_node;
321             }
322         }
beginpiranha::hash_set::list323         iterator begin()
324         {
325             return iterator(&m_node);
326         }
endpiranha::hash_set::list327         iterator end()
328         {
329             return iterator((m_node.m_next) ? &terminator : &m_node);
330         }
beginpiranha::hash_set::list331         const_iterator begin() const
332         {
333             return const_iterator(&m_node);
334         }
endpiranha::hash_set::list335         const_iterator end() const
336         {
337             return const_iterator((m_node.m_next) ? &terminator : &m_node);
338         }
emptypiranha::hash_set::list339         bool empty() const
340         {
341             return !m_node.m_next;
342         }
destroypiranha::hash_set::list343         void destroy()
344         {
345             node *cur = &m_node;
346             while (cur->m_next) {
347                 // Store the current value temporarily.
348                 auto old = cur;
349                 // Assign the next.
350                 cur = cur->m_next;
351                 // Destroy the old payload and erase connections.
352                 old->ptr()->~T();
353                 old->m_next = nullptr;
354                 // If the old node was not the initial one, delete it.
355                 if (old != &m_node) {
356                     ::delete old;
357                 }
358             }
359             // After destruction, the list should be equivalent to a default-constructed one.
360             piranha_assert(empty());
361         }
362         static node terminator;
363         node m_node;
364     };
365     // Allocator type.
366     // NOTE: if we move allocator choice in public interface we need to document the exception behaviour of the
367     // allocator. Also, check the validity
368     // of the assumptions on the type returned by allocate(): must it be a pointer or just convertible to pointer?
369     // NOTE: for std::allocator, pointer is guaranteed to be "T *":
370     // http://en.cppreference.com/w/cpp/memory/allocator
371     typedef std::allocator<list> allocator_type;
372 
373 public:
374     /// Functor type for the calculation of hash values.
375     using hasher = Hash;
376     /// Functor type for comparing the items in the set.
377     using key_equal = Pred;
378     /// Key type.
379     using key_type = T;
380     /// Size type.
381     /**
382      * Alias for \p std::size_t.
383      */
384     using size_type = std::size_t;
385 
386 private:
387     // The container is a pointer to an array of lists.
388     using ptr_type = list *;
389     // Internal pack type, containing the pointer to the objects, the hash/equal functor
390     // and the allocator. In many cases these are stateless so we can exploit EBCO if implemented
391     // in the tuple type (likely).
392     using pack_type = std::tuple<ptr_type, hasher, key_equal, allocator_type>;
    // A few handy accessors into the packed members.
    // Pointer to the array of buckets (null when no buckets are allocated).
    ptr_type &ptr()
    {
        return std::get<0u>(m_pack);
    }
    // Const counterpart of ptr().
    const ptr_type &ptr() const
    {
        return std::get<0u>(m_pack);
    }
    // The hashing functor.
    const hasher &hash() const
    {
        return std::get<1u>(m_pack);
    }
    // The equality predicate.
    const key_equal &k_equal() const
    {
        return std::get<2u>(m_pack);
    }
    // The allocator for the bucket array.
    allocator_type &allocator()
    {
        return std::get<3u>(m_pack);
    }
    // Const counterpart of allocator().
    const allocator_type &allocator() const
    {
        return std::get<3u>(m_pack);
    }
418     // Definition of the iterator type for the set.
419     template <typename Key>
420     class iterator_impl : public boost::iterator_facade<iterator_impl<Key>, Key, boost::forward_traversal_tag>
421     {
422         friend class hash_set;
423         typedef typename std::conditional<std::is_const<Key>::value, hash_set const, hash_set>::type set_type;
424         typedef typename std::conditional<std::is_const<Key>::value, typename list::const_iterator,
425                                           typename list::iterator>::type it_type;
426 
427     public:
iterator_impl()428         iterator_impl() : m_set(nullptr), m_idx(0u), m_it()
429         {
430         }
iterator_impl(set_type * set,const size_type & idx,it_type it)431         explicit iterator_impl(set_type *set, const size_type &idx, it_type it) : m_set(set), m_idx(idx), m_it(it)
432         {
433         }
434 
435     private:
436         friend class boost::iterator_core_access;
increment()437         void increment()
438         {
439             piranha_assert(m_set);
440             auto &container = m_set->ptr();
441             // Assert that the current iterator is valid.
442             piranha_assert(m_idx < m_set->bucket_count());
443             piranha_assert(!container[m_idx].empty());
444             piranha_assert(m_it != container[m_idx].end());
445             ++m_it;
446             if (m_it == container[m_idx].end()) {
447                 const size_type container_size = m_set->bucket_count();
448                 while (true) {
449                     ++m_idx;
450                     if (m_idx == container_size) {
451                         m_it = it_type{};
452                         return;
453                     } else if (!container[m_idx].empty()) {
454                         m_it = container[m_idx].begin();
455                         return;
456                     }
457                 }
458             }
459         }
equal(const iterator_impl & other) const460         bool equal(const iterator_impl &other) const
461         {
462             // NOTE: comparing iterators from different containers is UB
463             // in the standard.
464             piranha_assert(m_set && other.m_set);
465             return (m_idx == other.m_idx && m_it == other.m_it);
466         }
dereference() const467         Key &dereference() const
468         {
469             piranha_assert(m_set && m_idx < m_set->bucket_count() && m_it != m_set->ptr()[m_idx].end());
470             return *m_it;
471         }
472 
473     private:
474         set_type *m_set;
475         size_type m_idx;
476         it_type m_it;
477     };
init_from_n_buckets(const size_type & n_buckets,unsigned n_threads)478     void init_from_n_buckets(const size_type &n_buckets, unsigned n_threads)
479     {
480         piranha_assert(!ptr() && !m_log2_size && !m_n_elements);
481         if (unlikely(!n_threads)) {
482             piranha_throw(std::invalid_argument, "the number of threads must be strictly positive");
483         }
484         // Proceed to actual construction only if the requested number of buckets is nonzero.
485         if (!n_buckets) {
486             return;
487         }
488         const size_type log2_size = get_log2_from_hint(n_buckets);
489         const size_type size = size_type(1u) << log2_size;
490         auto new_ptr = allocator().allocate(size);
491         if (unlikely(!new_ptr)) {
492             piranha_throw(std::bad_alloc, );
493         }
494         if (n_threads == 1u) {
495             // Default-construct the elements of the array.
496             // NOTE: this is a noexcept operation, no need to account for rolling back.
497             for (size_type i = 0u; i < size; ++i) {
498                 allocator().construct(&new_ptr[i]);
499             }
500         } else {
501             // Sync variables.
502             using crs_type = std::vector<std::pair<size_type, size_type>>;
503             crs_type constructed_ranges(static_cast<typename crs_type::size_type>(n_threads),
504                                         std::make_pair(size_type(0u), size_type(0u)));
505             if (unlikely(constructed_ranges.size() != n_threads)) {
506                 piranha_throw(std::bad_alloc, );
507             }
508             // Thread function.
509             auto thread_function = [this, new_ptr, &constructed_ranges](const size_type &start, const size_type &end,
510                                                                         const unsigned &thread_idx) {
511                 for (size_type i = start; i != end; ++i) {
512                     this->allocator().construct(&new_ptr[i]);
513                 }
514                 constructed_ranges[thread_idx] = std::make_pair(start, end);
515             };
516             // Work per thread.
517             const auto wpt = size / n_threads;
518             future_list<decltype(thread_function(0u, 0u, 0u))> f_list;
519             try {
520                 for (unsigned i = 0u; i < n_threads; ++i) {
521                     const auto start = static_cast<size_type>(wpt * i),
522                                end = static_cast<size_type>((i == n_threads - 1u) ? size : wpt * (i + 1u));
523                     f_list.push_back(thread_pool::enqueue(i, thread_function, start, end, i));
524                 }
525                 f_list.wait_all();
526                 // NOTE: no need to get_all() here, as we know no exceptions will be generated inside thread_func.
527             } catch (...) {
528                 // NOTE: everything in thread_func is noexcept, if we are here the exception was thrown by enqueue or
529                 // push_back.
530                 // Wait for everything to wind down.
531                 f_list.wait_all();
532                 // Destroy what was constructed.
533                 for (const auto &r : constructed_ranges) {
534                     for (size_type i = r.first; i != r.second; ++i) {
535                         allocator().destroy(&new_ptr[i]);
536                     }
537                 }
538                 // Deallocate before re-throwing.
539                 allocator().deallocate(new_ptr, size);
540                 throw;
541             }
542         }
543         // Assign the members.
544         ptr() = new_ptr;
545         m_log2_size = log2_size;
546     }
547     // Destroy all elements and deallocate ptr().
destroy_and_deallocate()548     void destroy_and_deallocate()
549     {
550         // Proceed to destroy all elements and deallocate only if the set is actually storing something.
551         if (ptr()) {
552             const size_type size = size_type(1u) << m_log2_size;
553             for (size_type i = 0u; i < size; ++i) {
554                 allocator().destroy(&ptr()[i]);
555             }
556             allocator().deallocate(ptr(), size);
557         } else {
558             piranha_assert(!m_log2_size && !m_n_elements);
559         }
560     }
561     // Serialization support.
562     friend class boost::serialization::access;
    template <class Archive>
    void save(Archive &ar, unsigned) const
    {
        // Save the number of elements first, so that load() can prepare
        // an adequate number of buckets before deserialising the items.
        boost_save(ar, size());
        // Serialize the items.
        boost_save_range(ar, begin(), end());
    }
571     template <class Archive>
load(Archive & ar,unsigned)572     void load(Archive &ar, unsigned)
573     {
574         // Reset this.
575         *this = hash_set{};
576         // Recover the size.
577         size_type size;
578         boost_load(ar, size);
579         // Prepare an adequate number of buckets.
580         rehash(boost::numeric_cast<size_type>(std::ceil(static_cast<double>(size) / max_load_factor())));
581         for (size_type i = 0; i < size; ++i) {
582             T tmp;
583             boost_load(ar, tmp);
584             const auto p = insert(std::move(tmp));
585             if (unlikely(!p.second)) {
586                 piranha_throw(std::invalid_argument, "while deserializing a hash_set from a Boost archive "
587                                                      "a duplicate value was encountered");
588             }
589         }
590     }
591     BOOST_SERIALIZATION_SPLIT_MEMBER()
592     // Enabler for insert().
593     template <typename U>
594     using insert_enabler = enable_if_t<std::is_same<key_type, uncvref_t<U>>::value, int>;
595     // Run a consistency check on the set, will return false if something is wrong.
sanity_check() const596     bool sanity_check() const
597     {
598         // Ignore sanity checks on shutdown.
599         if (shutdown()) {
600             return true;
601         }
602         size_type count = 0u;
603         for (size_type i = 0u; i < bucket_count(); ++i) {
604             for (auto it = ptr()[i].begin(); it != ptr()[i].end(); ++it) {
605                 if (_bucket(*it) != i) {
606                     return false;
607                 }
608                 ++count;
609             }
610         }
611         if (count != m_n_elements) {
612             return false;
613         }
614         // m_log2_size must not be equal to or greater than the number of bits of size_type.
615         if (m_log2_size >= unsigned(std::numeric_limits<size_type>::digits)) {
616             return false;
617         }
618         // The container pointer must be consistent with the other members.
619         if (!ptr() && (m_log2_size || m_n_elements)) {
620             return false;
621         }
622         // Check size is consistent with number of iterator traversals.
623         count = 0u;
624         for (auto it = begin(); it != end(); ++it, ++count) {
625         }
626         if (count != m_n_elements) {
627             return false;
628         }
629         return true;
630     }
631     // The number of available nonzero sizes will be the number of bits in the size type. Possible nonzero sizes will be
632     // in
633     // the [2 ** 0, 2 ** (n-1)] range.
634     static const size_type m_n_nonzero_sizes = static_cast<size_type>(std::numeric_limits<size_type>::digits);
635     // Get log2 of set size at least equal to hint. To be used only when hint is not zero.
get_log2_from_hint(const size_type & hint)636     static size_type get_log2_from_hint(const size_type &hint)
637     {
638         piranha_assert(hint);
639         for (size_type i = 0u; i < m_n_nonzero_sizes; ++i) {
640             if ((size_type(1u) << i) >= hint) {
641                 return i;
642             }
643         }
644         piranha_throw(std::bad_alloc, );
645     }
646 
647 public:
648     /// Iterator type.
649     /**
650      * A read-only forward iterator.
651      */
652     using iterator = iterator_impl<key_type const>;
653 
654 private:
655     // Static checks on the iterator type.
656     PIRANHA_TT_CHECK(is_forward_iterator, iterator);
657 
658 public:
659     /// Const iterator type.
660     /**
661      * Equivalent to the iterator type.
662      */
663     using const_iterator = iterator;
664     /// Local iterator.
665     /**
666      * Const iterator that can be used to iterate through a single bucket.
667      */
668     using local_iterator = typename list::const_iterator;
669     /// Default constructor.
670     /**
671      * If not specified, it will default-initialise the hasher and the equality predicate. The resulting
672      * hash set will be empty.
673      *
674      * @param h hasher functor.
675      * @param k equality predicate.
676      *
677      * @throws unspecified any exception thrown by the copy constructors of <tt>Hash</tt> or <tt>Pred</tt>.
678      */
    hash_set(const hasher &h = hasher{}, const key_equal &k = key_equal{})
        : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u)
    {
        // Empty set: null bucket array, zero log2 size, zero elements.
    }
683     /// Constructor from number of buckets.
684     /**
685      * Will construct a set whose number of buckets is at least equal to \p n_buckets. If \p n_threads is not 1,
686      * then the first \p n_threads threads from piranha::thread_pool will be used concurrently for the initialisation
687      * of the set.
688      *
689      * @param n_buckets desired number of buckets.
690      * @param h hasher functor.
691      * @param k equality predicate.
692      * @param n_threads number of threads to use during initialisation.
693      *
694      * @throws std::bad_alloc if the desired number of buckets is greater than an implementation-defined maximum, or in
695      * case
696      * of memory errors.
697      * @throws std::invalid_argument if \p n_threads is zero.
698      * @throws unspecified any exception thrown by:
699      * - the copy constructors of <tt>Hash</tt> or <tt>Pred</tt>,
700      * - piranha::thread_pool::enqueue() or piranha::future_list::push_back(), if \p n_threads is not 1.
701      */
hash_set(const size_type & n_buckets,const hasher & h=hasher{},const key_equal & k=key_equal{},unsigned n_threads=1u)702     explicit hash_set(const size_type &n_buckets, const hasher &h = hasher{}, const key_equal &k = key_equal{},
703                       unsigned n_threads = 1u)
704         : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u)
705     {
706         init_from_n_buckets(n_buckets, n_threads);
707     }
708     /// Copy constructor.
709     /**
710      * The hasher, the equality comparator and the allocator will also be copied.
711      *
712      * @param other piranha::hash_set that will be copied into \p this.
713      *
714      * @throws unspecified any exception thrown by memory allocation errors,
715      * the copy constructor of the stored type, <tt>Hash</tt> or <tt>Pred</tt>.
716      */
hash_set(const hash_set & other)717     hash_set(const hash_set &other)
718         : m_pack(nullptr, other.hash(), other.k_equal(), other.allocator()), m_log2_size(0u), m_n_elements(0u)
719     {
720         // Proceed to actual copy only if other has some content.
721         if (other.ptr()) {
722             const size_type size = size_type(1u) << other.m_log2_size;
723             auto new_ptr = allocator().allocate(size);
724             if (unlikely(!new_ptr)) {
725                 piranha_throw(std::bad_alloc, );
726             }
727             size_type i = 0u;
728             try {
729                 // Copy-construct the elements of the array.
730                 for (; i < size; ++i) {
731                     allocator().construct(&new_ptr[i], other.ptr()[i]);
732                 }
733             } catch (...) {
734                 // Unwind the construction and deallocate, before re-throwing.
735                 for (size_type j = 0u; j < i; ++j) {
736                     allocator().destroy(&new_ptr[j]);
737                 }
738                 allocator().deallocate(new_ptr, size);
739                 throw;
740             }
741             // Assign the members.
742             ptr() = new_ptr;
743             m_log2_size = other.m_log2_size;
744             m_n_elements = other.m_n_elements;
745         } else {
746             piranha_assert(!other.m_log2_size && !other.m_n_elements);
747         }
748     }
749     /// Move constructor.
750     /**
751      * After the move, \p other will have zero buckets and zero elements, and its hasher and equality predicate
752      * will have been used to move-construct their counterparts in \p this.
753      *
754      * @param other set to be moved.
755      */
hash_set(hash_set && other)756     hash_set(hash_set &&other) noexcept
757         : m_pack(std::move(other.m_pack)), m_log2_size(other.m_log2_size), m_n_elements(other.m_n_elements)
758     {
759         // Clear out the other one.
760         other.ptr() = nullptr;
761         other.m_log2_size = 0u;
762         other.m_n_elements = 0u;
763     }
764     /// Constructor from range.
765     /**
766      * Create a set with a copy of a range.
767      *
768      * @param begin begin of range.
769      * @param end end of range.
770      * @param n_buckets number of initial buckets.
771      * @param h hash functor.
772      * @param k key equality predicate.
773      *
774      * @throws std::bad_alloc if the desired number of buckets is greater than an implementation-defined maximum.
775      * @throws unspecified any exception thrown by the copy constructors of <tt>Hash</tt> or <tt>Pred</tt>, or arising
776      * from
777      * calling insert() on the elements of the range.
778      */
779     template <typename InputIterator>
hash_set(const InputIterator & begin,const InputIterator & end,const size_type & n_buckets=0u,const hasher & h=hasher{},const key_equal & k=key_equal{})780     explicit hash_set(const InputIterator &begin, const InputIterator &end, const size_type &n_buckets = 0u,
781                       const hasher &h = hasher{}, const key_equal &k = key_equal{})
782         : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u)
783     {
784         init_from_n_buckets(n_buckets, 1u);
785         for (auto it = begin; it != end; ++it) {
786             insert(*it);
787         }
788     }
789     /// Constructor from initializer list.
790     /**
791      * Will insert() all the elements of the initializer list, ignoring the return value of the operation.
792      * Hash functor and equality predicate will be default-constructed.
793      *
794      * @param list initializer list of elements to be inserted.
795      *
796      * @throws std::bad_alloc if the desired number of buckets is greater than an implementation-defined maximum.
797      * @throws unspecified any exception thrown by either insert() or of the default constructor of <tt>Hash</tt> or
798      * <tt>Pred</tt>.
799      */
800     template <typename U>
hash_set(std::initializer_list<U> list)801     explicit hash_set(std::initializer_list<U> list)
802         : m_pack(nullptr, hasher{}, key_equal{}, allocator_type{}), m_log2_size(0u), m_n_elements(0u)
803     {
804         // We do not care here for possible truncation of list.size(), as this is only an optimization.
805         init_from_n_buckets(static_cast<size_type>(list.size()), 1u);
806         for (const auto &x : list) {
807             insert(x);
808         }
809     }
810     /// Destructor.
811     /**
812      * No side effects.
813      */
~hash_set()814     ~hash_set()
815     {
816         piranha_assert(sanity_check());
817         destroy_and_deallocate();
818     }
819     /// Copy assignment operator.
820     /**
821      * @param other assignment argument.
822      *
823      * @return reference to \p this.
824      *
825      * @throws unspecified any exception thrown by the copy constructor.
826      */
operator =(const hash_set & other)827     hash_set &operator=(const hash_set &other)
828     {
829         if (likely(this != &other)) {
830             hash_set tmp(other);
831             *this = std::move(tmp);
832         }
833         return *this;
834     }
835     /// Move assignment operator.
836     /**
837      * @param other set to be moved into \p this.
838      *
839      * @return reference to \p this.
840      */
operator =(hash_set && other)841     hash_set &operator=(hash_set &&other) noexcept
842     {
843         if (likely(this != &other)) {
844             destroy_and_deallocate();
845             m_pack = std::move(other.m_pack);
846             m_log2_size = other.m_log2_size;
847             m_n_elements = other.m_n_elements;
848             // Zero out other.
849             other.ptr() = nullptr;
850             other.m_log2_size = 0u;
851             other.m_n_elements = 0u;
852         }
853         return *this;
854     }
855     /// Const begin iterator.
856     /**
857      * @return hash_set::const_iterator to the first element of the set, or end() if the set is empty.
858      */
begin() const859     const_iterator begin() const
860     {
861         // NOTE: this could take a while in case of an empty set with lots of buckets. Take a shortcut
862         // taking into account the number of elements in the set - if zero, go directly to end()?
863         const_iterator retval;
864         retval.m_set = this;
865         size_type idx = 0u;
866         const auto b_count = bucket_count();
867         for (; idx < b_count; ++idx) {
868             if (!ptr()[idx].empty()) {
869                 break;
870             }
871         }
872         retval.m_idx = idx;
873         // If we are not at the end, assign proper iterator.
874         if (idx != b_count) {
875             retval.m_it = ptr()[idx].begin();
876         }
877         return retval;
878     }
879     /// Const end iterator.
880     /**
881      * @return hash_set::const_iterator to the position past the last element of the set.
882      */
end() const883     const_iterator end() const
884     {
885         return const_iterator(this, bucket_count(), local_iterator{});
886     }
887     /// Begin iterator.
888     /**
889      * @return hash_set::iterator to the first element of the set, or end() if the set is empty.
890      */
begin()891     iterator begin()
892     {
893         return static_cast<hash_set const *>(this)->begin();
894     }
895     /// End iterator.
896     /**
897      * @return hash_set::iterator to the position past the last element of the set.
898      */
end()899     iterator end()
900     {
901         return static_cast<hash_set const *>(this)->end();
902     }
903     /// Number of elements contained in the set.
904     /**
905      * @return number of elements in the set.
906      */
size() const907     size_type size() const
908     {
909         return m_n_elements;
910     }
911     /// Test for empty set.
912     /**
913      * @return \p true if size() returns 0, \p false otherwise.
914      */
empty() const915     bool empty() const
916     {
917         return !size();
918     }
919     /// Number of buckets.
920     /**
921      * @return number of buckets in the set.
922      */
bucket_count() const923     size_type bucket_count() const
924     {
925         return (ptr()) ? (size_type(1u) << m_log2_size) : size_type(0u);
926     }
927     /// Load factor.
928     /**
929      * @return <tt>(double)size() / bucket_count()</tt>, or 0 if the set is empty.
930      */
load_factor() const931     double load_factor() const
932     {
933         const auto b_count = bucket_count();
934         return (b_count) ? static_cast<double>(size()) / static_cast<double>(b_count) : 0.;
935     }
936     /// Index of destination bucket.
937     /**
938      * Index to which \p k would belong, were it to be inserted into the set. The index of the
939      * destination bucket is the hash value reduced modulo the bucket count.
940      *
941      * @param k input argument.
942      *
943      * @return index of the destination bucket for \p k.
944      *
945      * @throws piranha::zero_division_error if bucket_count() returns zero.
946      * @throws unspecified any exception thrown by _bucket().
947      */
bucket(const key_type & k) const948     size_type bucket(const key_type &k) const
949     {
950         if (unlikely(!bucket_count())) {
951             piranha_throw(zero_division_error, "cannot calculate bucket index in an empty set");
952         }
953         return _bucket(k);
954     }
955     /// Find element.
956     /**
957      * @param k element to be located.
958      *
959      * @return hash_set::const_iterator to <tt>k</tt>'s position in the set, or end() if \p k is not in the set.
960      *
961      * @throws unspecified any exception thrown by _find() or by _bucket().
962      */
find(const key_type & k) const963     const_iterator find(const key_type &k) const
964     {
965         if (unlikely(!bucket_count())) {
966             return end();
967         }
968         return _find(k, _bucket(k));
969     }
970     /// Find element.
971     /**
972      * @param k element to be located.
973      *
974      * @return hash_set::iterator to <tt>k</tt>'s position in the set, or end() if \p k is not in the set.
975      *
976      * @throws unspecified any exception thrown by _find().
977      */
find(const key_type & k)978     iterator find(const key_type &k)
979     {
980         return static_cast<const hash_set *>(this)->find(k);
981     }
982     /// Maximum load factor.
983     /**
984      * @return the maximum load factor allowed before a resize.
985      */
max_load_factor() const986     double max_load_factor() const
987     {
988         // Maximum load factor hard-coded to 1.
989         // NOTE: if this is ever made configurable, it should never be allowed to go to zero.
990         return 1.;
991     }
992     /// Insert element.
993     /**
994      * \note
995      * This template method is activated only if \p T and \p U are the same type, aside from cv qualifications and
996      * references.
997      *
998      * If no other key equivalent to \p k exists in the set, the insertion is successful and returns the
999      * <tt>(it,true)</tt>
1000      * pair - where \p it is the position in the set into which the object has been inserted. Otherwise, the return
1001      * value
1002      * will be <tt>(it,false)</tt> - where \p it is the position of the existing equivalent object.
1003      *
1004      * @param k object that will be inserted into the set.
1005      *
1006      * @return <tt>(hash_set::iterator,bool)</tt> pair containing an iterator to the newly-inserted object (or its
1007      * existing
1008      * equivalent) and the result of the operation.
1009      *
1010      * @throws unspecified any exception thrown by:
1011      * - hash_set::key_type's copy constructor,
1012      * - _find(),
1013      * - _bucket().
1014      * @throws std::overflow_error if a successful insertion would result in size() exceeding the maximum
1015      * value representable by type piranha::hash_set::size_type.
1016      * @throws std::bad_alloc if the operation results in a resize of the set past an implementation-defined
1017      * maximum number of buckets.
1018      */
1019     template <typename U, insert_enabler<U> = 0>
insert(U && k)1020     std::pair<iterator, bool> insert(U &&k)
1021     {
1022         auto b_count = bucket_count();
1023         // Handle the case of a set with no buckets.
1024         if (unlikely(!b_count)) {
1025             _increase_size();
1026             // Update the bucket count.
1027             b_count = 1u;
1028         }
1029         // Try to locate the element.
1030         auto bucket_idx = _bucket(k);
1031         const auto it = _find(k, bucket_idx);
1032         if (it != end()) {
1033             // Item already present, exit.
1034             return std::make_pair(it, false);
1035         }
1036         if (unlikely(m_n_elements == std::numeric_limits<size_type>::max())) {
1037             piranha_throw(std::overflow_error, "maximum number of elements reached");
1038         }
1039         // Item is new. Handle the case in which we need to rehash because of load factor.
1040         if (unlikely(static_cast<double>(m_n_elements + size_type(1u)) / static_cast<double>(b_count)
1041                      > max_load_factor())) {
1042             _increase_size();
1043             // We need a new bucket index in case of a rehash.
1044             bucket_idx = _bucket(k);
1045         }
1046         const auto it_retval = _unique_insert(std::forward<U>(k), bucket_idx);
1047         ++m_n_elements;
1048         return std::make_pair(it_retval, true);
1049     }
1050     /// Erase element.
1051     /**
1052      * Erase the element to which \p it points. \p it must be a valid iterator
1053      * pointing to an element of the set.
1054      *
1055      * Erasing an element invalidates all iterators pointing to elements in the same bucket
1056      * as the erased element.
1057      *
1058      * After the operation has taken place, the size() of the set will be decreased by one.
1059      *
1060      * @param it iterator to the element of the set to be removed.
1061      *
1062      * @return iterator pointing to the element following \p it prior to the element being erased, or end() if
1063      * no such element exists.
1064      */
erase(const_iterator it)1065     iterator erase(const_iterator it)
1066     {
1067         piranha_assert(!empty());
1068         const auto b_it = _erase(it);
1069         iterator retval;
1070         retval.m_set = this;
1071         const auto b_count = bucket_count();
1072         if (b_it == ptr()[it.m_idx].end()) {
1073             // Travel to the next iterator if the deleted element was
1074             // the last one in the bucket.
1075             auto idx = static_cast<size_type>(it.m_idx + 1u);
1076             // Advance to the first non-empty bucket if necessary,
1077             // without going past the end of the set.
1078             for (; idx < b_count; ++idx) {
1079                 if (!ptr()[idx].empty()) {
1080                     break;
1081                 }
1082             }
1083             retval.m_idx = idx;
1084             // If we are not at the end, assign proper iterator.
1085             if (idx != b_count) {
1086                 retval.m_it = ptr()[idx].begin();
1087             }
1088             // NOTE: in case we reached the end of the container, the end() iterator should be:
1089             // {this,bucket_count,local_iterator{}}
1090             // this has been set above already, bucket_count is set by retval.m_idx = idx
1091             // and the default local_iterator ctor is called by the def ctor of iterator.
1092         } else {
1093             // Otherwise, just copy over the iterator returned by _erase().
1094             retval.m_idx = it.m_idx;
1095             retval.m_it = b_it;
1096         }
1097         piranha_assert(m_n_elements);
1098         // Update the number of elements.
1099         m_n_elements = static_cast<size_type>(m_n_elements - 1u);
1100         return retval;
1101     }
1102     /// Remove all elements.
1103     /**
1104      * After this call, size() and bucket_count() will both return zero.
1105      */
clear()1106     void clear()
1107     {
1108         destroy_and_deallocate();
1109         // Reset the members.
1110         ptr() = nullptr;
1111         m_log2_size = 0u;
1112         m_n_elements = 0u;
1113     }
1114     /// Swap content.
1115     /**
1116      * Will use \p std::swap to swap hasher and equality predicate.
1117      *
1118      * @param other swap argument.
1119      *
1120      * @throws unspecified any exception thrown by swapping hasher or equality predicate via \p std::swap.
1121      */
swap(hash_set & other)1122     void swap(hash_set &other)
1123     {
1124         std::swap(m_pack, other.m_pack);
1125         std::swap(m_log2_size, other.m_log2_size);
1126         std::swap(m_n_elements, other.m_n_elements);
1127     }
1128     /// Rehash set.
1129     /**
1130      * Change the number of buckets in the set to at least \p new_size. No rehash is performed
1131      * if rehashing would lead to exceeding the maximum load factor. If \p n_threads is not 1,
1132      * then the first \p n_threads threads from piranha::thread_pool will be used concurrently during
1133      * the rehash operation.
1134      *
1135      * @param new_size new desired number of buckets.
1136      * @param n_threads number of threads to use.
1137      *
1138      * @throws std::invalid_argument if \p n_threads is zero.
1139      * @throws unspecified any exception thrown by the constructor from number of buckets,
1140      * _unique_insert() or _bucket().
1141      */
rehash(const size_type & new_size,unsigned n_threads=1u)1142     void rehash(const size_type &new_size, unsigned n_threads = 1u)
1143     {
1144         if (unlikely(!n_threads)) {
1145             piranha_throw(std::invalid_argument, "the number of threads must be strictly positive");
1146         }
1147         // If rehash is requested to zero, do something only if there are no items stored in the set.
1148         if (!new_size) {
1149             if (!size()) {
1150                 clear();
1151             }
1152             return;
1153         }
1154         // Do nothing if rehashing to the new size would lead to exceeding the max load factor.
1155         if (static_cast<double>(size()) / static_cast<double>(new_size) > max_load_factor()) {
1156             return;
1157         }
1158         // Create a new set with needed amount of buckets.
1159         hash_set new_set(new_size, hash(), k_equal(), n_threads);
1160         try {
1161             const auto it_f = _m_end();
1162             for (auto it = _m_begin(); it != it_f; ++it) {
1163                 const auto new_idx = new_set._bucket(*it);
1164                 new_set._unique_insert(std::move(*it), new_idx);
1165             }
1166         } catch (...) {
1167             // Clear up both this and the new set upon any kind of error.
1168             clear();
1169             new_set.clear();
1170             throw;
1171         }
1172         // Retain the number of elements.
1173         new_set.m_n_elements = m_n_elements;
1174         // Clear the old set.
1175         clear();
1176         // Assign the new set.
1177         *this = std::move(new_set);
1178     }
1179     /// Get information on the sparsity of the set.
1180     /**
1181      * @return an <tt>std::map<size_type,size_type></tt> in which the key is the number of elements
1182      * stored in a bucket and the mapped type the number of buckets containing those many elements.
1183      *
1184      * @throws unspecified any exception thrown by memory errors in standard containers.
1185      */
evaluate_sparsity() const1186     std::map<size_type, size_type> evaluate_sparsity() const
1187     {
1188         const auto it_f = ptr() + bucket_count();
1189         std::map<size_type, size_type> retval;
1190         size_type counter;
1191         for (auto it = ptr(); it != it_f; ++it) {
1192             counter = 0u;
1193             for (auto l_it = it->begin(); l_it != it->end(); ++l_it) {
1194                 ++counter;
1195             }
1196             ++retval[counter];
1197         }
1198         return retval;
1199     }
    /** @name Low-level interface
     * Low-level methods and types.
     */
    //@{
    /// Mutable iterator.
    /**
     * This iterator type provides non-const access to the elements of the set. Please note that modifications
     * to an existing element of the set might invalidate the relation between the element and its position in
     * the set. After such modifications of one or more elements, the only valid operation is hash_set::clear()
     * (destruction of the set before calling hash_set::clear() will lead to assertion failures in debug mode).
     */
    using _m_iterator = iterator_impl<key_type>;
1213     /// Mutable begin iterator.
1214     /**
1215      * @return hash_set::_m_iterator to the beginning of the set.
1216      */
_m_begin()1217     _m_iterator _m_begin()
1218     {
1219         // NOTE: this could take a while in case of an empty set with lots of buckets. Take a shortcut
1220         // taking into account the number of elements in the set - if zero, go directly to end()?
1221         const auto b_count = bucket_count();
1222         _m_iterator retval;
1223         retval.m_set = this;
1224         size_type idx = 0u;
1225         for (; idx < b_count; ++idx) {
1226             if (!ptr()[idx].empty()) {
1227                 break;
1228             }
1229         }
1230         retval.m_idx = idx;
1231         // If we are not at the end, assign proper iterator.
1232         if (idx != b_count) {
1233             retval.m_it = ptr()[idx].begin();
1234         }
1235         return retval;
1236     }
1237     /// Mutable end iterator.
1238     /**
1239      * @return hash_set::_m_iterator to the end of the set.
1240      */
_m_end()1241     _m_iterator _m_end()
1242     {
1243         return _m_iterator(this, bucket_count(), typename list::iterator{});
1244     }
1245     /// Insert unique element (low-level).
1246     /**
1247      * \note
1248      * This template method is activated only if \p T and \p U are the same type, aside from cv qualifications and
1249      * references.
1250      *
1251      * The parameter \p bucket_idx is the index of the destination bucket for \p k and, for a
1252      * set with a nonzero number of buckets, must be equal to the output
1253      * of bucket() before the insertion.
1254      *
1255      * This method will not check if a key equivalent to \p k already exists in the set, it will not
1256      * update the number of elements present in the set after the insertion, it will not resize
1257      * the set in case the maximum load factor is exceeded, nor it will check
1258      * if the value of \p bucket_idx is correct.
1259      *
1260      * @param k object that will be inserted into the set.
1261      * @param bucket_idx destination bucket for \p k.
1262      *
1263      * @return iterator pointing to the newly-inserted element.
1264      *
1265      * @throws unspecified any exception thrown by the copy constructor of hash_set::key_type or by memory allocation
1266      * errors.
1267      */
1268     template <typename U, insert_enabler<U> = 0>
_unique_insert(U && k,const size_type & bucket_idx)1269     iterator _unique_insert(U &&k, const size_type &bucket_idx)
1270     {
1271         // Assert that key is not present already in the set.
1272         piranha_assert(find(std::forward<U>(k)) == end());
1273         // Assert bucket index is correct.
1274         piranha_assert(bucket_idx == _bucket(k));
1275         auto p = ptr()[bucket_idx].insert(std::forward<U>(k));
1276         return iterator(this, bucket_idx, local_iterator(p));
1277     }
1278     /// Find element (low-level).
1279     /**
1280      * Locate element in the set. The parameter \p bucket_idx is the index of the destination bucket for \p k and, for
1281      * a set with a nonzero number of buckets, must be equal to the output
1282      * of bucket() before the insertion. This method will not check if the value of \p bucket_idx is correct.
1283      *
1284      * @param k element to be located.
1285      * @param bucket_idx index of the destination bucket for \p k.
1286      *
1287      * @return hash_set::iterator to <tt>k</tt>'s position in the set, or end() if \p k is not in the set.
1288      *
1289      * @throws unspecified any exception thrown by calling the equality predicate.
1290      */
_find(const key_type & k,const size_type & bucket_idx) const1291     const_iterator _find(const key_type &k, const size_type &bucket_idx) const
1292     {
1293         // Assert bucket index is correct.
1294         piranha_assert(bucket_idx == _bucket(k) && bucket_idx < bucket_count());
1295         const auto &b = ptr()[bucket_idx];
1296         const auto it_f = b.end();
1297         const_iterator retval(end());
1298         for (auto it = b.begin(); it != it_f; ++it) {
1299             if (k_equal()(*it, k)) {
1300                 retval.m_idx = bucket_idx;
1301                 retval.m_it = it;
1302                 break;
1303             }
1304         }
1305         return retval;
1306     }
1307     /// Index of destination bucket from hash value.
1308     /**
1309      * Note that this method will not check if the number of buckets is zero.
1310      *
1311      * @param hash input hash value.
1312      *
1313      * @return index of the destination bucket for an object with hash value \p hash.
1314      */
_bucket_from_hash(const std::size_t & hash) const1315     size_type _bucket_from_hash(const std::size_t &hash) const
1316     {
1317         piranha_assert(bucket_count());
1318         return hash % (size_type(1u) << m_log2_size);
1319     }
1320     /// Index of destination bucket (low-level).
1321     /**
1322      * Equivalent to bucket(), with the exception that this method will not check
1323      * if the number of buckets is zero.
1324      *
1325      * @param k input argument.
1326      *
1327      * @return index of the destination bucket for \p k.
1328      *
1329      * @throws unspecified any exception thrown by the call operator of the hasher.
1330      */
_bucket(const key_type & k) const1331     size_type _bucket(const key_type &k) const
1332     {
1333         return _bucket_from_hash(hash()(k));
1334     }
1335     /// Force update of the number of elements.
1336     /**
1337      * After this call, size() will return \p new_size regardless of the true number of elements in the set.
1338      *
1339      * @param new_size new set size.
1340      */
_update_size(const size_type & new_size)1341     void _update_size(const size_type &new_size)
1342     {
1343         m_n_elements = new_size;
1344     }
1345     /// Increase bucket count.
1346     /**
1347      * Increase the number of buckets to the next implementation-defined value.
1348      *
1349      * @throws std::bad_alloc if the operation results in a resize of the set past an implementation-defined
1350      * maximum number of buckets.
1351      * @throws unspecified any exception thrown by rehash().
1352      */
_increase_size()1353     void _increase_size()
1354     {
1355         if (unlikely(m_log2_size >= m_n_nonzero_sizes - 1u)) {
1356             piranha_throw(std::bad_alloc, );
1357         }
1358         // We must take care here: if the set has zero buckets,
1359         // the next log2_size is 0u. Otherwise increase current log2_size.
1360         piranha_assert(ptr() || (!ptr() && !m_log2_size));
1361         const auto new_log2_size = (ptr()) ? (m_log2_size + 1u) : 0u;
1362         // Rehash to the new size.
1363         rehash(size_type(1u) << new_log2_size);
1364     }
1365     /// Const reference to list in bucket.
1366     /**
1367      * @param idx index of the bucket whose list will be returned.
1368      *
1369      * @return a const reference to the list of items contained in the bucket positioned
1370      * at index \p idx.
1371      */
_get_bucket_list(const size_type & idx) const1372     const list &_get_bucket_list(const size_type &idx) const
1373     {
1374         piranha_assert(idx < bucket_count());
1375         return ptr()[idx];
1376     }
    /// Erase element.
    /**
     * Erase the element to which \p it points. \p it must be a valid iterator
     * pointing to an element of the set.
     *
     * Erasing an element invalidates all iterators pointing to elements in the same bucket
     * as the erased element.
     *
     * This method will not update the number of elements in the set, nor it will try to access elements
     * outside the bucket to which \p it refers.
     *
     * @param it iterator to the element of the set to be removed.
     *
     * @return local iterator pointing to the element following \p it prior to the element being erased,
     * or local end() if no such element exists.
     */
    local_iterator _erase(const_iterator it)
    {
        // Verify the iterator is valid: it must refer to this set, to an existing
        // bucket, and to a live element inside that bucket.
        piranha_assert(it.m_set == this);
        piranha_assert(it.m_idx < bucket_count());
        piranha_assert(!ptr()[it.m_idx].empty());
        piranha_assert(it.m_it != ptr()[it.m_idx].end());
        auto &bucket = ptr()[it.m_idx];
        // If the pointed-to element is the first one in the bucket, we need special care:
        // the first element's payload lives in the bucket's own node (bucket.m_node),
        // so it cannot be unlinked like a heap node.
        if (&*it == &*bucket.m_node.ptr()) {
            // Destroy the payload.
            bucket.m_node.ptr()->~T();
            if (bucket.m_node.m_next == &bucket.terminator) {
                // Special handling if this was the only element: reset the bucket
                // to its empty state and return its local end().
                bucket.m_node.m_next = nullptr;
                return bucket.end();
            } else {
                // Store the link in the second element (this could be the terminator).
                auto tmp = bucket.m_node.m_next->m_next;
                // Move-construct from the second element into the first node's storage,
                // then destroy the moved-from payload and free its node.
                ::new (static_cast<void *>(&bucket.m_node.m_storage)) T(std::move(*bucket.m_node.m_next->ptr()));
                bucket.m_node.m_next->ptr()->~T();
                ::delete bucket.m_node.m_next;
                // Establish the new link.
                bucket.m_node.m_next = tmp;
                return bucket.begin();
            }
        } else {
            // The element is in a heap-allocated node: walk the bucket's list keeping a
            // trailing iterator, so the erased node can be unlinked from its predecessor.
            auto b_it = bucket.begin();
            auto prev_b_it = b_it;
            const auto b_it_f = bucket.end();
            ++b_it;
            for (; b_it != b_it_f; ++b_it, ++prev_b_it) {
                if (&*b_it == &*it) {
                    // Assign to the previous element the next link of the current one.
                    prev_b_it.m_ptr->m_next = b_it.m_ptr->m_next;
                    // Delete the current one (payload first, then the node itself).
                    b_it.m_ptr->ptr()->~T();
                    ::delete b_it.m_ptr;
                    break;
                };
            }
            // We never want to go through the whole list, it means the element
            // to which 'it' refers is not here: assert that the iterator we just
            // erased was not end() - i.e., it was pointing to something.
            piranha_assert(b_it.m_ptr);
            // Move forward the iterator that originally preceded the erased item.
            // It will now point to the item past the erased one or to the local end().
            return ++prev_b_it;
        }
    }
1445     //@}
private:
    // Packed internal state; presumably bundles the bucket array with the hashing
    // functor and the equality predicate -- NOTE(review): confirm against the
    // definition of pack_type earlier in the file.
    pack_type m_pack;
    // Base-2 logarithm of the bucket count, as the name suggests (implying
    // power-of-two bucket sizes) -- TODO confirm.
    size_type m_log2_size;
    // Number of elements currently stored in the set.
    size_type m_n_elements;
1450 };
1451 
// Out-of-line definition of the static terminator node: its address serves as the
// end-of-list sentinel for buckets (see the m_next == &bucket.terminator checks).
template <typename T, typename Hash, typename Pred>
typename hash_set<T, Hash, Pred>::node hash_set<T, Hash, Pred>::list::terminator;
1454 
// Out-of-line definition of the in-class-initialised static constant, required
// (pre-C++17) whenever the member is odr-used.
template <typename T, typename Hash, typename Pred>
const typename hash_set<T, Hash, Pred>::size_type hash_set<T, Hash, Pred>::m_n_nonzero_sizes;
1457 
inline namespace impl
{

// Enablers for boost s11n.
// Saving is available only if both the value type T and the set's size type are
// boost-saveable (presumably the serialised form stores the element count as
// size_type -- confirm in the boost_save_via_boost_api implementation).
template <typename Archive, typename T, typename Hash, typename Pred>
using hash_set_boost_save_enabler
    = enable_if_t<conjunction<has_boost_save<Archive, T>,
                              has_boost_save<Archive, typename hash_set<T, Hash, Pred>::size_type>>::value>;

// Mirror condition for loading: both T and the size type must be boost-loadable.
template <typename Archive, typename T, typename Hash, typename Pred>
using hash_set_boost_load_enabler
    = enable_if_t<conjunction<has_boost_load<Archive, T>,
                              has_boost_load<Archive, typename hash_set<T, Hash, Pred>::size_type>>::value>;
}
1472 
/// Specialisation of piranha::boost_save() for piranha::hash_set.
/**
 * \note
 * This specialisation is enabled only if \p T and the size type of piranha::hash_set satisfy
 * piranha::has_boost_save.
 *
 * The hashing functor and the equality predicate are not serialized.
 *
 * @throws unspecified any exception thrown by piranha::boost_save().
 */
// The implementation is inherited wholesale from the generic helper, which
// delegates to hash_set's Boost.Serialization support.
template <typename Archive, typename T, typename Hash, typename Pred>
struct boost_save_impl<Archive, hash_set<T, Hash, Pred>, hash_set_boost_save_enabler<Archive, T, Hash, Pred>>
    : boost_save_via_boost_api<Archive, hash_set<T, Hash, Pred>> {
};
1487 
/// Specialisation of piranha::boost_load() for piranha::hash_set.
/**
 * \note
 * This specialisation is enabled only if \p T and the size type of piranha::hash_set satisfy
 * piranha::has_boost_load.
 *
 * In case duplicate elements are encountered during deserialization, an exception will be raised. Before performing
 * the deserialization, the output piranha::hash_set is reset with a default-constructed instance of piranha::hash_set.
 * The hashing functor and the equality predicate are not deserialized. The basic exception safety guarantee is
 * provided.
 *
 * @throws std::invalid_argument if a duplicate element is encountered during deserialization.
 * @throws unspecified any exception thrown by:
 * - the public interface of piranha::hash_set,
 * - piranha::boost_load(),
 * - <tt>boost::numeric_cast()</tt>.
 */
// Like the save specialisation, this inherits the generic Boost-API-based loader.
template <typename Archive, typename T, typename Hash, typename Pred>
struct boost_load_impl<Archive, hash_set<T, Hash, Pred>, hash_set_boost_load_enabler<Archive, T, Hash, Pred>>
    : boost_load_via_boost_api<Archive, hash_set<T, Hash, Pred>> {
};
1509 
1510 #if defined(PIRANHA_WITH_MSGPACK)
1511 
inline namespace impl
{

// Enablers for msgpack s11n.
// Packing requires a valid msgpack stream, a packable value type, and the ability
// to safely convert the set's size to std::uint32_t (presumably because msgpack
// array lengths are 32-bit -- confirm against msgpack_pack_range()).
template <typename Stream, typename T, typename Hash, typename Pred>
using hash_set_msgpack_pack_enabler
    = enable_if_t<conjunction<is_msgpack_stream<Stream>,
                              has_safe_cast<std::uint32_t, typename hash_set<T, Hash, Pred>::size_type>,
                              has_msgpack_pack<Stream, T>>::value>;

// Conversion only requires that the value type itself is msgpack-convertible.
template <typename T>
using hash_set_msgpack_convert_enabler = enable_if_t<has_msgpack_convert<T>::value>;
}
1525 
/// Specialisation of piranha::msgpack_pack() for piranha::hash_set.
/**
 * \note
 * This specialisation is enabled only if
 * - \p Stream satisfies piranha::is_msgpack_stream,
 * - \p T satisfies piranha::has_msgpack_pack,
 * - the size type of piranha::hash_set is safely convertible to \p std::uint32_t.
 */
template <typename Stream, typename T, typename Hash, typename Pred>
struct msgpack_pack_impl<Stream, hash_set<T, Hash, Pred>, hash_set_msgpack_pack_enabler<Stream, T, Hash, Pred>> {
    /// Call operator.
    /**
     * This method will serialize \p h into \p p using the format \p f. The hashing functor and the equality predicate
     * are not serialized. The msgpack representation of a piranha::hash_set consists of an array containing the
     * items in the set.
     *
     * @param p the target packer.
     * @param h the piranha::hash_set that will be serialized.
     * @param f the desired piranha::msgpack_format.
     *
     * @throws unspecified any exception thrown by:
     * - the public interface of <tt>msgpack::packer</tt>,
     * - piranha::safe_cast(),
     * - piranha::msgpack_pack().
     */
    void operator()(msgpack::packer<Stream> &p, const hash_set<T, Hash, Pred> &h, msgpack_format f) const
    {
        // Delegate to the generic range-packing helper, passing the element count
        // so it can emit the msgpack array header up front.
        msgpack_pack_range(p, h.begin(), h.end(), h.size(), f);
    }
};
1556 
1557 /// Specialisation of piranha::msgpack_convert() for piranha::hash_set.
1558 /**
1559  * \note
1560  * This specialisation is enabled only if \p T satisfies piranha::has_msgpack_convert.
1561  */
1562 template <typename T, typename Hash, typename Pred>
1563 struct msgpack_convert_impl<hash_set<T, Hash, Pred>, hash_set_msgpack_convert_enabler<T>> {
1564     /// Call operator.
1565     /**
1566      * This method will convert the input object \p o into \p h using the format \p f. In case duplicate elements
1567      * are encountered during deserialization, an exception will be raised. Before performing the deserialization,
1568      * \p h is reset with a default-constructed instance of piranha::hash_set. The hashing functor and the equality
1569      * predicate are not deserialized. This method provides the basic exception safety guarantee.
1570      *
1571      * @param h the target piranha::hash_set.
1572      * @param o the source <tt>msgpack::object</tt>.
1573      * @param f the desired piranha::msgpack_format.
1574      *
1575      * @throws std::invalid_argument if a duplicate element is encountered during deserialization.
1576      * @throws unspecified any exception thrown by:
1577      * - the public interface of piranha::hash_set and <tt>msgpack::object</tt>,
1578      * - <tt>boost::numeric_cast()</tt>,
1579      * - piranha::msgpack_convert().
1580      */
operator ()piranha::msgpack_convert_impl1581     void operator()(hash_set<T, Hash, Pred> &h, const msgpack::object &o, msgpack_format f) const
1582     {
1583         // Clear out the retval.
1584         h = hash_set<T, Hash, Pred>{};
1585         // Extract the array of items as a vector of objects.
1586         std::vector<msgpack::object> items;
1587         o.convert(items);
1588         // Prepare the number of buckets.
1589         h.rehash(boost::numeric_cast<typename hash_set<T, Hash, Pred>::size_type>(
1590             std::ceil(static_cast<double>(items.size()) / h.max_load_factor())));
1591         // Deserialize the items.
1592         for (const auto &obj : items) {
1593             T tmp;
1594             msgpack_convert(tmp, obj, f);
1595             const auto p = h.insert(std::move(tmp));
1596             if (unlikely(!p.second)) {
1597                 piranha_throw(std::invalid_argument, "while deserializing a hash_set from a msgpack object "
1598                                                      "a duplicate value was encountered");
1599             }
1600         }
1601     }
1602 };
1603 
1604 #endif
1605 }
1606 
1607 #endif
1608