1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 //    names, trademarks, service marks, or product names of the Licensor
11 //    and its affiliates, except as required to comply with Section 4(c) of
12 //    the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 //     http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_DENSE_HASH_SET_H
25 #define PXR_BASE_TF_DENSE_HASH_SET_H
26 
27 /// \file tf/denseHashSet.h
28 
29 #include "pxr/pxr.h"
30 #include "pxr/base/tf/hashmap.h"
31 
32 #include <memory>
33 #include <vector>
34 
35 #include <boost/compressed_pair.hpp>
36 #include <boost/iterator/iterator_facade.hpp>
37 #include <boost/utility.hpp>
38 
39 PXR_NAMESPACE_OPEN_SCOPE
40 
41 /// \class TfDenseHashSet
42 ///
43 /// This is a space efficient container that mimics the TfHashSet API that
44 /// uses a vector for storage when the size of the set is small.
45 ///
46 /// When the set gets bigger than \p Threshold a TfHashMap is allocated
47 /// that is used to accelerate lookup in the vector.
48 ///
49 /// \warning This differs from a TfHashSet in so far that inserting and
50 /// removing elements invalidate all iterators of the container.
51 ///
52 template <
53     class    Element,
54     class    HashFn,
55     class    EqualElement  = std::equal_to<Element>,
56     unsigned Threshold = 128
57 >
58 class TfDenseHashSet
59 {
60 public:
61 
62     typedef Element value_type;
63 
64 ////////////////////////////////////////////////////////////////////////////////
65 
66 private:
67 
68     // The vector type holding all data for this dense hash set.
69     typedef std::vector<Element> _Vector;
70 
71     // The hash map used when the map holds more than Threshold elements.
72     typedef TfHashMap<Element, size_t, HashFn, EqualElement> _HashMap;
73 
74 ////////////////////////////////////////////////////////////////////////////////
75 
76 public:
77 
78     /// An iterator type for this set.  Note that this one is const as well,
79     /// as we can't allow in-place modification of elements due to the
80     /// potentially allocated hash map.
81     typedef typename _Vector::const_iterator iterator;
82 
83     /// A const_iterator type for this set.
84     typedef typename _Vector::const_iterator const_iterator;
85 
86     /// Return type for insert() method.
87     typedef std::pair<const_iterator, bool> insert_result;
88 
89 public:
90 
91     /// Ctor.
92     ///
93     explicit TfDenseHashSet(
94         const HashFn       &hashFn       = HashFn(),
95         const EqualElement &equalElement = EqualElement())
96     {
97         _hash() = hashFn;
98         _equ()  = equalElement;
99     }
100 
101     /// Copy Ctor.
102     ///
TfDenseHashSet(const TfDenseHashSet & rhs)103     TfDenseHashSet(const TfDenseHashSet &rhs)
104     :   _vectorHashFnEqualFn(rhs._vectorHashFnEqualFn) {
105         if (rhs._h) {
106             _h.reset(new _HashMap(*rhs._h));
107         }
108     }
109 
110     /// Move Ctor.
111     ///
112     TfDenseHashSet(TfDenseHashSet &&rhs) = default;
113 
114     /// Construct from range.
115     ///
116     template <class Iterator>
TfDenseHashSet(Iterator begin,Iterator end)117     TfDenseHashSet(Iterator begin, Iterator end) {
118         insert(begin, end);
119     }
120 
121     /// Construct from an initializer_list.
122     ///
TfDenseHashSet(std::initializer_list<Element> l)123     TfDenseHashSet(std::initializer_list<Element> l) {
124         insert(l.begin(), l.end());
125     }
126 
127     /// Copy assignment operator.
128     ///
129     TfDenseHashSet &operator=(const TfDenseHashSet &rhs) {
130         if (this != &rhs) {
131             TfDenseHashSet temp(rhs);
132             temp.swap(*this);
133         }
134         return *this;
135     }
136 
137     /// Move assignment operator.
138     ///
139     TfDenseHashSet &operator=(TfDenseHashSet &&rhs) = default;
140 
141     /// Assignment from an initializer_list.
142     ///
143     TfDenseHashSet &operator=(std::initializer_list<Element> l) {
144         clear();
145         insert(l.begin(), l.end());
146         return *this;
147     }
148 
149     /// Equality operator.
150     ///
151     bool operator==(const TfDenseHashSet &rhs) const {
152 
153         if (size() != rhs.size())
154             return false;
155 
156         //XXX: Should we compare the HashFn and EqualElement too?
157         const_iterator tend = end();
158 
159         for(const_iterator iter = begin(); iter != tend; ++iter) {
160             if (!rhs.count(*iter))
161                 return false;
162         }
163 
164         return true;
165     }
166 
167     bool operator!=(const TfDenseHashSet &rhs) const {
168         return !(*this == rhs);
169     }
170 
171     /// Erases all of the elements
172     ///
clear()173     void clear() {
174         _vec().clear();
175         _h.reset();
176     }
177 
178     /// Swaps the contents of two sets.
179     ///
swap(TfDenseHashSet & rhs)180     void swap(TfDenseHashSet &rhs) {
181         _vectorHashFnEqualFn.swap(rhs._vectorHashFnEqualFn);
182         _h.swap(rhs._h);
183     }
184 
185     /// \c true if the \c set's size is 0.
186     ///
empty()187     bool empty() const {
188         return _vec().empty();
189     }
190 
191     /// Returns the size of the set.
192     ///
size()193     size_t size() const {
194         return _vec().size();
195     }
196 
197     /// Returns an const_iterator pointing to the beginning of the set.
198     ///
begin()199     const_iterator begin() const {
200         return _vec().begin();
201     }
202 
203     /// Returns an const_iterator pointing to the end of the set.
204     ///
end()205     const_iterator end() const {
206         return _vec().end();
207     }
208 
209     /// Finds the element with key \p k.
210     ///
find(const Element & k)211     const_iterator find(const Element &k) const {
212 
213         if (_h) {
214             typename _HashMap::const_iterator iter = _h->find(k);
215             if (iter == _h->end())
216                 return end();
217 
218             return _vec().begin() + iter->second;
219         }
220 
221         typename _Vector::const_iterator iter, end = _vec().end();
222 
223         for(iter = _vec().begin(); iter != end; ++iter)
224             if (_equ()(*iter, k))
225                 break;
226 
227         return iter;
228     }
229 
230     /// Returns the number of elements with key \p k.  Which is either 0 or 1.
231     ///
count(const Element & k)232     size_t count(const Element &k) const {
233         return find(k) != end();
234     }
235 
236     /// Returns a pair of <iterator, bool> where iterator points to the element
237     /// in the list and bool is true if a new element was inserted.
238     ///
insert(const value_type & v)239     insert_result insert(const value_type &v) {
240 
241         if (_h) {
242 
243             // Attempt to insert the new index.  If this fails, we can't
244             // insert v.
245 
246             std::pair<typename _HashMap::iterator, bool> res =
247                 _h->insert(std::make_pair(v, size()));
248 
249             if (!res.second)
250                 return insert_result(_vec().begin() + res.first->second, false);
251 
252         } else {
253 
254             // Bail if already inserted.
255             const_iterator iter = find(v);
256             if (iter != end())
257                 return insert_result(iter, false);
258         }
259 
260         // Insert at end and create table if necessary.
261         _vec().push_back(v);
262         _CreateTableIfNeeded();
263 
264         return insert_result(std::prev(end()), true);
265     }
266 
267     /// Insert a range into the hash set.  Note that \p i0 and \p i1 can't
268     /// point into the hash set.
269     ///
270     template<class IteratorType>
insert(IteratorType i0,IteratorType i1)271     void insert(IteratorType i0, IteratorType i1) {
272         // Assume elements are more often than not unique, so if the sum of the
273         // current size and the size of the range is greater than or equal to
274         // the threshold, we create the table immediately so we don't do m*n
275         // work before creating the table.
276         if (size() + std::distance(i0, i1) >= Threshold)
277             _CreateTable();
278 
279         // Insert elements.
280         for (IteratorType iter = i0; iter != i1; ++iter)
281             insert(*iter);
282     }
283 
284     /// Insert a range of unique elements into the container.  [begin, end)
285     /// *must not* contain any duplicate elements.
286     ///
287     template <class Iterator>
insert_unique(Iterator begin,Iterator end)288     void insert_unique(Iterator begin, Iterator end) {
289         // Special-case empty container.
290         if (empty()) {
291             _vec().assign(begin, end);
292             _CreateTableIfNeeded();
293         } else {
294             // Just insert, since duplicate checking will use the hash.
295             insert(begin, end);
296         }
297     }
298 
299     /// Erase element with key \p k.  Returns the number of elements erased.
300     ///
erase(const Element & k)301     size_t erase(const Element &k) {
302 
303         const_iterator iter = find(k);
304         if (iter != end()) {
305             erase(iter);
306             return 1;
307         }
308         return 0;
309     }
310 
311     /// Erases element pointed to by \p iter.
312     ///
erase(const iterator & iter)313     void erase(const iterator &iter) {
314 
315         // Erase key from hash table if applicable.
316         if (_h)
317             _h->erase(*iter);
318 
319         // If we are not removing that last element...
320         if (iter != std::prev(end())) {
321             using std::swap;
322 
323             // ... move the last element into the erased placed.
324             // Note that we can cast constness away because we explicitly update
325             // the TfHashMap _h below.
326             swap(*const_cast<Element *>(&(*iter)), _vec().back());
327 
328             // ... and update the moved element's index.
329             if (_h)
330                 (*_h)[*iter] = iter - _vec().begin();
331         }
332 
333         _vec().pop_back();
334     }
335 
336     /// Erases a range from the set.
337     ///
erase(const iterator & i0,const iterator & i1)338     void erase(const iterator &i0, const iterator &i1) {
339 
340         if (_h) {
341             for(const_iterator iter = i0; iter != i1; ++iter)
342                 _h->erase(iter->first);
343         }
344 
345         const_iterator vremain = _vec().erase(i0, i1);
346 
347         if (_h) {
348             for(; vremain != _vec().end(); ++vremain)
349                 (*_h)[*vremain] = vremain - _vec().begin();
350         }
351     }
352 
353     /// Optimize storage space.
354     ///
shrink_to_fit()355     void shrink_to_fit() {
356 
357         // Shrink the vector to best size.
358         _vec().shrink_to_fit();
359 
360         if (!_h)
361             return;
362 
363         size_t sz = size();
364 
365         // If we have a hash map and are underneath the threshold, discard it.
366         if (sz < Threshold) {
367 
368             _h.reset();
369 
370         } else {
371 
372             // Otherwise, allocate a new hash map with the optimal size.
373             _h.reset(new _HashMap(sz, _hash(), _equ()));
374             for(size_t i=0; i<sz; ++i)
375                 (*_h)[_vec()[i]] = i;
376         }
377     }
378 
379     /// Index into set via \p index.
380     ///
381     const Element &operator[](size_t index) const {
382         TF_VERIFY(index < size());
383         return _vec()[index];
384     }
385 
386 ////////////////////////////////////////////////////////////////////////////////
387 
388 private:
389 
390     // Helper to access the storage vector.
_vec()391     _Vector &_vec() {
392         return _vectorHashFnEqualFn.first().first();
393     }
394 
395     // Helper to access the hash functor.
_hash()396     HashFn &_hash() {
397         return _vectorHashFnEqualFn.first().second();
398     }
399 
400     // Helper to access the equality functor.
_equ()401     EqualElement &_equ() {
402         return _vectorHashFnEqualFn.second();
403     }
404 
405     // Helper to access the storage vector.
_vec()406     const _Vector &_vec() const {
407         return _vectorHashFnEqualFn.first().first();
408     }
409 
410     // Helper to access the hash functor.
_hash()411     const HashFn &_hash() const {
412         return _vectorHashFnEqualFn.first().second();
413     }
414 
415     // Helper to access the equality functor.
_equ()416     const EqualElement &_equ() const {
417         return _vectorHashFnEqualFn.second();
418     }
419 
420     // Helper to create the acceleration table if size dictates.
_CreateTableIfNeeded()421     inline void _CreateTableIfNeeded() {
422         if (size() >= Threshold) {
423             _CreateTable();
424         }
425     }
426 
427     // Unconditionally create the acceleration table if it doesn't already
428     // exist.
_CreateTable()429     inline void _CreateTable() {
430         if (!_h) {
431             _h.reset(new _HashMap(Threshold, _hash(), _equ()));
432             for(size_t i=0; i < size(); ++i)
433                 (*_h)[_vec()[i]] = i;
434         }
435     }
436 
437     // Vector holding all elements along with the EqualElement functor.  Since
438     // sizeof(EqualElement) == 0 in many cases we use a compressed_pair to not
439     // pay a size penalty.
440 
441     typedef
442         boost::compressed_pair<
443             boost::compressed_pair<_Vector, HashFn>,
444             EqualElement>
445         _VectorHashFnEqualFn;
446 
447     _VectorHashFnEqualFn _vectorHashFnEqualFn;
448 
449     // Optional hash map that maps from keys to vector indices.
450     std::unique_ptr<_HashMap> _h;
451 };
452 
453 PXR_NAMESPACE_CLOSE_SCOPE
454 
455 #endif // PXR_BASE_TF_DENSE_HASH_SET_H
456