1 /** @file
2
3 Instrusive hash map.
4
5 @section license License
6
7 Licensed to the Apache Software Foundation (ASF) under one
8 or more contributor license agreements. See the NOTICE file
9 distributed with this work for additional information
10 regarding copyright ownership. The ASF licenses this file
11 to you under the Apache License, Version 2.0 (the
12 "License"); you may not use this file except in compliance
13 with the License. You may obtain a copy of the License at
14
15 http://www.apache.org/licenses/LICENSE-2.0
16
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
22 */
23
24 #pragma once
25
26 #include <vector>
27 #include <array>
28 #include <algorithm>
29 #include "tscpp/util/IntrusiveDList.h"
30
31 /** Intrusive Hash Table.
32
33 Values stored in this container are not destroyed when the container is destroyed or removed from the container.
34 They must be released by the client.
35
36 Duplicate keys are allowed. Clients must walk the list for multiple entries.
37 @see @c Location::operator++()
38
39 By default the table automatically expands to limit the average chain length. This can be tuned. If set
40 to @c MANUAL then the table will expand @b only when explicitly requested to do so by the client.
41 @see @c ExpansionPolicy
42 @see @c setExpansionPolicy()
43 @see @c setExpansionLimit()
44 @see @c expand()
45
46 The hash table is configured by a descriptor class. This must contain the following members
47
48 - The static method <tt>key_type key_of(value_type *)</tt> which returns the key for an instance of @c value_type.
49
50 - The static method <tt>bool equal(key_type lhs, key_type rhs)</tt> which checks if two instances of @c Key are the same.
51
52 - The static method <tt>hash_id hash_of(key_type)</tt> which computes the hash value of the key. @c ID must a numeric type.
53
54 - The static method <tt>value_type *& next_ptr(value_type *)</tt> which returns a reference to a forward pointer.
55
56 - The static method <tt>value_type *& prev_ptr(value_type *)</tt> which returns a reference to a backwards pointer.
57
58 These are the required members, it is permitted to have other methods (if the descriptor is used for other purposes)
59 or to provide overloads of the methods. Note this is compatible with @c IntrusiveDList.
60
61 Several internal types are deduced from these arguments.
62
63 @a Key is the return type of @a key_of and represents the key that distinguishes instances of @a value_type. Two
64 instances of @c value_type are considered the same if their respective @c Key values are @c equal. @c Key is
65 presumed cheap to copy. If the underlying key is not a simple type then @a Key should be a constant pointer or a
66 constant reference. The hash table will never attempt to modify a key.
67
68 @a ID The numeric type that is the hash value for an instance of @a Key.
69
70 Example for @c Http1ServerSession keyed by the origin server IP address.
71
72 @code
73 struct Descriptor {
74 static sockaddr const* key_of(Http1ServerSession const* value) { return &value->ip.sa }
75 static bool equal(sockaddr const* lhs, sockaddr const* rhs) { return ats_ip_eq(lhs, rhs); }
76 static uint32_t hash_of(sockaddr const* key) { return ats_ip_hash(key); }
77 static Http1ServerSession *& next_ptr(Http1ServerSession * ssn) { return ssn->_next; }
78 static Http1ServerSession *& prev_ptr(Http1ServerSession * ssn) { return ssn->_prev; }
79 };
80 using Table = IntrusiveHashMap<Descriptor>;
81 @endcode
82
83 */
84 template <typename H> class IntrusiveHashMap
85 {
86 using self_type = IntrusiveHashMap;
87
88 public:
89 /// Type of elements in the map.
90 using value_type = typename std::remove_pointer<typename std::remove_reference<decltype(H::next_ptr(nullptr))>::type>::type;
91 /// Key type for the elements.
92 using key_type = decltype(H::key_of(static_cast<value_type *>(nullptr)));
93 /// The numeric hash ID computed from a key.
94 using hash_id = decltype(H::hash_of(H::key_of(static_cast<value_type *>(nullptr))));
95
96 /// When the hash table is expanded.
97 enum ExpansionPolicy {
98 MANUAL, ///< Client must explicitly expand the table.
99 AVERAGE, ///< Table expands if average chain length exceeds limit. [default]
100 MAXIMUM ///< Table expands if any chain length exceeds limit.
101 };
102
103 protected:
104 /** List of elements.
105 * All table elements are in this list. The buckets reference their starting element in the list, or nothing if
106 * no elements are in that bucket.
107 */
108 using List = ts::IntrusiveDList<H>;
109
110 /// A bucket for the hash map.
111 struct Bucket {
112 /// Support for ts::IntrusiveDList<Bucket::Linkage>, definitions and link storage.
113 struct Linkage {
114 static Bucket *&next_ptr(Bucket *b); ///< Access next pointer.
115 static Bucket *&prev_ptr(Bucket *b); ///< Access prev pointer.
116 Bucket *_prev{nullptr}; ///< Prev pointer.
117 Bucket *_next{nullptr}; ///< Next pointer.
118 } _link;
119
120 value_type *_v{nullptr}; ///< First element in the bucket.
121 size_t _count{0}; ///< Number of elements in the bucket.
122
123 /** Marker for the chain having different keys.
124
125 This is used to determine if expanding the hash map would be useful - buckets that are not mixed
126 will not be changed by an expansion.
127 */
128 bool _mixed_p{false};
129
130 /// Compute the limit value for iteration in this bucket.
131 /// This is the value of the next bucket, or @c nullptr if no next bucket.
132 value_type *limit() const;
133
134 /// Verify @a v is in this bucket.
135 bool contains(value_type *v) const;
136
137 void clear(); ///< Reset to initial state.
138 };
139
140 public:
141 /// The default starting number of buckets.
142 static size_t constexpr DEFAULT_BUCKET_COUNT = 7; ///< POOMA.
143 /// The default expansion policy limit.
144 static size_t constexpr DEFAULT_EXPANSION_LIMIT = 4; ///< Value from previous version.
145 /// Expansion policy if not specified in constructor.
146 static ExpansionPolicy constexpr DEFAULT_EXPANSION_POLICY = AVERAGE;
147
148 using iterator = typename List::iterator;
149 using const_iterator = typename List::const_iterator;
150
151 /// A range of elements in the map.
152 /// It is a half open range, [first, last) in the usual STL style.
153 /// @internal I tried @c std::pair as a base for this but was unable to get STL container operations to work.
154 struct range : public std::pair<iterator, iterator> {
155 using super_type = std::pair<iterator, iterator>; ///< Super type.
156 using super_type::super_type; ///< Use super type constructors.
157
158 // These methods enable treating the range as a view in to the hash map.
159
160 /// Return @a first
161 iterator const &begin() const;
162 /// Return @a last
163 iterator const &end() const;
164 };
165
166 /// A range of constant elements in the map.
167 struct const_range : public std::pair<const_iterator, const_iterator> {
168 using super_type = std::pair<const_iterator, const_iterator>; ///< Super type.
169
170 /// Allow implicit conversion of range to const_range.
171 const_range(range const &r);
172
173 using super_type::super_type; ///< Use super type constructors.
174
175 // These methods enable treating the range as a view in to the hash map.
176
177 /// Return @a first
178 const_iterator const &begin() const;
179 /// Return @a last
180 const_iterator const &end() const;
181 };
182
183 /// Construct, starting with @n buckets.
184 /// This doubles as the default constructor.
185 IntrusiveHashMap(size_t n = DEFAULT_BUCKET_COUNT);
186
187 /** Remove all values from the table.
188
189 The values are not cleaned up. The values are not touched in this method, therefore it is safe
190 to destroy them first and then @c clear this table.
191 */
192 self_type &clear();
193
194 iterator begin(); ///< First element.
195 const_iterator begin() const; ///< First element.
196 iterator end(); ///< Past last element.
197 const_iterator end() const; ///< Past last element.
198
199 /** Insert a value in to the table.
200 The @a value must @b NOT already be in a table of this type.
201 @note The value itself is put in the table, @b not a copy.
202 */
203 void insert(value_type *v);
204
205 /** Find an element with a key equal to @a key.
206
207 @return A element with a matching key, or the end iterator if not found.
208 */
209 const_iterator find(key_type key) const;
210 iterator find(key_type key);
211
212 /** Get an iterator for an existing value @a v.
213
214 @return An iterator that references @a v, or the end iterator if @a v is not in the table.
215 */
216 const_iterator find(value_type const *v) const;
217 iterator find(value_type *v);
218
219 /** Find the range of objects with keys equal to @a key.
220
221 @return A iterator pair of [first, last) items with equal keys.
222 */
223 const_range equal_range(key_type key) const;
224 range equal_range(key_type key);
225
226 /** Get an @c iterator for the value @a v.
227
228 This is a bit obscure but needed in certain cases. Because the interface assumes iterators are always valid
229 this avoid containment checks, which is useful if @a v is already known to be in the container.
230 */
231 iterator iterator_for(value_type *v);
232 const_iterator iterator_for(const value_type *v) const;
233
234 /** Remove the value at @a loc from the table.
235
236 @note This does @b not clean up the removed elements. Use carefully to avoid leaks.
237
238 @return An iterator the next value past @a loc. [STL compatibility]
239 */
240 iterator erase(iterator const &loc);
241
242 /// Remove all elements in the @c range @a r.
243 iterator erase(range const &r);
244
245 /// Remove all elements in the range (start, limit]
246 iterator erase(iterator const &start, iterator const &limit);
247
248 /// Remove a @a value from the container.
249 /// @a value is checked for being a member of the container.
250 /// @return @c true if @a value was in the container and removed, @c false if it was not in the container.
251 bool erase(value_type *value);
252
253 /** Apply @a F(value_type&) to every element in the hash map.
254 *
255 * This is similar to a range for loop except the iteration is done in a way where destruction or alternation of
256 * the element does @b not affect the iterator. Primarily this is useful for @c delete to clean up the elements
257 * but it can have other uses.
258 *
259 * @tparam F A functional object of the form <tt>void F(value_type&)</tt>
260 * @param f The function to apply.
261 * @return
262 */
263 template <typename F> self_type &apply(F &&f);
264
265 /** Expand the hash if needed.
266
267 Useful primarily when the expansion policy is set to @c MANUAL.
268 */
269 void expand();
270
271 /// Number of elements in the map.
272 size_t count() const;
273
274 /// Number of buckets in the array.
275 size_t bucket_count() const;
276
277 /// Set the expansion policy to @a policy.
278 self_type &set_expansion_policy(ExpansionPolicy policy);
279
280 /// Get the current expansion policy
281 ExpansionPolicy get_expansion_policy() const;
282
283 /// Set the limit value for the expansion policy.
284 self_type &set_expansion_limit(size_t n);
285
286 /// Set the limit value for the expansion policy.
287 size_t get_expansion_limit() const;
288
289 protected:
290 /// The type of storage for the buckets.
291 using Table = std::vector<Bucket>;
292
293 List _list; ///< Elements in the table.
294 Table _table; ///< Array of buckets.
295
296 /// List of non-empty buckets.
297 ts::IntrusiveDList<typename Bucket::Linkage> _active_buckets;
298
299 Bucket *bucket_for(key_type key);
300
301 ExpansionPolicy _expansion_policy{DEFAULT_EXPANSION_POLICY}; ///< When to expand the table.
302 size_t _expansion_limit{DEFAULT_EXPANSION_LIMIT}; ///< Limit value for expansion.
303
304 // noncopyable
305 IntrusiveHashMap(const IntrusiveHashMap &) = delete;
306 IntrusiveHashMap &operator=(const IntrusiveHashMap &) = delete;
307
308 // Hash table size prime list.
309 static constexpr std::array<size_t, 29> PRIME = {{1, 3, 7, 13, 31, 61, 127, 251,
310 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521,
311 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, 16777213,
312 33554393, 67108859, 134217689, 268435399, 536870909}};
313 };
314
315 template <typename H>
316 auto
317 IntrusiveHashMap<H>::Bucket::Linkage::next_ptr(Bucket *b) -> Bucket *&
318 {
319 return b->_link._next;
320 }
321
322 template <typename H>
323 auto
324 IntrusiveHashMap<H>::Bucket::Linkage::prev_ptr(Bucket *b) -> Bucket *&
325 {
326 return b->_link._prev;
327 }
328
329 // This is designed so that if the bucket is empty, then @c nullptr is returned, which will immediately terminate
330 // a search loop on an empty bucket because that will start with a nullptr candidate, matching the limit.
331 template <typename H>
332 auto
333 IntrusiveHashMap<H>::Bucket::limit() const -> value_type *
334 {
335 Bucket *n{_link._next};
336 return n ? n->_v : nullptr;
337 };
338
339 template <typename H>
340 void
clear()341 IntrusiveHashMap<H>::Bucket::clear()
342 {
343 _v = nullptr;
344 _count = 0;
345 _mixed_p = false;
346 // These can be left set during an expansion, when the bucket did have elements before but not
347 // after. Therefore make sure they are cleared.
348 _link._next = _link._prev = nullptr;
349 }
350
351 template <typename H>
352 bool
contains(value_type * v)353 IntrusiveHashMap<H>::Bucket::contains(value_type *v) const
354 {
355 value_type *x = _v;
356 value_type *limit = this->limit();
357 while (x != limit && x != v) {
358 x = H::next_ptr(x);
359 }
360 return x == v;
361 }
362
363 // ---------------------
364 template <typename H>
365 auto
366 IntrusiveHashMap<H>::range::begin() const -> iterator const &
367 {
368 return super_type::first;
369 }
370 template <typename H>
371 auto
372 IntrusiveHashMap<H>::range::end() const -> iterator const &
373 {
374 return super_type::second;
375 }
376
377 template <typename H>
378 auto
379 IntrusiveHashMap<H>::const_range::begin() const -> const_iterator const &
380 {
381 return super_type::first;
382 }
383
384 template <typename H>
385 auto
386 IntrusiveHashMap<H>::const_range::end() const -> const_iterator const &
387 {
388 return super_type::second;
389 }
390
391 // ---------------------
392
IntrusiveHashMap(size_t n)393 template <typename H> IntrusiveHashMap<H>::IntrusiveHashMap(size_t n)
394 {
395 if (n) {
396 _table.resize(*std::lower_bound(PRIME.begin(), PRIME.end(), n));
397 }
398 }
399
400 template <typename H>
401 auto
402 IntrusiveHashMap<H>::bucket_for(key_type key) -> Bucket *
403 {
404 return &_table[H::hash_of(key) % _table.size()];
405 }
406
407 template <typename H>
408 auto
409 IntrusiveHashMap<H>::begin() -> iterator
410 {
411 return _list.begin();
412 }
413
414 template <typename H>
415 auto
416 IntrusiveHashMap<H>::begin() const -> const_iterator
417 {
418 return _list.begin();
419 }
420
421 template <typename H>
422 auto
423 IntrusiveHashMap<H>::end() -> iterator
424 {
425 return _list.end();
426 }
427
428 template <typename H>
429 auto
430 IntrusiveHashMap<H>::end() const -> const_iterator
431 {
432 return _list.end();
433 }
434
435 template <typename H>
436 auto
437 IntrusiveHashMap<H>::clear() -> self_type &
438 {
439 for (auto &b : _table) {
440 b.clear();
441 }
442 // Clear container data.
443 _list.clear();
444 _active_buckets.clear();
445 return *this;
446 }
447
448 template <typename H>
449 auto
450 IntrusiveHashMap<H>::find(key_type key) -> iterator
451 {
452 Bucket *b = this->bucket_for(key);
453 value_type *v = b->_v;
454 value_type *limit = b->limit();
455 while (v != limit && !H::equal(key, H::key_of(v))) {
456 v = H::next_ptr(v);
457 }
458 return v == limit ? _list.end() : _list.iterator_for(v);
459 }
460
461 template <typename H>
462 auto
463 IntrusiveHashMap<H>::find(key_type key) const -> const_iterator
464 {
465 return const_cast<self_type *>(this)->find(key);
466 }
467
468 template <typename H>
469 auto
470 IntrusiveHashMap<H>::equal_range(key_type key) -> range
471 {
472 iterator first{this->find(key)};
473 iterator last{first};
474 iterator limit{this->end()};
475
476 while (last != limit && H::equal(key, H::key_of(&*last))) {
477 ++last;
478 }
479
480 return range{first, last};
481 }
482
483 template <typename H>
484 auto
485 IntrusiveHashMap<H>::equal_range(key_type key) const -> const_range
486 {
487 return const_cast<self_type *>(this)->equal_range(key);
488 }
489
490 template <typename H>
491 auto
492 IntrusiveHashMap<H>::iterator_for(const value_type *v) const -> const_iterator
493 {
494 return _list.iterator_for(v);
495 }
496
497 template <typename H>
498 auto
499 IntrusiveHashMap<H>::iterator_for(value_type *v) -> iterator
500 {
501 return _list.iterator_for(v);
502 }
503
504 template <typename H>
505 auto
506 IntrusiveHashMap<H>::find(value_type *v) -> iterator
507 {
508 Bucket *b = this->bucket_for(H::key_of(v));
509 return b->contains(v) ? _list.iterator_for(v) : this->end();
510 }
511
512 template <typename H>
513 auto
514 IntrusiveHashMap<H>::find(value_type const *v) const -> const_iterator
515 {
516 return const_cast<self_type *>(this)->find(const_cast<value_type *>(v));
517 }
518
519 template <typename H>
520 void
insert(value_type * v)521 IntrusiveHashMap<H>::insert(value_type *v)
522 {
523 auto key = H::key_of(v);
524 Bucket *bucket = this->bucket_for(key);
525 value_type *spot = bucket->_v;
526 bool mixed_p = false; // Found a different key in the bucket.
527
528 if (nullptr == spot) { // currently empty bucket, set it and add to active list.
529 _list.append(v);
530 bucket->_v = v;
531 _active_buckets.append(bucket);
532 } else {
533 value_type *limit = bucket->limit();
534
535 // First search the bucket to see if the key is already in it.
536 while (spot != limit && !H::equal(key, H::key_of(spot))) {
537 spot = H::next_ptr(spot);
538 }
539 if (spot != bucket->_v) {
540 mixed_p = true; // found some other key, it's going to be mixed.
541 }
542 _list.insert_before(spot, v);
543 if (spot == bucket->_v) { // added before the bucket start, update the start.
544 bucket->_v = v;
545 }
546 bucket->_mixed_p = mixed_p;
547 }
548 ++bucket->_count;
549
550 // auto expand if appropriate.
551 if ((AVERAGE == _expansion_policy && (_list.count() / _table.size()) > _expansion_limit) ||
552 (MAXIMUM == _expansion_policy && bucket->_count > _expansion_limit && bucket->_mixed_p)) {
553 this->expand();
554 }
555 }
556
557 template <typename H>
558 auto
559 IntrusiveHashMap<H>::erase(iterator const &loc) -> iterator
560 {
561 value_type *v = loc;
562 iterator zret = ++(this->iterator_for(v)); // get around no const_iterator -> iterator.
563 Bucket *b = this->bucket_for(H::key_of(v));
564 value_type *nv = H::next_ptr(v);
565 value_type *limit = b->limit();
566 if (b->_v == v) { // removed first element in bucket, update bucket
567 if (limit == nv) { // that was also the only element, deactivate bucket
568 _active_buckets.erase(b);
569 b->clear();
570 } else {
571 b->_v = nv;
572 --b->_count;
573 }
574 }
575 _list.erase(loc);
576 return zret;
577 }
578
579 template <typename H>
580 bool
erase(value_type * value)581 IntrusiveHashMap<H>::erase(value_type *value)
582 {
583 auto loc = this->find(value);
584 if (loc != this->end()) {
585 this->erase(loc);
586 return true;
587 }
588 return false;
589 }
590
591 template <typename H>
592 auto
593 IntrusiveHashMap<H>::erase(iterator const &start, iterator const &limit) -> iterator
594 {
595 auto spot{start};
596 Bucket *bucket{this->bucket_for(spot)};
597 while (spot != limit) {
598 auto target = bucket;
599 bucket = bucket->_link._next; // bump now to avoid forward iteration problems in case of bucket removal.
600 value_type *v_limit = bucket ? bucket->_v : nullptr;
601 while (spot != v_limit && spot != limit) {
602 --(target->_count);
603 ++spot;
604 }
605 if (target->_count == 0) {
606 _active_buckets.erase(target);
607 }
608 }
609 _list.erase(start, limit);
610 return _list.iterator_for(limit); // convert from const_iterator back to iterator
611 };
612
613 template <typename H>
614 auto
615 IntrusiveHashMap<H>::erase(range const &r) -> iterator
616 {
617 return this->erase(r.first, r.second);
618 }
619
620 namespace detail
621 {
622 // Make @c apply more convenient by allowing the function to take a reference type or pointer type to the container
623 // elements. The pointer type is the base, plus a shim to convert from a reference type functor to a pointer pointer
624 // type. The complex return type definition forces only one, but not both, to be valid for a particular functor. This
625 // also must be done via free functions and not method overloads because the compiler forces a match up of method
626 // definitions and declarations before any template instantiation.
627
628 template <typename H, typename F>
629 auto
630 IntrusiveHashMapApply(IntrusiveHashMap<H> &map, F &&f)
631 -> decltype(f(*static_cast<typename IntrusiveHashMap<H>::value_type *>(nullptr)), map)
632 {
633 return map.apply([&f](typename IntrusiveHashMap<H>::value_type *v) { return f(*v); });
634 }
635
636 template <typename H, typename F>
637 auto
638 IntrusiveHashMapApply(IntrusiveHashMap<H> &map, F &&f)
639 -> decltype(f(static_cast<typename IntrusiveHashMap<H>::value_type *>(nullptr)), map)
640 {
641 auto spot{map.begin()};
642 auto limit{map.end()};
643 while (spot != limit) {
644 f(spot++); // post increment means @a spot is updated before @a f is applied.
645 }
646 return map;
647 }
648 } // namespace detail
649
650 template <typename H>
651 template <typename F>
652 auto
653 IntrusiveHashMap<H>::apply(F &&f) -> self_type &
654 {
655 return detail::IntrusiveHashMapApply(*this, f);
656 };
657
658 template <typename H>
659 void
expand()660 IntrusiveHashMap<H>::expand()
661 {
662 ExpansionPolicy org_expansion_policy = _expansion_policy; // save for restore.
663 value_type *old = _list.head(); // save for repopulating.
664 auto old_size = _table.size();
665
666 // Reset to empty state.
667 this->clear();
668 _table.resize(*std::lower_bound(PRIME.begin(), PRIME.end(), old_size + 1));
669
670 _expansion_policy = MANUAL; // disable any auto expand while we're expanding.
671 while (old) {
672 value_type *v = old;
673 old = H::next_ptr(old);
674 this->insert(v);
675 }
676 // stashed array gets cleaned up when @a tmp goes out of scope.
677 _expansion_policy = org_expansion_policy; // reset to original value.
678 }
679
680 template <typename H>
681 size_t
count()682 IntrusiveHashMap<H>::count() const
683 {
684 return _list.count();
685 }
686
687 template <typename H>
688 size_t
bucket_count()689 IntrusiveHashMap<H>::bucket_count() const
690 {
691 return _table.size();
692 }
693
694 template <typename H>
695 auto
696 IntrusiveHashMap<H>::set_expansion_policy(ExpansionPolicy policy) -> self_type &
697 {
698 _expansion_policy = policy;
699 return *this;
700 }
701
702 template <typename H>
703 auto
704 IntrusiveHashMap<H>::get_expansion_policy() const -> ExpansionPolicy
705 {
706 return _expansion_policy;
707 }
708
709 template <typename H>
710 auto
711 IntrusiveHashMap<H>::set_expansion_limit(size_t n) -> self_type &
712 {
713 _expansion_limit = n;
714 return *this;
715 }
716
717 template <typename H>
718 size_t
get_expansion_limit()719 IntrusiveHashMap<H>::get_expansion_limit() const
720 {
721 return _expansion_limit;
722 }
723 /* ---------------------------------------------------------------------------------------------- */
724