1 /** @file
2 
3   Instrusive hash map.
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22 */
23 
24 #pragma once
25 
26 #include <vector>
27 #include <array>
28 #include <algorithm>
29 #include "tscpp/util/IntrusiveDList.h"
30 
31 /** Intrusive Hash Table.
32 
33     Values stored in this container are not destroyed when the container is destroyed or removed from the container.
34     They must be released by the client.
35 
36     Duplicate keys are allowed. Clients must walk the list for multiple entries.
37     @see @c Location::operator++()
38 
39     By default the table automatically expands to limit the average chain length. This can be tuned. If set
40     to @c MANUAL then the table will expand @b only when explicitly requested to do so by the client.
41     @see @c ExpansionPolicy
42     @see @c setExpansionPolicy()
43     @see @c setExpansionLimit()
44     @see @c expand()
45 
46     The hash table is configured by a descriptor class. This must contain the following members
47 
48     - The static method <tt>key_type key_of(value_type *)</tt> which returns the key for an instance of @c value_type.
49 
50     - The static method <tt>bool equal(key_type lhs, key_type rhs)</tt> which checks if two instances of @c Key are the same.
51 
52     - The static method <tt>hash_id hash_of(key_type)</tt> which computes the hash value of the key. @c ID must a numeric type.
53 
54     - The static method <tt>value_type *& next_ptr(value_type *)</tt> which returns a reference to a forward pointer.
55 
56     - The static method <tt>value_type *& prev_ptr(value_type *)</tt> which returns a reference to a backwards pointer.
57 
58     These are the required members, it is permitted to have other methods (if the descriptor is used for other purposes)
59     or to provide overloads of the methods. Note this is compatible with @c IntrusiveDList.
60 
61     Several internal types are deduced from these arguments.
62 
63     @a Key is the return type of @a key_of and represents the key that distinguishes instances of @a value_type. Two
64     instances of @c value_type are considered the same if their respective @c Key values are @c equal. @c Key is
65     presumed cheap to copy. If the underlying key is not a simple type then @a Key should be a constant pointer or a
66     constant reference. The hash table will never attempt to modify a key.
67 
68     @a ID The numeric type that is the hash value for an instance of @a Key.
69 
70     Example for @c Http1ServerSession keyed by the origin server IP address.
71 
72     @code
73     struct Descriptor {
74       static sockaddr const* key_of(Http1ServerSession const* value) { return &value->ip.sa }
75       static bool equal(sockaddr const* lhs, sockaddr const* rhs) { return ats_ip_eq(lhs, rhs); }
76       static uint32_t hash_of(sockaddr const* key) { return ats_ip_hash(key); }
77       static Http1ServerSession *& next_ptr(Http1ServerSession * ssn) { return ssn->_next; }
78       static Http1ServerSession *& prev_ptr(Http1ServerSession * ssn) { return ssn->_prev; }
79     };
80     using Table = IntrusiveHashMap<Descriptor>;
81     @endcode
82 
83  */
84 template <typename H> class IntrusiveHashMap
85 {
86   using self_type = IntrusiveHashMap;
87 
88 public:
89   /// Type of elements in the map.
90   using value_type = typename std::remove_pointer<typename std::remove_reference<decltype(H::next_ptr(nullptr))>::type>::type;
91   /// Key type for the elements.
92   using key_type = decltype(H::key_of(static_cast<value_type *>(nullptr)));
93   /// The numeric hash ID computed from a key.
94   using hash_id = decltype(H::hash_of(H::key_of(static_cast<value_type *>(nullptr))));
95 
96   /// When the hash table is expanded.
97   enum ExpansionPolicy {
98     MANUAL,  ///< Client must explicitly expand the table.
99     AVERAGE, ///< Table expands if average chain length exceeds limit. [default]
100     MAXIMUM  ///< Table expands if any chain length exceeds limit.
101   };
102 
103 protected:
104   /** List of elements.
105    * All table elements are in this list. The buckets reference their starting element in the list, or nothing if
106    * no elements are in that bucket.
107    */
108   using List = ts::IntrusiveDList<H>;
109 
110   /// A bucket for the hash map.
111   struct Bucket {
112     /// Support for ts::IntrusiveDList<Bucket::Linkage>, definitions and link storage.
113     struct Linkage {
114       static Bucket *&next_ptr(Bucket *b); ///< Access next pointer.
115       static Bucket *&prev_ptr(Bucket *b); ///< Access prev pointer.
116       Bucket *_prev{nullptr};              ///< Prev pointer.
117       Bucket *_next{nullptr};              ///< Next pointer.
118     } _link;
119 
120     value_type *_v{nullptr}; ///< First element in the bucket.
121     size_t _count{0};        ///< Number of elements in the bucket.
122 
123     /** Marker for the chain having different keys.
124 
125         This is used to determine if expanding the hash map would be useful - buckets that are not mixed
126         will not be changed by an expansion.
127      */
128     bool _mixed_p{false};
129 
130     /// Compute the limit value for iteration in this bucket.
131     /// This is the value of the next bucket, or @c nullptr if no next bucket.
132     value_type *limit() const;
133 
134     /// Verify @a v is in this bucket.
135     bool contains(value_type *v) const;
136 
137     void clear(); ///< Reset to initial state.
138   };
139 
140 public:
141   /// The default starting number of buckets.
142   static size_t constexpr DEFAULT_BUCKET_COUNT = 7; ///< POOMA.
143   /// The default expansion policy limit.
144   static size_t constexpr DEFAULT_EXPANSION_LIMIT = 4; ///< Value from previous version.
145   /// Expansion policy if not specified in constructor.
146   static ExpansionPolicy constexpr DEFAULT_EXPANSION_POLICY = AVERAGE;
147 
148   using iterator       = typename List::iterator;
149   using const_iterator = typename List::const_iterator;
150 
151   /// A range of elements in the map.
152   /// It is a half open range, [first, last) in the usual STL style.
153   /// @internal I tried @c std::pair as a base for this but was unable to get STL container operations to work.
154   struct range : public std::pair<iterator, iterator> {
155     using super_type = std::pair<iterator, iterator>; ///< Super type.
156     using super_type::super_type;                     ///< Use super type constructors.
157 
158     // These methods enable treating the range as a view in to the hash map.
159 
160     /// Return @a first
161     iterator const &begin() const;
162     /// Return @a last
163     iterator const &end() const;
164   };
165 
166   /// A range of constant elements in the map.
167   struct const_range : public std::pair<const_iterator, const_iterator> {
168     using super_type = std::pair<const_iterator, const_iterator>; ///< Super type.
169 
170     /// Allow implicit conversion of range to const_range.
171     const_range(range const &r);
172 
173     using super_type::super_type; ///< Use super type constructors.
174 
175     // These methods enable treating the range as a view in to the hash map.
176 
177     /// Return @a first
178     const_iterator const &begin() const;
179     /// Return @a last
180     const_iterator const &end() const;
181   };
182 
183   /// Construct, starting with @n buckets.
184   /// This doubles as the default constructor.
185   IntrusiveHashMap(size_t n = DEFAULT_BUCKET_COUNT);
186 
187   /** Remove all values from the table.
188 
189       The values are not cleaned up. The values are not touched in this method, therefore it is safe
190       to destroy them first and then @c clear this table.
191   */
192   self_type &clear();
193 
194   iterator begin();             ///< First element.
195   const_iterator begin() const; ///< First element.
196   iterator end();               ///< Past last element.
197   const_iterator end() const;   ///< Past last element.
198 
199   /** Insert a value in to the table.
200       The @a value must @b NOT already be in a table of this type.
201       @note The value itself is put in the table, @b not a copy.
202   */
203   void insert(value_type *v);
204 
205   /** Find an element with a key equal to @a key.
206 
207       @return A element with a matching key, or the end iterator if not found.
208   */
209   const_iterator find(key_type key) const;
210   iterator find(key_type key);
211 
212   /** Get an iterator for an existing value @a v.
213 
214       @return An iterator that references @a v, or the end iterator if @a v is not in the table.
215   */
216   const_iterator find(value_type const *v) const;
217   iterator find(value_type *v);
218 
219   /** Find the range of objects with keys equal to @a key.
220 
221       @return A iterator pair of [first, last) items with equal keys.
222   */
223   const_range equal_range(key_type key) const;
224   range equal_range(key_type key);
225 
226   /** Get an @c iterator for the value @a v.
227 
228       This is a bit obscure but needed in certain cases. Because the interface assumes iterators are always valid
229       this avoid containment checks, which is useful if @a v is already known to be in the container.
230    */
231   iterator iterator_for(value_type *v);
232   const_iterator iterator_for(const value_type *v) const;
233 
234   /** Remove the value at @a loc from the table.
235 
236       @note This does @b not clean up the removed elements. Use carefully to avoid leaks.
237 
238       @return An iterator the next value past @a loc. [STL compatibility]
239   */
240   iterator erase(iterator const &loc);
241 
242   /// Remove all elements in the @c range @a r.
243   iterator erase(range const &r);
244 
245   /// Remove all elements in the range (start, limit]
246   iterator erase(iterator const &start, iterator const &limit);
247 
248   /// Remove a @a value from the container.
249   /// @a value is checked for being a member of the container.
250   /// @return @c true if @a value was in the container and removed, @c false if it was not in the container.
251   bool erase(value_type *value);
252 
253   /** Apply @a F(value_type&) to every element in the hash map.
254    *
255    * This is similar to a range for loop except the iteration is done in a way where destruction or alternation of
256    * the element does @b not affect the iterator. Primarily this is useful for @c delete to clean up the elements
257    * but it can have other uses.
258    *
259    * @tparam F A functional object of the form <tt>void F(value_type&)</tt>
260    * @param f The function to apply.
261    * @return
262    */
263   template <typename F> self_type &apply(F &&f);
264 
265   /** Expand the hash if needed.
266 
267       Useful primarily when the expansion policy is set to @c MANUAL.
268    */
269   void expand();
270 
271   /// Number of elements in the map.
272   size_t count() const;
273 
274   /// Number of buckets in the array.
275   size_t bucket_count() const;
276 
277   /// Set the expansion policy to @a policy.
278   self_type &set_expansion_policy(ExpansionPolicy policy);
279 
280   /// Get the current expansion policy
281   ExpansionPolicy get_expansion_policy() const;
282 
283   /// Set the limit value for the expansion policy.
284   self_type &set_expansion_limit(size_t n);
285 
286   /// Set the limit value for the expansion policy.
287   size_t get_expansion_limit() const;
288 
289 protected:
290   /// The type of storage for the buckets.
291   using Table = std::vector<Bucket>;
292 
293   List _list;   ///< Elements in the table.
294   Table _table; ///< Array of buckets.
295 
296   /// List of non-empty buckets.
297   ts::IntrusiveDList<typename Bucket::Linkage> _active_buckets;
298 
299   Bucket *bucket_for(key_type key);
300 
301   ExpansionPolicy _expansion_policy{DEFAULT_EXPANSION_POLICY}; ///< When to expand the table.
302   size_t _expansion_limit{DEFAULT_EXPANSION_LIMIT};            ///< Limit value for expansion.
303 
304   // noncopyable
305   IntrusiveHashMap(const IntrusiveHashMap &) = delete;
306   IntrusiveHashMap &operator=(const IntrusiveHashMap &) = delete;
307 
308   // Hash table size prime list.
309   static constexpr std::array<size_t, 29> PRIME = {{1,        3,        7,         13,        31,       61,      127,     251,
310                                                     509,      1021,     2039,      4093,      8191,     16381,   32749,   65521,
311                                                     131071,   262139,   524287,    1048573,   2097143,  4194301, 8388593, 16777213,
312                                                     33554393, 67108859, 134217689, 268435399, 536870909}};
313 };
314 
315 template <typename H>
316 auto
317 IntrusiveHashMap<H>::Bucket::Linkage::next_ptr(Bucket *b) -> Bucket *&
318 {
319   return b->_link._next;
320 }
321 
322 template <typename H>
323 auto
324 IntrusiveHashMap<H>::Bucket::Linkage::prev_ptr(Bucket *b) -> Bucket *&
325 {
326   return b->_link._prev;
327 }
328 
329 // This is designed so that if the bucket is empty, then @c nullptr is returned, which will immediately terminate
330 // a search loop on an empty bucket because that will start with a nullptr candidate, matching the limit.
331 template <typename H>
332 auto
333 IntrusiveHashMap<H>::Bucket::limit() const -> value_type *
334 {
335   Bucket *n{_link._next};
336   return n ? n->_v : nullptr;
337 };
338 
339 template <typename H>
340 void
clear()341 IntrusiveHashMap<H>::Bucket::clear()
342 {
343   _v       = nullptr;
344   _count   = 0;
345   _mixed_p = false;
346   // These can be left set during an expansion, when the bucket did have elements before but not
347   // after. Therefore make sure they are cleared.
348   _link._next = _link._prev = nullptr;
349 }
350 
351 template <typename H>
352 bool
contains(value_type * v)353 IntrusiveHashMap<H>::Bucket::contains(value_type *v) const
354 {
355   value_type *x     = _v;
356   value_type *limit = this->limit();
357   while (x != limit && x != v) {
358     x = H::next_ptr(x);
359   }
360   return x == v;
361 }
362 
363 // ---------------------
364 template <typename H>
365 auto
366 IntrusiveHashMap<H>::range::begin() const -> iterator const &
367 {
368   return super_type::first;
369 }
370 template <typename H>
371 auto
372 IntrusiveHashMap<H>::range::end() const -> iterator const &
373 {
374   return super_type::second;
375 }
376 
377 template <typename H>
378 auto
379 IntrusiveHashMap<H>::const_range::begin() const -> const_iterator const &
380 {
381   return super_type::first;
382 }
383 
384 template <typename H>
385 auto
386 IntrusiveHashMap<H>::const_range::end() const -> const_iterator const &
387 {
388   return super_type::second;
389 }
390 
391 // ---------------------
392 
IntrusiveHashMap(size_t n)393 template <typename H> IntrusiveHashMap<H>::IntrusiveHashMap(size_t n)
394 {
395   if (n) {
396     _table.resize(*std::lower_bound(PRIME.begin(), PRIME.end(), n));
397   }
398 }
399 
400 template <typename H>
401 auto
402 IntrusiveHashMap<H>::bucket_for(key_type key) -> Bucket *
403 {
404   return &_table[H::hash_of(key) % _table.size()];
405 }
406 
407 template <typename H>
408 auto
409 IntrusiveHashMap<H>::begin() -> iterator
410 {
411   return _list.begin();
412 }
413 
414 template <typename H>
415 auto
416 IntrusiveHashMap<H>::begin() const -> const_iterator
417 {
418   return _list.begin();
419 }
420 
421 template <typename H>
422 auto
423 IntrusiveHashMap<H>::end() -> iterator
424 {
425   return _list.end();
426 }
427 
428 template <typename H>
429 auto
430 IntrusiveHashMap<H>::end() const -> const_iterator
431 {
432   return _list.end();
433 }
434 
435 template <typename H>
436 auto
437 IntrusiveHashMap<H>::clear() -> self_type &
438 {
439   for (auto &b : _table) {
440     b.clear();
441   }
442   // Clear container data.
443   _list.clear();
444   _active_buckets.clear();
445   return *this;
446 }
447 
448 template <typename H>
449 auto
450 IntrusiveHashMap<H>::find(key_type key) -> iterator
451 {
452   Bucket *b         = this->bucket_for(key);
453   value_type *v     = b->_v;
454   value_type *limit = b->limit();
455   while (v != limit && !H::equal(key, H::key_of(v))) {
456     v = H::next_ptr(v);
457   }
458   return v == limit ? _list.end() : _list.iterator_for(v);
459 }
460 
461 template <typename H>
462 auto
463 IntrusiveHashMap<H>::find(key_type key) const -> const_iterator
464 {
465   return const_cast<self_type *>(this)->find(key);
466 }
467 
468 template <typename H>
469 auto
470 IntrusiveHashMap<H>::equal_range(key_type key) -> range
471 {
472   iterator first{this->find(key)};
473   iterator last{first};
474   iterator limit{this->end()};
475 
476   while (last != limit && H::equal(key, H::key_of(&*last))) {
477     ++last;
478   }
479 
480   return range{first, last};
481 }
482 
483 template <typename H>
484 auto
485 IntrusiveHashMap<H>::equal_range(key_type key) const -> const_range
486 {
487   return const_cast<self_type *>(this)->equal_range(key);
488 }
489 
490 template <typename H>
491 auto
492 IntrusiveHashMap<H>::iterator_for(const value_type *v) const -> const_iterator
493 {
494   return _list.iterator_for(v);
495 }
496 
497 template <typename H>
498 auto
499 IntrusiveHashMap<H>::iterator_for(value_type *v) -> iterator
500 {
501   return _list.iterator_for(v);
502 }
503 
504 template <typename H>
505 auto
506 IntrusiveHashMap<H>::find(value_type *v) -> iterator
507 {
508   Bucket *b = this->bucket_for(H::key_of(v));
509   return b->contains(v) ? _list.iterator_for(v) : this->end();
510 }
511 
512 template <typename H>
513 auto
514 IntrusiveHashMap<H>::find(value_type const *v) const -> const_iterator
515 {
516   return const_cast<self_type *>(this)->find(const_cast<value_type *>(v));
517 }
518 
519 template <typename H>
520 void
insert(value_type * v)521 IntrusiveHashMap<H>::insert(value_type *v)
522 {
523   auto key         = H::key_of(v);
524   Bucket *bucket   = this->bucket_for(key);
525   value_type *spot = bucket->_v;
526   bool mixed_p     = false; // Found a different key in the bucket.
527 
528   if (nullptr == spot) { // currently empty bucket, set it and add to active list.
529     _list.append(v);
530     bucket->_v = v;
531     _active_buckets.append(bucket);
532   } else {
533     value_type *limit = bucket->limit();
534 
535     // First search the bucket to see if the key is already in it.
536     while (spot != limit && !H::equal(key, H::key_of(spot))) {
537       spot = H::next_ptr(spot);
538     }
539     if (spot != bucket->_v) {
540       mixed_p = true; // found some other key, it's going to be mixed.
541     }
542     _list.insert_before(spot, v);
543     if (spot == bucket->_v) { // added before the bucket start, update the start.
544       bucket->_v = v;
545     }
546     bucket->_mixed_p = mixed_p;
547   }
548   ++bucket->_count;
549 
550   // auto expand if appropriate.
551   if ((AVERAGE == _expansion_policy && (_list.count() / _table.size()) > _expansion_limit) ||
552       (MAXIMUM == _expansion_policy && bucket->_count > _expansion_limit && bucket->_mixed_p)) {
553     this->expand();
554   }
555 }
556 
557 template <typename H>
558 auto
559 IntrusiveHashMap<H>::erase(iterator const &loc) -> iterator
560 {
561   value_type *v     = loc;
562   iterator zret     = ++(this->iterator_for(v)); // get around no const_iterator -> iterator.
563   Bucket *b         = this->bucket_for(H::key_of(v));
564   value_type *nv    = H::next_ptr(v);
565   value_type *limit = b->limit();
566   if (b->_v == v) {    // removed first element in bucket, update bucket
567     if (limit == nv) { // that was also the only element, deactivate bucket
568       _active_buckets.erase(b);
569       b->clear();
570     } else {
571       b->_v = nv;
572       --b->_count;
573     }
574   }
575   _list.erase(loc);
576   return zret;
577 }
578 
579 template <typename H>
580 bool
erase(value_type * value)581 IntrusiveHashMap<H>::erase(value_type *value)
582 {
583   auto loc = this->find(value);
584   if (loc != this->end()) {
585     this->erase(loc);
586     return true;
587   }
588   return false;
589 }
590 
591 template <typename H>
592 auto
593 IntrusiveHashMap<H>::erase(iterator const &start, iterator const &limit) -> iterator
594 {
595   auto spot{start};
596   Bucket *bucket{this->bucket_for(spot)};
597   while (spot != limit) {
598     auto target         = bucket;
599     bucket              = bucket->_link._next; // bump now to avoid forward iteration problems in case of bucket removal.
600     value_type *v_limit = bucket ? bucket->_v : nullptr;
601     while (spot != v_limit && spot != limit) {
602       --(target->_count);
603       ++spot;
604     }
605     if (target->_count == 0) {
606       _active_buckets.erase(target);
607     }
608   }
609   _list.erase(start, limit);
610   return _list.iterator_for(limit); // convert from const_iterator back to iterator
611 };
612 
613 template <typename H>
614 auto
615 IntrusiveHashMap<H>::erase(range const &r) -> iterator
616 {
617   return this->erase(r.first, r.second);
618 }
619 
620 namespace detail
621 {
622 // Make @c apply more convenient by allowing the function to take a reference type or pointer type to the container
623 // elements. The pointer type is the base, plus a shim to convert from a reference type functor to a pointer pointer
624 // type. The complex return type definition forces only one, but not both, to be valid for a particular functor. This
625 // also must be done via free functions and not method overloads because the compiler forces a match up of method
626 // definitions and declarations before any template instantiation.
627 
628 template <typename H, typename F>
629 auto
630 IntrusiveHashMapApply(IntrusiveHashMap<H> &map, F &&f)
631   -> decltype(f(*static_cast<typename IntrusiveHashMap<H>::value_type *>(nullptr)), map)
632 {
633   return map.apply([&f](typename IntrusiveHashMap<H>::value_type *v) { return f(*v); });
634 }
635 
636 template <typename H, typename F>
637 auto
638 IntrusiveHashMapApply(IntrusiveHashMap<H> &map, F &&f)
639   -> decltype(f(static_cast<typename IntrusiveHashMap<H>::value_type *>(nullptr)), map)
640 {
641   auto spot{map.begin()};
642   auto limit{map.end()};
643   while (spot != limit) {
644     f(spot++); // post increment means @a spot is updated before @a f is applied.
645   }
646   return map;
647 }
648 } // namespace detail
649 
650 template <typename H>
651 template <typename F>
652 auto
653 IntrusiveHashMap<H>::apply(F &&f) -> self_type &
654 {
655   return detail::IntrusiveHashMapApply(*this, f);
656 };
657 
658 template <typename H>
659 void
expand()660 IntrusiveHashMap<H>::expand()
661 {
662   ExpansionPolicy org_expansion_policy = _expansion_policy; // save for restore.
663   value_type *old                      = _list.head();      // save for repopulating.
664   auto old_size                        = _table.size();
665 
666   // Reset to empty state.
667   this->clear();
668   _table.resize(*std::lower_bound(PRIME.begin(), PRIME.end(), old_size + 1));
669 
670   _expansion_policy = MANUAL; // disable any auto expand while we're expanding.
671   while (old) {
672     value_type *v = old;
673     old           = H::next_ptr(old);
674     this->insert(v);
675   }
676   // stashed array gets cleaned up when @a tmp goes out of scope.
677   _expansion_policy = org_expansion_policy; // reset to original value.
678 }
679 
680 template <typename H>
681 size_t
count()682 IntrusiveHashMap<H>::count() const
683 {
684   return _list.count();
685 }
686 
687 template <typename H>
688 size_t
bucket_count()689 IntrusiveHashMap<H>::bucket_count() const
690 {
691   return _table.size();
692 }
693 
694 template <typename H>
695 auto
696 IntrusiveHashMap<H>::set_expansion_policy(ExpansionPolicy policy) -> self_type &
697 {
698   _expansion_policy = policy;
699   return *this;
700 }
701 
702 template <typename H>
703 auto
704 IntrusiveHashMap<H>::get_expansion_policy() const -> ExpansionPolicy
705 {
706   return _expansion_policy;
707 }
708 
709 template <typename H>
710 auto
711 IntrusiveHashMap<H>::set_expansion_limit(size_t n) -> self_type &
712 {
713   _expansion_limit = n;
714   return *this;
715 }
716 
717 template <typename H>
718 size_t
get_expansion_limit()719 IntrusiveHashMap<H>::get_expansion_limit() const
720 {
721   return _expansion_limit;
722 }
723 /* ---------------------------------------------------------------------------------------------- */
724