1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * AtomicHashMap --
19  *
20  * A high-performance concurrent hash map with int32_t or int64_t keys. Supports
21  * insert, find(key), findAt(index), erase(key), size, and more.  Memory cannot
22  * be freed or reclaimed by erase.  Can grow to a maximum of about 18 times the
23  * initial capacity, but performance degrades linearly with growth. Can also be
24  * used as an object store with unique 32-bit references directly into the
25  * internal storage (retrieved with iterator::getIndex()).
26  *
27  * Advantages:
28  *    - High-performance (~2-4x tbb::concurrent_hash_map in heavily
29  *      multi-threaded environments).
 *    - Efficient memory usage if the initial capacity is not overestimated
 *      (especially for small keys and values).
32  *    - Good fragmentation properties (only allocates in large slabs which can
33  *      be reused with clear() and never move).
34  *    - Can generate unique, long-lived 32-bit references for efficient lookup
35  *      (see findAt()).
36  *
37  * Disadvantages:
38  *    - Keys must be native int32_t or int64_t, or explicitly converted.
39  *    - Must be able to specify unique empty, locked, and erased keys
40  *    - Performance degrades linearly as size grows beyond initialization
41  *      capacity.
42  *    - Max size limit of ~18x initial size (dependent on max load factor).
43  *    - Memory is not freed or reclaimed by erase.
44  *
45  * Usage and Operation Details:
46  *   Simple performance/memory tradeoff with maxLoadFactor.  Higher load factors
47  *   give better memory utilization but probe lengths increase, reducing
48  *   performance.
49  *
50  * Implementation and Performance Details:
51  *   AHArray is a fixed size contiguous block of value_type cells.  When
52  *   writing a cell, the key is locked while the rest of the record is
53  *   written.  Once done, the cell is unlocked by setting the key.  find()
54  *   is completely wait-free and doesn't require any non-relaxed atomic
55  *   operations.  AHA cannot grow beyond initialization capacity, but is
56  *   faster because of reduced data indirection.
57  *
58  *   AHMap is a wrapper around AHArray sub-maps that allows growth and provides
59  *   an interface closer to the STL UnorderedAssociativeContainer concept. These
60  *   sub-maps are allocated on the fly and are processed in series, so the more
61  *   there are (from growing past initial capacity), the worse the performance.
62  *
63  *   Insert returns false if there is a key collision and throws if the max size
64  *   of the map is exceeded.
65  *
66  *   Benchmark performance with 8 simultaneous threads processing 1 million
67  *   unique <int64_t, int64_t> entries on a 4-core, 2.5 GHz machine:
68  *
69  *     Load Factor   Mem Efficiency   usec/Insert   usec/Find
70  *         50%             50%           0.19         0.05
71  *         85%             85%           0.20         0.06
72  *         90%             90%           0.23         0.08
73  *         95%             95%           0.27         0.10
74  *
75  *   See folly/tests/AtomicHashMapTest.cpp for more benchmarks.
76  *
77  * @author Spencer Ahrens <sahrens@fb.com>
78  * @author Jordan DeLong <delong.j@fb.com>
79  *
80  */
81 
82 #pragma once
83 #define FOLLY_ATOMICHASHMAP_H_
84 
85 #include <atomic>
86 #include <functional>
87 #include <stdexcept>
88 
89 #include <folly/AtomicHashArray.h>
90 #include <folly/CPortability.h>
91 #include <folly/Likely.h>
92 #include <folly/ThreadCachedInt.h>
93 #include <folly/container/Foreach.h>
94 #include <folly/hash/Hash.h>
95 
96 namespace folly {
97 
98 /*
99  * AtomicHashMap provides an interface somewhat similar to the
100  * UnorderedAssociativeContainer concept in C++.  This does not
101  * exactly match this concept (or even the basic Container concept),
102  * because of some restrictions imposed by our datastructure.
103  *
104  * Specific differences (there are quite a few):
105  *
106  * - Efficiently thread safe for inserts (main point of this stuff),
107  *   wait-free for lookups.
108  *
 * - You can erase from this container, but the cell containing the key will
 *   not be freed or reclaimed.
111  *
112  * - You can erase everything by calling clear() (and you must guarantee only
113  *   one thread can be using the container to do that).
114  *
115  * - We aren't DefaultConstructible, CopyConstructible, Assignable, or
116  *   EqualityComparable.  (Most of these are probably not something
117  *   you actually want to do with this anyway.)
118  *
119  * - We don't support the various bucket functions, rehash(),
120  *   reserve(), or equal_range().  Also no constructors taking
121  *   iterators, although this could change.
122  *
123  * - Several insertion functions, notably operator[], are not
124  *   implemented.  It is a little too easy to misuse these functions
125  *   with this container, where part of the point is that when an
126  *   insertion happens for a new key, it will atomically have the
127  *   desired value.
128  *
129  * - The map has no templated insert() taking an iterator range, but
130  *   we do provide an insert(key, value).  The latter seems more
131  *   frequently useful for this container (to avoid sprinkling
132  *   make_pair everywhere), and providing both can lead to some gross
133  *   template error messages.
134  *
135  * - The Allocator must not be stateful (a new instance will be spun up for
136  *   each allocation), and its allocate() method must take a raw number of
137  *   bytes.
138  *
139  * - KeyT must be a 32 bit or 64 bit atomic integer type, and you must
140  *   define special 'locked' and 'empty' key values in the ctor
141  *
142  * - We don't take the Hash function object as an instance in the
143  *   constructor.
144  *
145  */
146 
147 // Thrown when insertion fails due to running out of space for
148 // submaps.
149 struct FOLLY_EXPORT AtomicHashMapFullError : std::runtime_error {
AtomicHashMapFullErrorAtomicHashMapFullError150   explicit AtomicHashMapFullError()
151       : std::runtime_error("AtomicHashMap is full") {}
152 };
153 
154 template <
155     class KeyT,
156     class ValueT,
157     class HashFcn,
158     class EqualFcn,
159     class Allocator,
160     class ProbeFcn,
161     class KeyConvertFcn>
162 class AtomicHashMap {
163   typedef AtomicHashArray<
164       KeyT,
165       ValueT,
166       HashFcn,
167       EqualFcn,
168       Allocator,
169       ProbeFcn,
170       KeyConvertFcn>
171       SubMap;
172 
173  public:
174   typedef KeyT key_type;
175   typedef ValueT mapped_type;
176   typedef std::pair<const KeyT, ValueT> value_type;
177   typedef HashFcn hasher;
178   typedef EqualFcn key_equal;
179   typedef KeyConvertFcn key_convert;
180   typedef value_type* pointer;
181   typedef value_type& reference;
182   typedef const value_type& const_reference;
183   typedef std::ptrdiff_t difference_type;
184   typedef std::size_t size_type;
185   typedef typename SubMap::Config Config;
186 
187   template <class ContT, class IterVal, class SubIt>
188   struct ahm_iterator;
189 
190   typedef ahm_iterator<
191       const AtomicHashMap,
192       const value_type,
193       typename SubMap::const_iterator>
194       const_iterator;
195   typedef ahm_iterator<AtomicHashMap, value_type, typename SubMap::iterator>
196       iterator;
197 
198  public:
199   const float kGrowthFrac_; // How much to grow when we run out of capacity.
200 
201   // The constructor takes a finalSizeEst which is the optimal
202   // number of elements to maximize space utilization and performance,
203   // and a Config object to specify more advanced options.
204   explicit AtomicHashMap(size_t finalSizeEst, const Config& c = Config());
205 
206   AtomicHashMap(const AtomicHashMap&) = delete;
207   AtomicHashMap& operator=(const AtomicHashMap&) = delete;
208 
~AtomicHashMap()209   ~AtomicHashMap() {
210     const unsigned int numMaps =
211         numMapsAllocated_.load(std::memory_order_relaxed);
212     FOR_EACH_RANGE (i, 0, numMaps) {
213       SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
214       DCHECK(thisMap);
215       SubMap::destroy(thisMap);
216     }
217   }
218 
key_eq()219   key_equal key_eq() const { return key_equal(); }
hash_function()220   hasher hash_function() const { return hasher(); }
221 
222   /*
223    * insert --
224    *
225    *   Returns a pair with iterator to the element at r.first and
226    *   success.  Retrieve the index with ret.first.getIndex().
227    *
228    *   Does not overwrite on key collision, but returns an iterator to
229    *   the existing element (since this could due to a race with
230    *   another thread, it is often important to check this return
231    *   value).
232    *
233    *   Allocates new sub maps as the existing ones become full.  If
234    *   all sub maps are full, no element is inserted, and
235    *   AtomicHashMapFullError is thrown.
236    */
insert(const value_type & r)237   std::pair<iterator, bool> insert(const value_type& r) {
238     return emplace(r.first, r.second);
239   }
insert(key_type k,const mapped_type & v)240   std::pair<iterator, bool> insert(key_type k, const mapped_type& v) {
241     return emplace(k, v);
242   }
insert(value_type && r)243   std::pair<iterator, bool> insert(value_type&& r) {
244     return emplace(r.first, std::move(r.second));
245   }
insert(key_type k,mapped_type && v)246   std::pair<iterator, bool> insert(key_type k, mapped_type&& v) {
247     return emplace(k, std::move(v));
248   }
249 
250   /*
251    * emplace --
252    *
253    *   Same contract as insert(), but performs in-place construction
254    *   of the value type using the specified arguments.
255    *
256    *   Also, like find(), this method optionally allows 'key_in' to have a type
257    *   different from that stored in the table; see find(). If and only if no
258    *   equal key is already present, this method converts 'key_in' to a key of
259    *   type KeyT using the provided LookupKeyToKeyFcn.
260    */
261   template <
262       typename LookupKeyT = key_type,
263       typename LookupHashFcn = hasher,
264       typename LookupEqualFcn = key_equal,
265       typename LookupKeyToKeyFcn = key_convert,
266       typename... ArgTs>
267   std::pair<iterator, bool> emplace(LookupKeyT k, ArgTs&&... vCtorArg);
268 
269   /*
270    * find --
271    *
272    *   Returns the iterator to the element if found, otherwise end().
273    *
274    *   As an optional feature, the type of the key to look up (LookupKeyT) is
275    *   allowed to be different from the type of keys actually stored (KeyT).
276    *
277    *   This enables use cases where materializing the key is costly and usually
278    *   redundant, e.g., canonicalizing/interning a set of strings and being able
279    *   to look up by StringPiece. To use this feature, LookupHashFcn must take
280    *   a LookupKeyT, and LookupEqualFcn must take KeyT and LookupKeyT as first
281    *   and second parameter, respectively.
282    *
283    *   See folly/test/ArrayHashMapTest.cpp for sample usage.
284    */
285   template <
286       typename LookupKeyT = key_type,
287       typename LookupHashFcn = hasher,
288       typename LookupEqualFcn = key_equal>
289   iterator find(LookupKeyT k);
290 
291   template <
292       typename LookupKeyT = key_type,
293       typename LookupHashFcn = hasher,
294       typename LookupEqualFcn = key_equal>
295   const_iterator find(LookupKeyT k) const;
296 
297   /*
298    * erase --
299    *
300    *   Erases key k from the map
301    *
302    *   Returns 1 iff the key is found and erased, and 0 otherwise.
303    */
304   size_type erase(key_type k);
305 
306   /*
307    * clear --
308    *
309    *   Wipes all keys and values from primary map and destroys all secondary
310    *   maps.  Primary map remains allocated and thus the memory can be reused
311    *   in place.  Not thread safe.
312    *
313    */
314   void clear();
315 
316   /*
317    * size --
318    *
319    *  Returns the exact size of the map.  Note this is not as cheap as typical
320    *  size() implementations because, for each AtomicHashArray in this AHM, we
321    *  need to grab a lock and accumulate the values from all the thread local
322    *  counters.  See folly/ThreadCachedInt.h for more details.
323    */
324   size_t size() const;
325 
empty()326   bool empty() const { return size() == 0; }
327 
count(key_type k)328   size_type count(key_type k) const { return find(k) == end() ? 0 : 1; }
329 
330   /*
331    * findAt --
332    *
333    *   Returns an iterator into the map.
334    *
335    *   idx should only be an unmodified value returned by calling getIndex() on
336    *   a valid iterator returned by find() or insert(). If idx is invalid you
337    *   have a bug and the process aborts.
338    */
findAt(uint32_t idx)339   iterator findAt(uint32_t idx) {
340     SimpleRetT ret = findAtInternal(idx);
341     DCHECK_LT(ret.i, numSubMaps());
342     return iterator(
343         this,
344         ret.i,
345         subMaps_[ret.i].load(std::memory_order_relaxed)->makeIter(ret.j));
346   }
findAt(uint32_t idx)347   const_iterator findAt(uint32_t idx) const {
348     return const_cast<AtomicHashMap*>(this)->findAt(idx);
349   }
350 
351   // Total capacity - summation of capacities of all submaps.
352   size_t capacity() const;
353 
354   // Number of new insertions until current submaps are all at max load factor.
355   size_t spaceRemaining() const;
356 
setEntryCountThreadCacheSize(int32_t newSize)357   void setEntryCountThreadCacheSize(int32_t newSize) {
358     const int numMaps = numMapsAllocated_.load(std::memory_order_acquire);
359     for (int i = 0; i < numMaps; ++i) {
360       SubMap* map = subMaps_[i].load(std::memory_order_relaxed);
361       map->setEntryCountThreadCacheSize(newSize);
362     }
363   }
364 
365   // Number of sub maps allocated so far to implement this map.  The more there
366   // are, the worse the performance.
numSubMaps()367   int numSubMaps() const {
368     return numMapsAllocated_.load(std::memory_order_acquire);
369   }
370 
begin()371   iterator begin() {
372     iterator it(this, 0, subMaps_[0].load(std::memory_order_relaxed)->begin());
373     it.checkAdvanceToNextSubmap();
374     return it;
375   }
376 
begin()377   const_iterator begin() const {
378     const_iterator it(
379         this, 0, subMaps_[0].load(std::memory_order_relaxed)->begin());
380     it.checkAdvanceToNextSubmap();
381     return it;
382   }
383 
end()384   iterator end() { return iterator(); }
385 
end()386   const_iterator end() const { return const_iterator(); }
387 
388   /* Advanced functions for direct access: */
389 
390   inline uint32_t recToIdx(const value_type& r, bool mayInsert = true) {
391     SimpleRetT ret =
392         mayInsert ? insertInternal(r.first, r.second) : findInternal(r.first);
393     return encodeIndex(ret.i, ret.j);
394   }
395 
396   inline uint32_t recToIdx(value_type&& r, bool mayInsert = true) {
397     SimpleRetT ret = mayInsert ? insertInternal(r.first, std::move(r.second))
398                                : findInternal(r.first);
399     return encodeIndex(ret.i, ret.j);
400   }
401 
402   inline uint32_t recToIdx(
403       key_type k, const mapped_type& v, bool mayInsert = true) {
404     SimpleRetT ret = mayInsert ? insertInternal(k, v) : findInternal(k);
405     return encodeIndex(ret.i, ret.j);
406   }
407 
408   inline uint32_t recToIdx(key_type k, mapped_type&& v, bool mayInsert = true) {
409     SimpleRetT ret =
410         mayInsert ? insertInternal(k, std::move(v)) : findInternal(k);
411     return encodeIndex(ret.i, ret.j);
412   }
413 
414   inline uint32_t keyToIdx(const KeyT k, bool mayInsert = false) {
415     return recToIdx(value_type(k), mayInsert);
416   }
417 
idxToRec(uint32_t idx)418   inline const value_type& idxToRec(uint32_t idx) const {
419     SimpleRetT ret = findAtInternal(idx);
420     return subMaps_[ret.i].load(std::memory_order_relaxed)->idxToRec(ret.j);
421   }
422 
423   /* Private data and helper functions... */
424 
425  private:
426   // This limits primary submap size to 2^31 ~= 2 billion, secondary submap
427   // size to 2^(32 - kNumSubMapBits_ - 1) = 2^27 ~= 130 million, and num subMaps
428   // to 2^kNumSubMapBits_ = 16.
429   static const uint32_t kNumSubMapBits_ = 4;
430   static const uint32_t kSecondaryMapBit_ = 1u << 31; // Highest bit
431   static const uint32_t kSubMapIndexShift_ = 32 - kNumSubMapBits_ - 1;
432   static const uint32_t kSubMapIndexMask_ = (1 << kSubMapIndexShift_) - 1;
433   static const uint32_t kNumSubMaps_ = 1 << kNumSubMapBits_;
434   static const uintptr_t kLockedPtr_ = 0x88ULL << 48; // invalid pointer
435 
436   struct SimpleRetT {
437     uint32_t i;
438     size_t j;
439     bool success;
SimpleRetTSimpleRetT440     SimpleRetT(uint32_t ii, size_t jj, bool s) : i(ii), j(jj), success(s) {}
441     SimpleRetT() = default;
442   };
443 
444   template <
445       typename LookupKeyT = key_type,
446       typename LookupHashFcn = hasher,
447       typename LookupEqualFcn = key_equal,
448       typename LookupKeyToKeyFcn = key_convert,
449       typename... ArgTs>
450   SimpleRetT insertInternal(LookupKeyT key, ArgTs&&... value);
451 
452   template <
453       typename LookupKeyT = key_type,
454       typename LookupHashFcn = hasher,
455       typename LookupEqualFcn = key_equal>
456   SimpleRetT findInternal(const LookupKeyT k) const;
457 
458   SimpleRetT findAtInternal(uint32_t idx) const;
459 
460   std::atomic<SubMap*> subMaps_[kNumSubMaps_];
461   std::atomic<uint32_t> numMapsAllocated_;
462 
tryLockMap(unsigned int idx)463   inline bool tryLockMap(unsigned int idx) {
464     SubMap* val = nullptr;
465     return subMaps_[idx].compare_exchange_strong(
466         val, (SubMap*)kLockedPtr_, std::memory_order_acquire);
467   }
468 
469   static inline uint32_t encodeIndex(uint32_t subMap, uint32_t subMapIdx);
470 
471 }; // AtomicHashMap
472 
// Convenience alias: an AtomicHashMap whose sub-maps resolve collisions with
// quadratic probing (AtomicHashArrayQuadraticProbeFcn).  Hash and equality
// default to the standard-library functors; the allocator defaults to
// std::allocator<char> and, per the class contract above, must be stateless
// and allocate raw bytes.
template <
    class KeyT,
    class ValueT,
    class HashFcn = std::hash<KeyT>,
    class EqualFcn = std::equal_to<KeyT>,
    class Allocator = std::allocator<char>>
using QuadraticProbingAtomicHashMap = AtomicHashMap<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    AtomicHashArrayQuadraticProbeFcn>;
486 } // namespace folly
487 
488 #include <folly/AtomicHashMap-inl.h>
489