/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * AtomicHashMap --
 *
 * A high-performance concurrent hash map with int32_t or int64_t keys. Supports
 * insert, find(key), findAt(index), erase(key), size, and more. Memory cannot
 * be freed or reclaimed by erase. Can grow to a maximum of about 18 times the
 * initial capacity, but performance degrades linearly with growth. Can also be
 * used as an object store with unique 32-bit references directly into the
 * internal storage (retrieved with iterator::getIndex()).
 *
 * Advantages:
 *    - High-performance (~2-4x tbb::concurrent_hash_map in heavily
 *      multi-threaded environments).
 *    - Efficient memory usage if initial capacity is not overestimated
 *      (especially for small keys and values).
 *    - Good fragmentation properties (only allocates in large slabs which can
 *      be reused with clear() and never move).
 *    - Can generate unique, long-lived 32-bit references for efficient lookup
 *      (see findAt()).
 *
 * Disadvantages:
 *    - Keys must be native int32_t or int64_t, or explicitly converted.
 *    - Must be able to specify unique empty, locked, and erased keys.
 *    - Performance degrades linearly as size grows beyond initialization
 *      capacity.
 *    - Max size limit of ~18x initial size (dependent on max load factor).
 *    - Memory is not freed or reclaimed by erase.
 *
 * Usage and Operation Details:
 *   Simple performance/memory tradeoff with maxLoadFactor.  Higher load
 *   factors give better memory utilization but probe lengths increase,
 *   reducing performance.
 *
 * Implementation and Performance Details:
 *   AHArray is a fixed size contiguous block of value_type cells.  When
 *   writing a cell, the key is locked while the rest of the record is
 *   written.  Once done, the cell is unlocked by setting the key.  find()
 *   is completely wait-free and doesn't require any non-relaxed atomic
 *   operations.  AHA cannot grow beyond initialization capacity, but is
 *   faster because of reduced data indirection.
 *
 *   AHMap is a wrapper around AHArray sub-maps that allows growth and provides
 *   an interface closer to the STL UnorderedAssociativeContainer concept.
 *   These sub-maps are allocated on the fly and are processed in series, so
 *   the more there are (from growing past initial capacity), the worse the
 *   performance.
 *
 *   Insert returns false if there is a key collision and throws if the max
 *   size of the map is exceeded.
 *
 *   Benchmark performance with 8 simultaneous threads processing 1 million
 *   unique <int64_t, int64_t> entries on a 4-core, 2.5 GHz machine:
 *
 *     Load Factor   Mem Efficiency   usec/Insert   usec/Find
 *         50%             50%           0.19         0.05
 *         85%             85%           0.20         0.06
 *         90%             90%           0.23         0.08
 *         95%             95%           0.27         0.10
 *
 * See folly/tests/AtomicHashMapTest.cpp for more benchmarks.
76 * 77 * @author Spencer Ahrens <sahrens@fb.com> 78 * @author Jordan DeLong <delong.j@fb.com> 79 * 80 */ 81 82 #pragma once 83 #define FOLLY_ATOMICHASHMAP_H_ 84 85 #include <atomic> 86 #include <functional> 87 #include <stdexcept> 88 89 #include <folly/AtomicHashArray.h> 90 #include <folly/CPortability.h> 91 #include <folly/Likely.h> 92 #include <folly/ThreadCachedInt.h> 93 #include <folly/container/Foreach.h> 94 #include <folly/hash/Hash.h> 95 96 namespace folly { 97 98 /* 99 * AtomicHashMap provides an interface somewhat similar to the 100 * UnorderedAssociativeContainer concept in C++. This does not 101 * exactly match this concept (or even the basic Container concept), 102 * because of some restrictions imposed by our datastructure. 103 * 104 * Specific differences (there are quite a few): 105 * 106 * - Efficiently thread safe for inserts (main point of this stuff), 107 * wait-free for lookups. 108 * 109 * - You can erase from this container, but the cell containing the key will 110 * not be free or reclaimed. 111 * 112 * - You can erase everything by calling clear() (and you must guarantee only 113 * one thread can be using the container to do that). 114 * 115 * - We aren't DefaultConstructible, CopyConstructible, Assignable, or 116 * EqualityComparable. (Most of these are probably not something 117 * you actually want to do with this anyway.) 118 * 119 * - We don't support the various bucket functions, rehash(), 120 * reserve(), or equal_range(). Also no constructors taking 121 * iterators, although this could change. 122 * 123 * - Several insertion functions, notably operator[], are not 124 * implemented. It is a little too easy to misuse these functions 125 * with this container, where part of the point is that when an 126 * insertion happens for a new key, it will atomically have the 127 * desired value. 128 * 129 * - The map has no templated insert() taking an iterator range, but 130 * we do provide an insert(key, value). 
The latter seems more 131 * frequently useful for this container (to avoid sprinkling 132 * make_pair everywhere), and providing both can lead to some gross 133 * template error messages. 134 * 135 * - The Allocator must not be stateful (a new instance will be spun up for 136 * each allocation), and its allocate() method must take a raw number of 137 * bytes. 138 * 139 * - KeyT must be a 32 bit or 64 bit atomic integer type, and you must 140 * define special 'locked' and 'empty' key values in the ctor 141 * 142 * - We don't take the Hash function object as an instance in the 143 * constructor. 144 * 145 */ 146 147 // Thrown when insertion fails due to running out of space for 148 // submaps. 149 struct FOLLY_EXPORT AtomicHashMapFullError : std::runtime_error { AtomicHashMapFullErrorAtomicHashMapFullError150 explicit AtomicHashMapFullError() 151 : std::runtime_error("AtomicHashMap is full") {} 152 }; 153 154 template < 155 class KeyT, 156 class ValueT, 157 class HashFcn, 158 class EqualFcn, 159 class Allocator, 160 class ProbeFcn, 161 class KeyConvertFcn> 162 class AtomicHashMap { 163 typedef AtomicHashArray< 164 KeyT, 165 ValueT, 166 HashFcn, 167 EqualFcn, 168 Allocator, 169 ProbeFcn, 170 KeyConvertFcn> 171 SubMap; 172 173 public: 174 typedef KeyT key_type; 175 typedef ValueT mapped_type; 176 typedef std::pair<const KeyT, ValueT> value_type; 177 typedef HashFcn hasher; 178 typedef EqualFcn key_equal; 179 typedef KeyConvertFcn key_convert; 180 typedef value_type* pointer; 181 typedef value_type& reference; 182 typedef const value_type& const_reference; 183 typedef std::ptrdiff_t difference_type; 184 typedef std::size_t size_type; 185 typedef typename SubMap::Config Config; 186 187 template <class ContT, class IterVal, class SubIt> 188 struct ahm_iterator; 189 190 typedef ahm_iterator< 191 const AtomicHashMap, 192 const value_type, 193 typename SubMap::const_iterator> 194 const_iterator; 195 typedef ahm_iterator<AtomicHashMap, value_type, typename SubMap::iterator> 
196 iterator; 197 198 public: 199 const float kGrowthFrac_; // How much to grow when we run out of capacity. 200 201 // The constructor takes a finalSizeEst which is the optimal 202 // number of elements to maximize space utilization and performance, 203 // and a Config object to specify more advanced options. 204 explicit AtomicHashMap(size_t finalSizeEst, const Config& c = Config()); 205 206 AtomicHashMap(const AtomicHashMap&) = delete; 207 AtomicHashMap& operator=(const AtomicHashMap&) = delete; 208 ~AtomicHashMap()209 ~AtomicHashMap() { 210 const unsigned int numMaps = 211 numMapsAllocated_.load(std::memory_order_relaxed); 212 FOR_EACH_RANGE (i, 0, numMaps) { 213 SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed); 214 DCHECK(thisMap); 215 SubMap::destroy(thisMap); 216 } 217 } 218 key_eq()219 key_equal key_eq() const { return key_equal(); } hash_function()220 hasher hash_function() const { return hasher(); } 221 222 /* 223 * insert -- 224 * 225 * Returns a pair with iterator to the element at r.first and 226 * success. Retrieve the index with ret.first.getIndex(). 227 * 228 * Does not overwrite on key collision, but returns an iterator to 229 * the existing element (since this could due to a race with 230 * another thread, it is often important to check this return 231 * value). 232 * 233 * Allocates new sub maps as the existing ones become full. If 234 * all sub maps are full, no element is inserted, and 235 * AtomicHashMapFullError is thrown. 
236 */ insert(const value_type & r)237 std::pair<iterator, bool> insert(const value_type& r) { 238 return emplace(r.first, r.second); 239 } insert(key_type k,const mapped_type & v)240 std::pair<iterator, bool> insert(key_type k, const mapped_type& v) { 241 return emplace(k, v); 242 } insert(value_type && r)243 std::pair<iterator, bool> insert(value_type&& r) { 244 return emplace(r.first, std::move(r.second)); 245 } insert(key_type k,mapped_type && v)246 std::pair<iterator, bool> insert(key_type k, mapped_type&& v) { 247 return emplace(k, std::move(v)); 248 } 249 250 /* 251 * emplace -- 252 * 253 * Same contract as insert(), but performs in-place construction 254 * of the value type using the specified arguments. 255 * 256 * Also, like find(), this method optionally allows 'key_in' to have a type 257 * different from that stored in the table; see find(). If and only if no 258 * equal key is already present, this method converts 'key_in' to a key of 259 * type KeyT using the provided LookupKeyToKeyFcn. 260 */ 261 template < 262 typename LookupKeyT = key_type, 263 typename LookupHashFcn = hasher, 264 typename LookupEqualFcn = key_equal, 265 typename LookupKeyToKeyFcn = key_convert, 266 typename... ArgTs> 267 std::pair<iterator, bool> emplace(LookupKeyT k, ArgTs&&... vCtorArg); 268 269 /* 270 * find -- 271 * 272 * Returns the iterator to the element if found, otherwise end(). 273 * 274 * As an optional feature, the type of the key to look up (LookupKeyT) is 275 * allowed to be different from the type of keys actually stored (KeyT). 276 * 277 * This enables use cases where materializing the key is costly and usually 278 * redundant, e.g., canonicalizing/interning a set of strings and being able 279 * to look up by StringPiece. To use this feature, LookupHashFcn must take 280 * a LookupKeyT, and LookupEqualFcn must take KeyT and LookupKeyT as first 281 * and second parameter, respectively. 282 * 283 * See folly/test/ArrayHashMapTest.cpp for sample usage. 
284 */ 285 template < 286 typename LookupKeyT = key_type, 287 typename LookupHashFcn = hasher, 288 typename LookupEqualFcn = key_equal> 289 iterator find(LookupKeyT k); 290 291 template < 292 typename LookupKeyT = key_type, 293 typename LookupHashFcn = hasher, 294 typename LookupEqualFcn = key_equal> 295 const_iterator find(LookupKeyT k) const; 296 297 /* 298 * erase -- 299 * 300 * Erases key k from the map 301 * 302 * Returns 1 iff the key is found and erased, and 0 otherwise. 303 */ 304 size_type erase(key_type k); 305 306 /* 307 * clear -- 308 * 309 * Wipes all keys and values from primary map and destroys all secondary 310 * maps. Primary map remains allocated and thus the memory can be reused 311 * in place. Not thread safe. 312 * 313 */ 314 void clear(); 315 316 /* 317 * size -- 318 * 319 * Returns the exact size of the map. Note this is not as cheap as typical 320 * size() implementations because, for each AtomicHashArray in this AHM, we 321 * need to grab a lock and accumulate the values from all the thread local 322 * counters. See folly/ThreadCachedInt.h for more details. 323 */ 324 size_t size() const; 325 empty()326 bool empty() const { return size() == 0; } 327 count(key_type k)328 size_type count(key_type k) const { return find(k) == end() ? 0 : 1; } 329 330 /* 331 * findAt -- 332 * 333 * Returns an iterator into the map. 334 * 335 * idx should only be an unmodified value returned by calling getIndex() on 336 * a valid iterator returned by find() or insert(). If idx is invalid you 337 * have a bug and the process aborts. 
338 */ findAt(uint32_t idx)339 iterator findAt(uint32_t idx) { 340 SimpleRetT ret = findAtInternal(idx); 341 DCHECK_LT(ret.i, numSubMaps()); 342 return iterator( 343 this, 344 ret.i, 345 subMaps_[ret.i].load(std::memory_order_relaxed)->makeIter(ret.j)); 346 } findAt(uint32_t idx)347 const_iterator findAt(uint32_t idx) const { 348 return const_cast<AtomicHashMap*>(this)->findAt(idx); 349 } 350 351 // Total capacity - summation of capacities of all submaps. 352 size_t capacity() const; 353 354 // Number of new insertions until current submaps are all at max load factor. 355 size_t spaceRemaining() const; 356 setEntryCountThreadCacheSize(int32_t newSize)357 void setEntryCountThreadCacheSize(int32_t newSize) { 358 const int numMaps = numMapsAllocated_.load(std::memory_order_acquire); 359 for (int i = 0; i < numMaps; ++i) { 360 SubMap* map = subMaps_[i].load(std::memory_order_relaxed); 361 map->setEntryCountThreadCacheSize(newSize); 362 } 363 } 364 365 // Number of sub maps allocated so far to implement this map. The more there 366 // are, the worse the performance. numSubMaps()367 int numSubMaps() const { 368 return numMapsAllocated_.load(std::memory_order_acquire); 369 } 370 begin()371 iterator begin() { 372 iterator it(this, 0, subMaps_[0].load(std::memory_order_relaxed)->begin()); 373 it.checkAdvanceToNextSubmap(); 374 return it; 375 } 376 begin()377 const_iterator begin() const { 378 const_iterator it( 379 this, 0, subMaps_[0].load(std::memory_order_relaxed)->begin()); 380 it.checkAdvanceToNextSubmap(); 381 return it; 382 } 383 end()384 iterator end() { return iterator(); } 385 end()386 const_iterator end() const { return const_iterator(); } 387 388 /* Advanced functions for direct access: */ 389 390 inline uint32_t recToIdx(const value_type& r, bool mayInsert = true) { 391 SimpleRetT ret = 392 mayInsert ? 
insertInternal(r.first, r.second) : findInternal(r.first); 393 return encodeIndex(ret.i, ret.j); 394 } 395 396 inline uint32_t recToIdx(value_type&& r, bool mayInsert = true) { 397 SimpleRetT ret = mayInsert ? insertInternal(r.first, std::move(r.second)) 398 : findInternal(r.first); 399 return encodeIndex(ret.i, ret.j); 400 } 401 402 inline uint32_t recToIdx( 403 key_type k, const mapped_type& v, bool mayInsert = true) { 404 SimpleRetT ret = mayInsert ? insertInternal(k, v) : findInternal(k); 405 return encodeIndex(ret.i, ret.j); 406 } 407 408 inline uint32_t recToIdx(key_type k, mapped_type&& v, bool mayInsert = true) { 409 SimpleRetT ret = 410 mayInsert ? insertInternal(k, std::move(v)) : findInternal(k); 411 return encodeIndex(ret.i, ret.j); 412 } 413 414 inline uint32_t keyToIdx(const KeyT k, bool mayInsert = false) { 415 return recToIdx(value_type(k), mayInsert); 416 } 417 idxToRec(uint32_t idx)418 inline const value_type& idxToRec(uint32_t idx) const { 419 SimpleRetT ret = findAtInternal(idx); 420 return subMaps_[ret.i].load(std::memory_order_relaxed)->idxToRec(ret.j); 421 } 422 423 /* Private data and helper functions... */ 424 425 private: 426 // This limits primary submap size to 2^31 ~= 2 billion, secondary submap 427 // size to 2^(32 - kNumSubMapBits_ - 1) = 2^27 ~= 130 million, and num subMaps 428 // to 2^kNumSubMapBits_ = 16. 
429 static const uint32_t kNumSubMapBits_ = 4; 430 static const uint32_t kSecondaryMapBit_ = 1u << 31; // Highest bit 431 static const uint32_t kSubMapIndexShift_ = 32 - kNumSubMapBits_ - 1; 432 static const uint32_t kSubMapIndexMask_ = (1 << kSubMapIndexShift_) - 1; 433 static const uint32_t kNumSubMaps_ = 1 << kNumSubMapBits_; 434 static const uintptr_t kLockedPtr_ = 0x88ULL << 48; // invalid pointer 435 436 struct SimpleRetT { 437 uint32_t i; 438 size_t j; 439 bool success; SimpleRetTSimpleRetT440 SimpleRetT(uint32_t ii, size_t jj, bool s) : i(ii), j(jj), success(s) {} 441 SimpleRetT() = default; 442 }; 443 444 template < 445 typename LookupKeyT = key_type, 446 typename LookupHashFcn = hasher, 447 typename LookupEqualFcn = key_equal, 448 typename LookupKeyToKeyFcn = key_convert, 449 typename... ArgTs> 450 SimpleRetT insertInternal(LookupKeyT key, ArgTs&&... value); 451 452 template < 453 typename LookupKeyT = key_type, 454 typename LookupHashFcn = hasher, 455 typename LookupEqualFcn = key_equal> 456 SimpleRetT findInternal(const LookupKeyT k) const; 457 458 SimpleRetT findAtInternal(uint32_t idx) const; 459 460 std::atomic<SubMap*> subMaps_[kNumSubMaps_]; 461 std::atomic<uint32_t> numMapsAllocated_; 462 tryLockMap(unsigned int idx)463 inline bool tryLockMap(unsigned int idx) { 464 SubMap* val = nullptr; 465 return subMaps_[idx].compare_exchange_strong( 466 val, (SubMap*)kLockedPtr_, std::memory_order_acquire); 467 } 468 469 static inline uint32_t encodeIndex(uint32_t subMap, uint32_t subMapIdx); 470 471 }; // AtomicHashMap 472 473 template < 474 class KeyT, 475 class ValueT, 476 class HashFcn = std::hash<KeyT>, 477 class EqualFcn = std::equal_to<KeyT>, 478 class Allocator = std::allocator<char>> 479 using QuadraticProbingAtomicHashMap = AtomicHashMap< 480 KeyT, 481 ValueT, 482 HashFcn, 483 EqualFcn, 484 Allocator, 485 AtomicHashArrayQuadraticProbeFcn>; 486 } // namespace folly 487 488 #include <folly/AtomicHashMap-inl.h> 489