// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once

#include <string>

#include "cache/sharded_cache.h"

#include "port/malloc.h"
#include "port/port.h"
#include "util/autovector.h"

namespace ROCKSDB_NAMESPACE {

// LRU cache implementation. This class is not thread-safe.

// An entry is a variable length heap-allocated structure.
// Entries are referenced by cache and/or by any external entity.
// The cache keeps all its entries in a hash table. Some elements
// are also stored on LRU list.
//
// LRUHandle can be in these states:
// 1. Referenced externally AND in hash table.
//    In that case the entry is *not* in the LRU list
//    (refs >= 1 && in_cache == true)
// 2. Not referenced externally AND in hash table.
//    In that case the entry is in the LRU list and can be freed.
//    (refs == 0 && in_cache == true)
// 3. Referenced externally AND not in hash table.
//    In that case the entry is not in the LRU list and not in hash table.
//    The entry can be freed when refs becomes 0.
//    (refs >= 1 && in_cache == false)
//
// All newly created LRUHandles are in state 1. If you call
// LRUCacheShard::Release on entry in state 1, it will go into state 2.
// To move from state 1 to state 3, either call LRUCacheShard::Erase or
// LRUCacheShard::Insert with the same key (but possibly different value).
// To move from state 2 to state 1, use LRUCacheShard::Lookup.
// Before destruction, make sure that no handles are in state 1. This means
// that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert have a
// matching LRUCacheShard::Release (to move into state 2) or
// LRUCacheShard::Erase (to move into state 3).

struct LRUHandle {
  void* value;
  // Invoked with (key, value) by Free() to release the cached value; may be
  // null if the value needs no cleanup.
  void (*deleter)(const Slice&, void* value);
  // Next entry in the same hash-table bucket (see LRUHandleTable).
  LRUHandle* next_hash;
  // Doubly-linked LRU list pointers (only meaningful while on the LRU list).
  LRUHandle* next;
  LRUHandle* prev;
  size_t charge;  // TODO(opt): Only allow uint32_t?
  size_t key_length;
  // The hash of key(). Used for fast sharding and comparisons.
  uint32_t hash;
  // The number of external refs to this entry. The cache itself is not counted.
  uint32_t refs;

  // Bit flags packed into `flags` below.
  enum Flags : uint8_t {
    // Whether this entry is referenced by the hash table.
    IN_CACHE = (1 << 0),
    // Whether this entry is high priority entry.
    IS_HIGH_PRI = (1 << 1),
    // Whether this entry is in high-pri pool.
    IN_HIGH_PRI_POOL = (1 << 2),
    // Whether this entry has had any lookups (hits).
    HAS_HIT = (1 << 3),
  };

  uint8_t flags;

  // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
  // The handle is allocated as a raw char array large enough to hold the key
  // inline after the struct ("struct hack"); see Free() and CalcTotalCharge().
  char key_data[1];

  Slice key() const { return Slice(key_data, key_length); }

  // Increase the reference count by 1.
  void Ref() { refs++; }

  // Just reduce the reference count by 1. Return true if it was last reference.
  bool Unref() {
    assert(refs > 0);
    refs--;
    return refs == 0;
  }

  // Return true if there are external refs, false otherwise.
  bool HasRefs() const { return refs > 0; }

  bool InCache() const { return flags & IN_CACHE; }
  bool IsHighPri() const { return flags & IS_HIGH_PRI; }
  bool InHighPriPool() const { return flags & IN_HIGH_PRI_POOL; }
  bool HasHit() const { return flags & HAS_HIT; }

  void SetInCache(bool in_cache) {
    if (in_cache) {
      flags |= IN_CACHE;
    } else {
      flags &= ~IN_CACHE;
    }
  }

  void SetPriority(Cache::Priority priority) {
    if (priority == Cache::Priority::HIGH) {
      flags |= IS_HIGH_PRI;
    } else {
      flags &= ~IS_HIGH_PRI;
    }
  }

  void SetInHighPriPool(bool in_high_pri_pool) {
    if (in_high_pri_pool) {
      flags |= IN_HIGH_PRI_POOL;
    } else {
      flags &= ~IN_HIGH_PRI_POOL;
    }
  }

  void SetHit() { flags |= HAS_HIT; }

  // Release the value via the deleter (if any) and free the handle's storage.
  // Must only be called once the last reference is gone.
  void Free() {
    assert(refs == 0);
    if (deleter) {
      (*deleter)(key(), value);
    }
    // Matches the char-array allocation that placed the key inline after the
    // struct.
    delete[] reinterpret_cast<char*>(this);
  }

  // Calculate the memory usage by metadata.
  inline size_t CalcTotalCharge(
      CacheMetadataChargePolicy metadata_charge_policy) {
    size_t meta_charge = 0;
    if (metadata_charge_policy == kFullChargeCacheMetadata) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
      meta_charge += malloc_usable_size(static_cast<void*>(this));
#else
      // This is the size that is used when a new handle is created
      meta_charge += sizeof(LRUHandle) - 1 + key_length;
#endif
    }
    return charge + meta_charge;
  }
};

// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested. E.g., readrandom speeds up by ~5% over the g++
// 4.4.3's builtin hashtable.
// Chained hash table mapping (key, hash) -> LRUHandle*. Not thread-safe;
// callers (LRUCacheShard) synchronize externally.
class LRUHandleTable {
 public:
  LRUHandleTable();
  ~LRUHandleTable();

  LRUHandle* Lookup(const Slice& key, uint32_t hash);
  LRUHandle* Insert(LRUHandle* h);
  LRUHandle* Remove(const Slice& key, uint32_t hash);

  // Invokes func(h) on every entry in the table. Entries must not be inserted
  // or removed during iteration; every visited entry must be in the cache
  // (asserted).
  template <typename T>
  void ApplyToAllCacheEntries(T func) {
    for (uint32_t i = 0; i < length_; i++) {
      LRUHandle* h = list_[i];
      while (h != nullptr) {
        auto n = h->next_hash;
        assert(h->InCache());
        func(h);
        h = n;
      }
    }
  }

 private:
  // Return a pointer to slot that points to a cache entry that
  // matches key/hash.  If there is no such cache entry, return a
  // pointer to the trailing slot in the corresponding linked list.
  LRUHandle** FindPointer(const Slice& key, uint32_t hash);

  void Resize();

  // The table consists of an array of buckets where each bucket is
  // a linked list of cache entries that hash into the bucket.
  LRUHandle** list_;
  uint32_t length_;
  uint32_t elems_;
};

// A single shard of sharded cache.
class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
 public:
  LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                double high_pri_pool_ratio, bool use_adaptive_mutex,
                CacheMetadataChargePolicy metadata_charge_policy);
  virtual ~LRUCacheShard() override = default;

  // Separate from constructor so caller can easily make an array of LRUCache.
  // If current usage is more than new capacity, the function will attempt to
  // free the needed space.
  virtual void SetCapacity(size_t capacity) override;

  // Set the flag to reject insertion if cache is full.
  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;

  // Set percentage of capacity reserved for high-pri cache entries.
  void SetHighPriorityPoolRatio(double high_pri_pool_ratio);

  // Like Cache methods, but with an extra "hash" parameter.
  virtual Status Insert(const Slice& key, uint32_t hash, void* value,
                        size_t charge,
                        void (*deleter)(const Slice& key, void* value),
                        Cache::Handle** handle,
                        Cache::Priority priority) override;
  virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
  virtual bool Ref(Cache::Handle* handle) override;
  virtual bool Release(Cache::Handle* handle,
                       bool force_erase = false) override;
  virtual void Erase(const Slice& key, uint32_t hash) override;

  // Although in some platforms the update of size_t is atomic, to make sure
  // GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
  // protect them with mutex_.

  virtual size_t GetUsage() const override;
  virtual size_t GetPinnedUsage() const override;

  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                      bool thread_safe) override;

  virtual void EraseUnRefEntries() override;

  virtual std::string GetPrintableOptions() const override;

  void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri);

  // Retrieves number of elements in LRU, for unit test purpose only.
  // Not threadsafe.
  size_t TEST_GetLRUSize();

  // Retrieves high pri pool ratio.
  double GetHighPriPoolRatio();

 private:
  void LRU_Remove(LRUHandle* e);
  void LRU_Insert(LRUHandle* e);

  // Overflow the last entry in high-pri pool to low-pri pool until size of
  // high-pri pool is no larger than the size specify by high_pri_pool_pct.
  void MaintainPoolSize();

  // Free some space following strict LRU policy until enough space
  // to hold (usage_ + charge) is freed or the lru list is empty.
  // This function is not thread safe - it needs to be executed while
  // holding the mutex_.
  void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);

  // Initialized before use.
  size_t capacity_;

  // Memory size for entries in high-pri pool.
  size_t high_pri_pool_usage_;

  // Whether to reject insertion if cache reaches its full capacity.
  bool strict_capacity_limit_;

  // Ratio of capacity reserved for high priority cache entries.
  double high_pri_pool_ratio_;

  // High-pri pool size, equals to capacity * high_pri_pool_ratio.
  // Remember the value to avoid recomputing each time.
  double high_pri_pool_capacity_;

  // Dummy head of LRU list.
  // lru.prev is newest entry, lru.next is oldest entry.
  // LRU contains items which can be evicted, ie referenced only by cache.
  LRUHandle lru_;

  // Pointer to head of low-pri pool in LRU list.
  LRUHandle* lru_low_pri_;

  // ------------^^^^^^^^^^^^^-----------
  // Not frequently modified data members
  // ------------------------------------
  //
  // We separate data members that are updated frequently from the ones that
  // are not frequently updated so that they don't share the same cache line
  // which will lead into false sharing of cache lines.
  //
  // ------------------------------------
  // Frequently modified data members
  // ------------vvvvvvvvvvvvv-----------
  LRUHandleTable table_;

  // Memory size for entries residing in the cache.
  size_t usage_;

  // Memory size for entries residing only in the LRU list.
  size_t lru_usage_;

  // mutex_ protects the following state.
  // We don't count mutex_ as the cache's internal state so semantically we
  // don't mind mutex_ invoking the non-const actions.
  mutable port::Mutex mutex_;
};

// Sharded LRU cache: hashes each key to one of 2^num_shard_bits
// LRUCacheShards to reduce mutex contention.
class LRUCache
#ifdef NDEBUG
    final
#endif
    : public ShardedCache {
 public:
  LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
           double high_pri_pool_ratio,
           std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
           bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
           CacheMetadataChargePolicy metadata_charge_policy =
               kDontChargeCacheMetadata);
  virtual ~LRUCache();
  virtual const char* Name() const override { return "LRUCache"; }
  virtual CacheShard* GetShard(int shard) override;
  virtual const CacheShard* GetShard(int shard) const override;
  virtual void* Value(Handle* handle) override;
  virtual size_t GetCharge(Handle* handle) const override;
  virtual uint32_t GetHash(Handle* handle) const override;
  virtual void DisownData() override;

  // Retrieves number of elements in LRU, for unit test purpose only.
  size_t TEST_GetLRUSize();
  // Retrieves high pri pool ratio.
  double GetHighPriPoolRatio();

 private:
  LRUCacheShard* shards_ = nullptr;
  int num_shards_ = 0;
};

}  // namespace ROCKSDB_NAMESPACE