1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
10 
11 #include <memory>
12 #include <string>
13 
14 #include "cache/sharded_cache.h"
15 #include "port/lang.h"
16 #include "port/malloc.h"
17 #include "port/port.h"
18 #include "rocksdb/secondary_cache.h"
19 #include "util/autovector.h"
20 
21 namespace ROCKSDB_NAMESPACE {
22 
23 // LRU cache implementation. This class is not thread-safe.
24 
25 // An entry is a variable length heap-allocated structure.
26 // Entries are referenced by cache and/or by any external entity.
27 // The cache keeps all its entries in a hash table. Some elements
28 // are also stored on LRU list.
29 //
30 // LRUHandle can be in these states:
31 // 1. Referenced externally AND in hash table.
32 //    In that case the entry is *not* in the LRU list
33 //    (refs >= 1 && in_cache == true)
34 // 2. Not referenced externally AND in hash table.
35 //    In that case the entry is in the LRU list and can be freed.
36 //    (refs == 0 && in_cache == true)
37 // 3. Referenced externally AND not in hash table.
38 //    In that case the entry is not in the LRU list and not in hash table.
39 //    The entry can be freed when refs becomes 0.
40 //    (refs >= 1 && in_cache == false)
41 //
42 // All newly created LRUHandles are in state 1. If you call
43 // LRUCacheShard::Release on entry in state 1, it will go into state 2.
44 // To move from state 1 to state 3, either call LRUCacheShard::Erase or
45 // LRUCacheShard::Insert with the same key (but possibly different value).
46 // To move from state 2 to state 1, use LRUCacheShard::Lookup.
// Before destruction, make sure that no handles are in state 1. This means
// that every successful LRUCacheShard::Lookup/LRUCacheShard::Insert must have
// a matching LRUCacheShard::Release (to move into state 2) or
// LRUCacheShard::Erase (to move into state 3).
51 
52 struct LRUHandle {
53   void* value;
54   union Info {
Info()55     Info() {}
~Info()56     ~Info() {}
57     Cache::DeleterFn deleter;
58     const ShardedCache::CacheItemHelper* helper;
59   } info_;
60   // An entry is not added to the LRUHandleTable until the secondary cache
61   // lookup is complete, so its safe to have this union.
62   union {
63     LRUHandle* next_hash;
64     SecondaryCacheResultHandle* sec_handle;
65   };
66   LRUHandle* next;
67   LRUHandle* prev;
68   size_t charge;  // TODO(opt): Only allow uint32_t?
69   size_t key_length;
70   // The hash of key(). Used for fast sharding and comparisons.
71   uint32_t hash;
72   // The number of external refs to this entry. The cache itself is not counted.
73   uint32_t refs;
74 
75   enum Flags : uint8_t {
76     // Whether this entry is referenced by the hash table.
77     IN_CACHE = (1 << 0),
78     // Whether this entry is high priority entry.
79     IS_HIGH_PRI = (1 << 1),
80     // Whether this entry is in high-pri pool.
81     IN_HIGH_PRI_POOL = (1 << 2),
82     // Whether this entry has had any lookups (hits).
83     HAS_HIT = (1 << 3),
84     // Can this be inserted into the secondary cache
85     IS_SECONDARY_CACHE_COMPATIBLE = (1 << 4),
86     // Is the handle still being read from a lower tier
87     IS_PENDING = (1 << 5),
88     // Has the item been promoted from a lower tier
89     IS_PROMOTED = (1 << 6),
90   };
91 
92   uint8_t flags;
93 
94 #ifdef __SANITIZE_THREAD__
95   // TSAN can report a false data race on flags, where one thread is writing
96   // to one of the mutable bits and another thread is reading this immutable
97   // bit. So precisely suppress that TSAN warning, we separate out this bit
98   // during TSAN runs.
99   bool is_secondary_cache_compatible_for_tsan;
100 #endif  // __SANITIZE_THREAD__
101 
102   // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
103   char key_data[1];
104 
keyLRUHandle105   Slice key() const { return Slice(key_data, key_length); }
106 
107   // Increase the reference count by 1.
RefLRUHandle108   void Ref() { refs++; }
109 
110   // Just reduce the reference count by 1. Return true if it was last reference.
UnrefLRUHandle111   bool Unref() {
112     assert(refs > 0);
113     refs--;
114     return refs == 0;
115   }
116 
117   // Return true if there are external refs, false otherwise.
HasRefsLRUHandle118   bool HasRefs() const { return refs > 0; }
119 
InCacheLRUHandle120   bool InCache() const { return flags & IN_CACHE; }
IsHighPriLRUHandle121   bool IsHighPri() const { return flags & IS_HIGH_PRI; }
InHighPriPoolLRUHandle122   bool InHighPriPool() const { return flags & IN_HIGH_PRI_POOL; }
HasHitLRUHandle123   bool HasHit() const { return flags & HAS_HIT; }
IsSecondaryCacheCompatibleLRUHandle124   bool IsSecondaryCacheCompatible() const {
125 #ifdef __SANITIZE_THREAD__
126     return is_secondary_cache_compatible_for_tsan;
127 #else
128     return flags & IS_SECONDARY_CACHE_COMPATIBLE;
129 #endif  // __SANITIZE_THREAD__
130   }
IsPendingLRUHandle131   bool IsPending() const { return flags & IS_PENDING; }
IsPromotedLRUHandle132   bool IsPromoted() const { return flags & IS_PROMOTED; }
133 
SetInCacheLRUHandle134   void SetInCache(bool in_cache) {
135     if (in_cache) {
136       flags |= IN_CACHE;
137     } else {
138       flags &= ~IN_CACHE;
139     }
140   }
141 
SetPriorityLRUHandle142   void SetPriority(Cache::Priority priority) {
143     if (priority == Cache::Priority::HIGH) {
144       flags |= IS_HIGH_PRI;
145     } else {
146       flags &= ~IS_HIGH_PRI;
147     }
148   }
149 
SetInHighPriPoolLRUHandle150   void SetInHighPriPool(bool in_high_pri_pool) {
151     if (in_high_pri_pool) {
152       flags |= IN_HIGH_PRI_POOL;
153     } else {
154       flags &= ~IN_HIGH_PRI_POOL;
155     }
156   }
157 
SetHitLRUHandle158   void SetHit() { flags |= HAS_HIT; }
159 
SetSecondaryCacheCompatibleLRUHandle160   void SetSecondaryCacheCompatible(bool compat) {
161     if (compat) {
162       flags |= IS_SECONDARY_CACHE_COMPATIBLE;
163     } else {
164       flags &= ~IS_SECONDARY_CACHE_COMPATIBLE;
165     }
166 #ifdef __SANITIZE_THREAD__
167     is_secondary_cache_compatible_for_tsan = compat;
168 #endif  // __SANITIZE_THREAD__
169   }
170 
SetIncompleteLRUHandle171   void SetIncomplete(bool incomp) {
172     if (incomp) {
173       flags |= IS_PENDING;
174     } else {
175       flags &= ~IS_PENDING;
176     }
177   }
178 
SetPromotedLRUHandle179   void SetPromoted(bool promoted) {
180     if (promoted) {
181       flags |= IS_PROMOTED;
182     } else {
183       flags &= ~IS_PROMOTED;
184     }
185   }
186 
FreeLRUHandle187   void Free() {
188     assert(refs == 0);
189 #ifdef __SANITIZE_THREAD__
190     // Here we can safely assert they are the same without a data race reported
191     assert(((flags & IS_SECONDARY_CACHE_COMPATIBLE) != 0) ==
192            is_secondary_cache_compatible_for_tsan);
193 #endif  // __SANITIZE_THREAD__
194     if (!IsSecondaryCacheCompatible() && info_.deleter) {
195       (*info_.deleter)(key(), value);
196     } else if (IsSecondaryCacheCompatible()) {
197       if (IsPending()) {
198         assert(sec_handle != nullptr);
199         SecondaryCacheResultHandle* tmp_sec_handle = sec_handle;
200         tmp_sec_handle->Wait();
201         value = tmp_sec_handle->Value();
202         delete tmp_sec_handle;
203       }
204       if (value) {
205         (*info_.helper->del_cb)(key(), value);
206       }
207     }
208     delete[] reinterpret_cast<char*>(this);
209   }
210 
211   // Calculate the memory usage by metadata
CalcTotalChargeLRUHandle212   inline size_t CalcTotalCharge(
213       CacheMetadataChargePolicy metadata_charge_policy) {
214     size_t meta_charge = 0;
215     if (metadata_charge_policy == kFullChargeCacheMetadata) {
216 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
217       meta_charge += malloc_usable_size(static_cast<void*>(this));
218 #else
219       // This is the size that is used when a new handle is created
220       meta_charge += sizeof(LRUHandle) - 1 + key_length;
221 #endif
222     }
223     return charge + meta_charge;
224   }
225 };
226 
227 // We provide our own simple hash table since it removes a whole bunch
228 // of porting hacks and is also faster than some of the built-in hash
229 // table implementations in some of the compiler/runtime combinations
230 // we have tested.  E.g., readrandom speeds up by ~5% over the g++
231 // 4.4.3's builtin hashtable.
232 class LRUHandleTable {
233  public:
234   // If the table uses more hash bits than `max_upper_hash_bits`,
235   // it will eat into the bits used for sharding, which are constant
236   // for a given LRUHandleTable.
237   explicit LRUHandleTable(int max_upper_hash_bits);
238   ~LRUHandleTable();
239 
240   LRUHandle* Lookup(const Slice& key, uint32_t hash);
241   LRUHandle* Insert(LRUHandle* h);
242   LRUHandle* Remove(const Slice& key, uint32_t hash);
243 
244   template <typename T>
ApplyToEntriesRange(T func,uint32_t index_begin,uint32_t index_end)245   void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
246     for (uint32_t i = index_begin; i < index_end; i++) {
247       LRUHandle* h = list_[i];
248       while (h != nullptr) {
249         auto n = h->next_hash;
250         assert(h->InCache());
251         func(h);
252         h = n;
253       }
254     }
255   }
256 
GetLengthBits()257   int GetLengthBits() const { return length_bits_; }
258 
259  private:
260   // Return a pointer to slot that points to a cache entry that
261   // matches key/hash.  If there is no such cache entry, return a
262   // pointer to the trailing slot in the corresponding linked list.
263   LRUHandle** FindPointer(const Slice& key, uint32_t hash);
264 
265   void Resize();
266 
267   // Number of hash bits (upper because lower bits used for sharding)
268   // used for table index. Length == 1 << length_bits_
269   int length_bits_;
270 
271   // The table consists of an array of buckets where each bucket is
272   // a linked list of cache entries that hash into the bucket.
273   std::unique_ptr<LRUHandle*[]> list_;
274 
275   // Number of elements currently in the table
276   uint32_t elems_;
277 
278   // Set from max_upper_hash_bits (see constructor)
279   const int max_length_bits_;
280 };
281 
282 // A single shard of sharded cache.
ALIGN_AS(CACHE_LINE_SIZE)283 class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
284  public:
285   LRUCacheShard(size_t capacity, bool strict_capacity_limit,
286                 double high_pri_pool_ratio, bool use_adaptive_mutex,
287                 CacheMetadataChargePolicy metadata_charge_policy,
288                 int max_upper_hash_bits,
289                 const std::shared_ptr<SecondaryCache>& secondary_cache);
290   virtual ~LRUCacheShard() override = default;
291 
292   // Separate from constructor so caller can easily make an array of LRUCache
293   // if current usage is more than new capacity, the function will attempt to
294   // free the needed space
295   virtual void SetCapacity(size_t capacity) override;
296 
297   // Set the flag to reject insertion if cache if full.
298   virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;
299 
300   // Set percentage of capacity reserved for high-pri cache entries.
301   void SetHighPriorityPoolRatio(double high_pri_pool_ratio);
302 
303   // Like Cache methods, but with an extra "hash" parameter.
304   virtual Status Insert(const Slice& key, uint32_t hash, void* value,
305                         size_t charge, Cache::DeleterFn deleter,
306                         Cache::Handle** handle,
307                         Cache::Priority priority) override {
308     return Insert(key, hash, value, charge, deleter, nullptr, handle, priority);
309   }
310   virtual Status Insert(const Slice& key, uint32_t hash, void* value,
311                         const Cache::CacheItemHelper* helper, size_t charge,
312                         Cache::Handle** handle,
313                         Cache::Priority priority) override {
314     assert(helper);
315     return Insert(key, hash, value, charge, nullptr, helper, handle, priority);
316   }
317   // If helper_cb is null, the values of the following arguments don't
318   // matter
319   virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash,
320                                 const ShardedCache::CacheItemHelper* helper,
321                                 const ShardedCache::CreateCallback& create_cb,
322                                 ShardedCache::Priority priority, bool wait,
323                                 Statistics* stats) override;
324   virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override {
325     return Lookup(key, hash, nullptr, nullptr, Cache::Priority::LOW, true,
326                   nullptr);
327   }
328   virtual bool Release(Cache::Handle* handle, bool /*useful*/,
329                        bool force_erase) override {
330     return Release(handle, force_erase);
331   }
332   virtual bool IsReady(Cache::Handle* /*handle*/) override;
333   virtual void Wait(Cache::Handle* /*handle*/) override {}
334   virtual bool Ref(Cache::Handle* handle) override;
335   virtual bool Release(Cache::Handle* handle,
336                        bool force_erase = false) override;
337   virtual void Erase(const Slice& key, uint32_t hash) override;
338 
339   // Although in some platforms the update of size_t is atomic, to make sure
340   // GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
341   // protect them with mutex_.
342 
343   virtual size_t GetUsage() const override;
344   virtual size_t GetPinnedUsage() const override;
345 
346   virtual void ApplyToSomeEntries(
347       const std::function<void(const Slice& key, void* value, size_t charge,
348                                DeleterFn deleter)>& callback,
349       uint32_t average_entries_per_lock, uint32_t* state) override;
350 
351   virtual void EraseUnRefEntries() override;
352 
353   virtual std::string GetPrintableOptions() const override;
354 
355   void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri);
356 
357   //  Retrieves number of elements in LRU, for unit test purpose only
358   //  not threadsafe
359   size_t TEST_GetLRUSize();
360 
361   //  Retrieves high pri pool ratio
362   double GetHighPriPoolRatio();
363 
364  private:
365   friend class LRUCache;
366   // Insert an item into the hash table and, if handle is null, insert into
367   // the LRU list. Older items are evicted as necessary. If the cache is full
368   // and free_handle_on_fail is true, the item is deleted and handle is set to.
369   Status InsertItem(LRUHandle* item, Cache::Handle** handle,
370                     bool free_handle_on_fail);
371   Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
372                 DeleterFn deleter, const Cache::CacheItemHelper* helper,
373                 Cache::Handle** handle, Cache::Priority priority);
374   // Promote an item looked up from the secondary cache to the LRU cache. The
375   // item is only inserted into the hash table and not the LRU list, and only
376   // if the cache is not at full capacity, as is the case during Insert.  The
377   // caller should hold a reference on the LRUHandle. When the caller releases
378   // the last reference, the item is added to the LRU list.
379   // The item is promoted to the high pri or low pri pool as specified by the
380   // caller in Lookup.
381   void Promote(LRUHandle* e);
382   void LRU_Remove(LRUHandle* e);
383   void LRU_Insert(LRUHandle* e);
384 
385   // Overflow the last entry in high-pri pool to low-pri pool until size of
386   // high-pri pool is no larger than the size specify by high_pri_pool_pct.
387   void MaintainPoolSize();
388 
389   // Free some space following strict LRU policy until enough space
390   // to hold (usage_ + charge) is freed or the lru list is empty
391   // This function is not thread safe - it needs to be executed while
392   // holding the mutex_
393   void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
394 
395   // Initialized before use.
396   size_t capacity_;
397 
398   // Memory size for entries in high-pri pool.
399   size_t high_pri_pool_usage_;
400 
401   // Whether to reject insertion if cache reaches its full capacity.
402   bool strict_capacity_limit_;
403 
404   // Ratio of capacity reserved for high priority cache entries.
405   double high_pri_pool_ratio_;
406 
407   // High-pri pool size, equals to capacity * high_pri_pool_ratio.
408   // Remember the value to avoid recomputing each time.
409   double high_pri_pool_capacity_;
410 
411   // Dummy head of LRU list.
412   // lru.prev is newest entry, lru.next is oldest entry.
413   // LRU contains items which can be evicted, ie reference only by cache
414   LRUHandle lru_;
415 
416   // Pointer to head of low-pri pool in LRU list.
417   LRUHandle* lru_low_pri_;
418 
419   // ------------^^^^^^^^^^^^^-----------
420   // Not frequently modified data members
421   // ------------------------------------
422   //
423   // We separate data members that are updated frequently from the ones that
424   // are not frequently updated so that they don't share the same cache line
425   // which will lead into false cache sharing
426   //
427   // ------------------------------------
428   // Frequently modified data members
429   // ------------vvvvvvvvvvvvv-----------
430   LRUHandleTable table_;
431 
432   // Memory size for entries residing in the cache
433   size_t usage_;
434 
435   // Memory size for entries residing only in the LRU list
436   size_t lru_usage_;
437 
438   // mutex_ protects the following state.
439   // We don't count mutex_ as the cache's internal state so semantically we
440   // don't mind mutex_ invoking the non-const actions.
441   mutable port::Mutex mutex_;
442 
443   std::shared_ptr<SecondaryCache> secondary_cache_;
444 };
445 
446 class LRUCache
447 #ifdef NDEBUG
448     final
449 #endif
450     : public ShardedCache {
451  public:
452   LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
453            double high_pri_pool_ratio,
454            std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
455            bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
456            CacheMetadataChargePolicy metadata_charge_policy =
457                kDontChargeCacheMetadata,
458            const std::shared_ptr<SecondaryCache>& secondary_cache = nullptr);
459   virtual ~LRUCache();
Name()460   virtual const char* Name() const override { return "LRUCache"; }
461   virtual CacheShard* GetShard(uint32_t shard) override;
462   virtual const CacheShard* GetShard(uint32_t shard) const override;
463   virtual void* Value(Handle* handle) override;
464   virtual size_t GetCharge(Handle* handle) const override;
465   virtual uint32_t GetHash(Handle* handle) const override;
466   virtual DeleterFn GetDeleter(Handle* handle) const override;
467   virtual void DisownData() override;
468   virtual void WaitAll(std::vector<Handle*>& handles) override;
469 
470   //  Retrieves number of elements in LRU, for unit test purpose only
471   size_t TEST_GetLRUSize();
472   //  Retrieves high pri pool ratio
473   double GetHighPriPoolRatio();
474 
475  private:
476   LRUCacheShard* shards_ = nullptr;
477   int num_shards_ = 0;
478   std::shared_ptr<SecondaryCache> secondary_cache_;
479 };
480 
481 }  // namespace ROCKSDB_NAMESPACE
482