1 //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 #pragma once
7 
8 #ifndef ROCKSDB_LITE
9 
10 #include <limits>
11 #include <list>
12 #include <map>
13 #include <string>
14 #include <vector>
15 
16 #include "monitoring/histogram.h"
17 #include "rocksdb/env.h"
18 #include "rocksdb/persistent_cache.h"
19 #include "rocksdb/status.h"
20 #include "rocksdb/system_clock.h"
21 
22 // Persistent Cache
23 //
// Persistent cache is a tiered key-value cache that can use a persistent
// medium. It is a generic design and can leverage any storage medium --
// disk/SSD/NVM/RAM. The code has been kept generic, but significant
// benchmark/design/development time has been spent to make sure the cache
// performs appropriately for the respective storage medium.
29 // The file defines
30 // PersistentCacheTier    : Implementation that handles individual cache tier
// PersistentTieredCache  : Implementation that handles all tiers as a logical
//                          unit
33 //
34 // PersistentTieredCache architecture:
35 // +--------------------------+ PersistentCacheTier that handles multiple tiers
36 // | +----------------+       |
37 // | | RAM            | PersistentCacheTier that handles RAM (VolatileCacheImpl)
38 // | +----------------+       |
39 // |   | next                 |
40 // |   v                      |
41 // | +----------------+       |
42 // | | NVM            | PersistentCacheTier implementation that handles NVM
43 // | +----------------+ (BlockCacheImpl)
44 // |   | next                 |
45 // |   V                      |
46 // | +----------------+       |
47 // | | LE-SSD         | PersistentCacheTier implementation that handles LE-SSD
48 // | +----------------+ (BlockCacheImpl)
49 // |   |                      |
50 // |   V                      |
51 // |  null                    |
52 // +--------------------------+
53 //               |
54 //               V
55 //              null
56 namespace ROCKSDB_NAMESPACE {
57 
58 // Persistent Cache Config
59 //
60 // This struct captures all the options that are used to configure persistent
61 // cache. Some of the terminologies used in naming the options are
62 //
63 // dispatch size :
64 // This is the size in which IO is dispatched to the device
65 //
66 // write buffer size :
// This is the size of an individual write buffer. Write buffers are
// grouped to form a buffered file.
69 //
70 // cache size :
71 // This is the logical maximum for the cache size
72 //
// qdepth :
// This is the max number of IOs that can be issued to the device in parallel
//
// pipelining :
// The writer code path follows a pipelined architecture, which means the
// operations are handed off from one stage to another
79 //
80 // pipelining backlog size :
81 // With the pipelined architecture, there can always be backlogging of ops in
82 // pipeline queues. This is the maximum backlog size after which ops are dropped
83 // from queue
84 struct PersistentCacheConfig {
85   explicit PersistentCacheConfig(
86       Env* const _env, const std::string& _path, const uint64_t _cache_size,
87       const std::shared_ptr<Logger>& _log,
88       const uint32_t _write_buffer_size = 1 * 1024 * 1024 /*1MB*/) {
89     env = _env;
90     clock = (env != nullptr) ? env->GetSystemClock().get()
91                              : SystemClock::Default().get();
92     path = _path;
93     log = _log;
94     cache_size = _cache_size;
95     writer_dispatch_size = write_buffer_size = _write_buffer_size;
96   }
97 
98   //
99   // Validate the settings. Our intentions are to catch erroneous settings ahead
100   // of time instead going violating invariants or causing dead locks.
101   //
ValidateSettingsPersistentCacheConfig102   Status ValidateSettings() const {
103     // (1) check pre-conditions for variables
104     if (!env || path.empty()) {
105       return Status::InvalidArgument("empty or null args");
106     }
107 
108     // (2) assert size related invariants
109     // - cache size cannot be less than cache file size
110     // - individual write buffer size cannot be greater than cache file size
111     // - total write buffer size cannot be less than 2X cache file size
112     if (cache_size < cache_file_size || write_buffer_size >= cache_file_size ||
113         write_buffer_size * write_buffer_count() < 2 * cache_file_size) {
114       return Status::InvalidArgument("invalid cache size");
115     }
116 
117     // (2) check writer settings
118     // - Queue depth cannot be 0
119     // - writer_dispatch_size cannot be greater than writer_buffer_size
120     // - dispatch size and buffer size need to be aligned
121     if (!writer_qdepth || writer_dispatch_size > write_buffer_size ||
122         write_buffer_size % writer_dispatch_size) {
123       return Status::InvalidArgument("invalid writer settings");
124     }
125 
126     return Status::OK();
127   }
128 
129   //
130   // Env abstraction to use for system level operations
131   //
132   Env* env;
133   SystemClock* clock;
134   //
135   // Path for the block cache where blocks are persisted
136   //
137   std::string path;
138 
139   //
140   // Log handle for logging messages
141   //
142   std::shared_ptr<Logger> log;
143 
144   //
145   // Enable direct IO for reading
146   //
147   bool enable_direct_reads = true;
148 
149   //
150   // Enable direct IO for writing
151   //
152   bool enable_direct_writes = false;
153 
154   //
155   // Logical cache size
156   //
157   uint64_t cache_size = std::numeric_limits<uint64_t>::max();
158 
159   // cache-file-size
160   //
161   // Cache consists of multiples of small files. This parameter defines the
162   // size of an individual cache file
163   //
164   // default: 1M
165   uint32_t cache_file_size = 100ULL * 1024 * 1024;
166 
167   // writer-qdepth
168   //
169   // The writers can issues IO to the devices in parallel. This parameter
170   // controls the max number if IOs that can issues in parallel to the block
171   // device
172   //
173   // default :1
174   uint32_t writer_qdepth = 1;
175 
176   // pipeline-writes
177   //
178   // The write optionally follow pipelined architecture. This helps
179   // avoid regression in the eviction code path of the primary tier. This
180   // parameter defines if pipelining is enabled or disabled
181   //
182   // default: true
183   bool pipeline_writes = true;
184 
185   // max-write-pipeline-backlog-size
186   //
187   // Max pipeline buffer size. This is the maximum backlog we can accumulate
188   // while waiting for writes. After the limit, new ops will be dropped.
189   //
190   // Default: 1GiB
191   uint64_t max_write_pipeline_backlog_size = 1ULL * 1024 * 1024 * 1024;
192 
193   // write-buffer-size
194   //
195   // This is the size in which buffer slabs are allocated.
196   //
197   // Default: 1M
198   uint32_t write_buffer_size = 1ULL * 1024 * 1024;
199 
200   // write-buffer-count
201   //
202   // This is the total number of buffer slabs. This is calculated as a factor of
203   // file size in order to avoid dead lock.
write_buffer_countPersistentCacheConfig204   size_t write_buffer_count() const {
205     assert(write_buffer_size);
206     return static_cast<size_t>((writer_qdepth + 1.2) * cache_file_size /
207                                write_buffer_size);
208   }
209 
210   // writer-dispatch-size
211   //
212   // The writer thread will dispatch the IO at the specified IO size
213   //
214   // default: 1M
215   uint64_t writer_dispatch_size = 1ULL * 1024 * 1024;
216 
217   // is_compressed
218   //
219   // This option determines if the cache will run in compressed mode or
220   // uncompressed mode
221   bool is_compressed = true;
222 
223   PersistentCacheConfig MakePersistentCacheConfig(
224       const std::string& path, const uint64_t size,
225       const std::shared_ptr<Logger>& log);
226 
227   std::string ToString() const;
228 };
229 
230 // Persistent Cache Tier
231 //
// This is a logical abstraction that defines a tier of the persistent cache.
// Tiers can be stacked over one another. PersistentCache provides the basic
// definition for accessing/storing in the cache. PersistentCacheTier extends
// the interface to enable management and stacking of tiers.
236 class PersistentCacheTier : public PersistentCache {
237  public:
238   typedef std::shared_ptr<PersistentCacheTier> Tier;
239 
~PersistentCacheTier()240   virtual ~PersistentCacheTier() {}
241 
242   // Open the persistent cache tier
243   virtual Status Open();
244 
245   // Close the persistent cache tier
246   virtual Status Close();
247 
248   // Reserve space up to 'size' bytes
249   virtual bool Reserve(const size_t size);
250 
251   // Erase a key from the cache
252   virtual bool Erase(const Slice& key);
253 
254   // Print stats to string recursively
255   virtual std::string PrintStats();
256 
257   virtual PersistentCache::StatsType Stats() override;
258 
259   // Insert to page cache
260   virtual Status Insert(const Slice& page_key, const char* data,
261                         const size_t size) override = 0;
262 
263   // Lookup page cache by page identifier
264   virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
265                         size_t* size) override = 0;
266 
267   // Does it store compressed data ?
268   virtual bool IsCompressed() override = 0;
269 
270   virtual std::string GetPrintableOptions() const override = 0;
271 
272   virtual uint64_t NewId() override;
273 
274   // Return a reference to next tier
next_tier()275   virtual Tier& next_tier() { return next_tier_; }
276 
277   // Set the value for next tier
set_next_tier(const Tier & tier)278   virtual void set_next_tier(const Tier& tier) {
279     assert(!next_tier_);
280     next_tier_ = tier;
281   }
282 
TEST_Flush()283   virtual void TEST_Flush() {
284     if (next_tier_) {
285       next_tier_->TEST_Flush();
286     }
287   }
288 
289  private:
290   Tier next_tier_;  // next tier
291   std::atomic<uint64_t> last_id_{1};
292 };
293 
294 // PersistentTieredCache
295 //
// Abstraction that helps you construct tiers of persistent caches as a
// unified cache. The tier(s) of cache will act as a single tier for management
// ease and support PersistentCache methods for accessing data.
299 class PersistentTieredCache : public PersistentCacheTier {
300  public:
301   virtual ~PersistentTieredCache();
302 
303   Status Open() override;
304   Status Close() override;
305   bool Erase(const Slice& key) override;
306   std::string PrintStats() override;
307   PersistentCache::StatsType Stats() override;
308   Status Insert(const Slice& page_key, const char* data,
309                 const size_t size) override;
310   Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
311                 size_t* size) override;
312   bool IsCompressed() override;
313 
GetPrintableOptions()314   std::string GetPrintableOptions() const override {
315     return "PersistentTieredCache";
316   }
317 
318   void AddTier(const Tier& tier);
319 
next_tier()320   Tier& next_tier() override {
321     auto it = tiers_.end();
322     return (*it)->next_tier();
323   }
324 
set_next_tier(const Tier & tier)325   void set_next_tier(const Tier& tier) override {
326     auto it = tiers_.end();
327     (*it)->set_next_tier(tier);
328   }
329 
TEST_Flush()330   void TEST_Flush() override {
331     assert(!tiers_.empty());
332     tiers_.front()->TEST_Flush();
333     PersistentCacheTier::TEST_Flush();
334   }
335 
336  protected:
337   std::list<Tier> tiers_;  // list of tiers top-down
338 };
339 
340 }  // namespace ROCKSDB_NAMESPACE
341 
342 #endif
343