// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#pragma once

#ifndef ROCKSDB_LITE

#include <limits>
#include <list>
#include <map>
#include <string>
#include <vector>

#include "monitoring/histogram.h"
#include "rocksdb/env.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/status.h"
#include "rocksdb/system_clock.h"

// Persistent Cache
//
// Persistent cache is a tiered key-value cache that can use a persistent
// medium. It is a generic design and can leverage any storage medium --
// disk/SSD/NVM/RAM. The code has been kept generic but significant
// benchmark/design/development time has been spent to make sure the cache
// performs appropriately for respective storage medium.
// The file defines
// PersistentCacheTier    : Implementation that handles individual cache tier
// PersistentTieredCache  : Implementation that handles all tiers as a logical
//                          unit
//
// PersistentTieredCache architecture:
// +--------------------------+ PersistentCacheTier that handles multiple tiers
// | +----------------+       |
// | | RAM            | PersistentCacheTier that handles RAM (VolatileCacheImpl)
// | +----------------+       |
// |   | next                 |
// |   v                      |
// | +----------------+       |
// | | NVM            | PersistentCacheTier implementation that handles NVM
// | +----------------+ (BlockCacheImpl)
// |   | next                 |
// |   V                      |
// | +----------------+       |
// | | LE-SSD         | PersistentCacheTier implementation that handles LE-SSD
// | +----------------+ (BlockCacheImpl)
// |   |                      |
// |   V                      |
// |  null                    |
// +--------------------------+
//               |
//               V
//              null
namespace ROCKSDB_NAMESPACE {

// Persistent Cache Config
//
// This struct captures all the options that are
used to configure persistent 61 // cache. Some of the terminologies used in naming the options are 62 // 63 // dispatch size : 64 // This is the size in which IO is dispatched to the device 65 // 66 // write buffer size : 67 // This is the size of an individual write buffer size. Write buffers are 68 // grouped to form buffered file. 69 // 70 // cache size : 71 // This is the logical maximum for the cache size 72 // 73 // qdepth : 74 // This is the max number of IOs that can issues to the device in parallel 75 // 76 // pepeling : 77 // The writer code path follows pipelined architecture, which means the 78 // operations are handed off from one stage to another 79 // 80 // pipelining backlog size : 81 // With the pipelined architecture, there can always be backlogging of ops in 82 // pipeline queues. This is the maximum backlog size after which ops are dropped 83 // from queue 84 struct PersistentCacheConfig { 85 explicit PersistentCacheConfig( 86 Env* const _env, const std::string& _path, const uint64_t _cache_size, 87 const std::shared_ptr<Logger>& _log, 88 const uint32_t _write_buffer_size = 1 * 1024 * 1024 /*1MB*/) { 89 env = _env; 90 clock = (env != nullptr) ? env->GetSystemClock().get() 91 : SystemClock::Default().get(); 92 path = _path; 93 log = _log; 94 cache_size = _cache_size; 95 writer_dispatch_size = write_buffer_size = _write_buffer_size; 96 } 97 98 // 99 // Validate the settings. Our intentions are to catch erroneous settings ahead 100 // of time instead going violating invariants or causing dead locks. 
101 // ValidateSettingsPersistentCacheConfig102 Status ValidateSettings() const { 103 // (1) check pre-conditions for variables 104 if (!env || path.empty()) { 105 return Status::InvalidArgument("empty or null args"); 106 } 107 108 // (2) assert size related invariants 109 // - cache size cannot be less than cache file size 110 // - individual write buffer size cannot be greater than cache file size 111 // - total write buffer size cannot be less than 2X cache file size 112 if (cache_size < cache_file_size || write_buffer_size >= cache_file_size || 113 write_buffer_size * write_buffer_count() < 2 * cache_file_size) { 114 return Status::InvalidArgument("invalid cache size"); 115 } 116 117 // (2) check writer settings 118 // - Queue depth cannot be 0 119 // - writer_dispatch_size cannot be greater than writer_buffer_size 120 // - dispatch size and buffer size need to be aligned 121 if (!writer_qdepth || writer_dispatch_size > write_buffer_size || 122 write_buffer_size % writer_dispatch_size) { 123 return Status::InvalidArgument("invalid writer settings"); 124 } 125 126 return Status::OK(); 127 } 128 129 // 130 // Env abstraction to use for system level operations 131 // 132 Env* env; 133 SystemClock* clock; 134 // 135 // Path for the block cache where blocks are persisted 136 // 137 std::string path; 138 139 // 140 // Log handle for logging messages 141 // 142 std::shared_ptr<Logger> log; 143 144 // 145 // Enable direct IO for reading 146 // 147 bool enable_direct_reads = true; 148 149 // 150 // Enable direct IO for writing 151 // 152 bool enable_direct_writes = false; 153 154 // 155 // Logical cache size 156 // 157 uint64_t cache_size = std::numeric_limits<uint64_t>::max(); 158 159 // cache-file-size 160 // 161 // Cache consists of multiples of small files. 
This parameter defines the 162 // size of an individual cache file 163 // 164 // default: 1M 165 uint32_t cache_file_size = 100ULL * 1024 * 1024; 166 167 // writer-qdepth 168 // 169 // The writers can issues IO to the devices in parallel. This parameter 170 // controls the max number if IOs that can issues in parallel to the block 171 // device 172 // 173 // default :1 174 uint32_t writer_qdepth = 1; 175 176 // pipeline-writes 177 // 178 // The write optionally follow pipelined architecture. This helps 179 // avoid regression in the eviction code path of the primary tier. This 180 // parameter defines if pipelining is enabled or disabled 181 // 182 // default: true 183 bool pipeline_writes = true; 184 185 // max-write-pipeline-backlog-size 186 // 187 // Max pipeline buffer size. This is the maximum backlog we can accumulate 188 // while waiting for writes. After the limit, new ops will be dropped. 189 // 190 // Default: 1GiB 191 uint64_t max_write_pipeline_backlog_size = 1ULL * 1024 * 1024 * 1024; 192 193 // write-buffer-size 194 // 195 // This is the size in which buffer slabs are allocated. 196 // 197 // Default: 1M 198 uint32_t write_buffer_size = 1ULL * 1024 * 1024; 199 200 // write-buffer-count 201 // 202 // This is the total number of buffer slabs. This is calculated as a factor of 203 // file size in order to avoid dead lock. 
write_buffer_countPersistentCacheConfig204 size_t write_buffer_count() const { 205 assert(write_buffer_size); 206 return static_cast<size_t>((writer_qdepth + 1.2) * cache_file_size / 207 write_buffer_size); 208 } 209 210 // writer-dispatch-size 211 // 212 // The writer thread will dispatch the IO at the specified IO size 213 // 214 // default: 1M 215 uint64_t writer_dispatch_size = 1ULL * 1024 * 1024; 216 217 // is_compressed 218 // 219 // This option determines if the cache will run in compressed mode or 220 // uncompressed mode 221 bool is_compressed = true; 222 223 PersistentCacheConfig MakePersistentCacheConfig( 224 const std::string& path, const uint64_t size, 225 const std::shared_ptr<Logger>& log); 226 227 std::string ToString() const; 228 }; 229 230 // Persistent Cache Tier 231 // 232 // This a logical abstraction that defines a tier of the persistent cache. Tiers 233 // can be stacked over one another. PersistentCahe provides the basic definition 234 // for accessing/storing in the cache. PersistentCacheTier extends the interface 235 // to enable management and stacking of tiers. 
236 class PersistentCacheTier : public PersistentCache { 237 public: 238 typedef std::shared_ptr<PersistentCacheTier> Tier; 239 ~PersistentCacheTier()240 virtual ~PersistentCacheTier() {} 241 242 // Open the persistent cache tier 243 virtual Status Open(); 244 245 // Close the persistent cache tier 246 virtual Status Close(); 247 248 // Reserve space up to 'size' bytes 249 virtual bool Reserve(const size_t size); 250 251 // Erase a key from the cache 252 virtual bool Erase(const Slice& key); 253 254 // Print stats to string recursively 255 virtual std::string PrintStats(); 256 257 virtual PersistentCache::StatsType Stats() override; 258 259 // Insert to page cache 260 virtual Status Insert(const Slice& page_key, const char* data, 261 const size_t size) override = 0; 262 263 // Lookup page cache by page identifier 264 virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data, 265 size_t* size) override = 0; 266 267 // Does it store compressed data ? 268 virtual bool IsCompressed() override = 0; 269 270 virtual std::string GetPrintableOptions() const override = 0; 271 272 virtual uint64_t NewId() override; 273 274 // Return a reference to next tier next_tier()275 virtual Tier& next_tier() { return next_tier_; } 276 277 // Set the value for next tier set_next_tier(const Tier & tier)278 virtual void set_next_tier(const Tier& tier) { 279 assert(!next_tier_); 280 next_tier_ = tier; 281 } 282 TEST_Flush()283 virtual void TEST_Flush() { 284 if (next_tier_) { 285 next_tier_->TEST_Flush(); 286 } 287 } 288 289 private: 290 Tier next_tier_; // next tier 291 std::atomic<uint64_t> last_id_{1}; 292 }; 293 294 // PersistentTieredCache 295 // 296 // Abstraction that helps you construct a tiers of persistent caches as a 297 // unified cache. The tier(s) of cache will act a single tier for management 298 // ease and support PersistentCache methods for accessing data. 
299 class PersistentTieredCache : public PersistentCacheTier { 300 public: 301 virtual ~PersistentTieredCache(); 302 303 Status Open() override; 304 Status Close() override; 305 bool Erase(const Slice& key) override; 306 std::string PrintStats() override; 307 PersistentCache::StatsType Stats() override; 308 Status Insert(const Slice& page_key, const char* data, 309 const size_t size) override; 310 Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data, 311 size_t* size) override; 312 bool IsCompressed() override; 313 GetPrintableOptions()314 std::string GetPrintableOptions() const override { 315 return "PersistentTieredCache"; 316 } 317 318 void AddTier(const Tier& tier); 319 next_tier()320 Tier& next_tier() override { 321 auto it = tiers_.end(); 322 return (*it)->next_tier(); 323 } 324 set_next_tier(const Tier & tier)325 void set_next_tier(const Tier& tier) override { 326 auto it = tiers_.end(); 327 (*it)->set_next_tier(tier); 328 } 329 TEST_Flush()330 void TEST_Flush() override { 331 assert(!tiers_.empty()); 332 tiers_.front()->TEST_Flush(); 333 PersistentCacheTier::TEST_Flush(); 334 } 335 336 protected: 337 std::list<Tier> tiers_; // list of tiers top-down 338 }; 339 340 } // namespace ROCKSDB_NAMESPACE 341 342 #endif 343