1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
8 #ifndef ROCKSDB_LITE
9 
10 #include <functional>
11 #include <limits>
12 #include <string>
13 #include <vector>
14 
15 #include "rocksdb/db.h"
16 #include "rocksdb/status.h"
17 #include "rocksdb/utilities/stackable_db.h"
18 
19 namespace ROCKSDB_NAMESPACE {
20 
21 namespace blob_db {
22 
// A wrapped database which puts the values of KV pairs in a separate log
// and stores the location of each value within that log in the underlying DB.
//
// The factory needs to be moved to include/rocksdb/utilities to allow
// users to use blob DB.
28 
// Expiration value meaning "never expires": the largest value representable
// in a uint64_t.
constexpr uint64_t kNoExpiration = (std::numeric_limits<uint64_t>::max)();
30 
31 struct BlobDBOptions {
32   // Name of the directory under the base DB where blobs will be stored. Using
33   // a directory where the base DB stores its SST files is not supported.
34   // Default is "blob_dir"
35   std::string blob_dir = "blob_dir";
36 
37   // whether the blob_dir path is relative or absolute.
38   bool path_relative = true;
39 
40   // When max_db_size is reached, evict blob files to free up space
41   // instead of returnning NoSpace error on write. Blob files will be
42   // evicted from oldest to newest, based on file creation time.
43   bool is_fifo = false;
44 
45   // Maximum size of the database (including SST files and blob files).
46   //
47   // Default: 0 (no limits)
48   uint64_t max_db_size = 0;
49 
50   // a new bucket is opened, for ttl_range. So if ttl_range is 600seconds
51   // (10 minutes), and the first bucket starts at 1471542000
52   // then the blob buckets will be
53   // first bucket is 1471542000 - 1471542600
54   // second bucket is 1471542600 - 1471543200
55   // and so on
56   uint64_t ttl_range_secs = 3600;
57 
58   // The smallest value to store in blob log. Values smaller than this threshold
59   // will be inlined in base DB together with the key.
60   uint64_t min_blob_size = 0;
61 
62   // Allows OS to incrementally sync blob files to disk for every
63   // bytes_per_sync bytes written. Users shouldn't rely on it for
64   // persistency guarantee.
65   uint64_t bytes_per_sync = 512 * 1024;
66 
67   // the target size of each blob file. File will become immutable
68   // after it exceeds that size
69   uint64_t blob_file_size = 256 * 1024 * 1024;
70 
71   // what compression to use for Blob's
72   CompressionType compression = kNoCompression;
73 
74   // If enabled, BlobDB cleans up stale blobs in non-TTL files during compaction
75   // by rewriting the remaining live blobs to new files.
76   bool enable_garbage_collection = false;
77 
78   // The cutoff in terms of blob file age for garbage collection. Blobs in
79   // the oldest N non-TTL blob files will be rewritten when encountered during
80   // compaction, where N = garbage_collection_cutoff * number_of_non_TTL_files.
81   double garbage_collection_cutoff = 0.25;
82 
83   // Disable all background job. Used for test only.
84   bool disable_background_tasks = false;
85 
86   void Dump(Logger* log) const;
87 };
88 
89 class BlobDB : public StackableDB {
90  public:
91   using ROCKSDB_NAMESPACE::StackableDB::Put;
92   virtual Status Put(const WriteOptions& options, const Slice& key,
93                      const Slice& value) override = 0;
Put(const WriteOptions & options,ColumnFamilyHandle * column_family,const Slice & key,const Slice & value)94   virtual Status Put(const WriteOptions& options,
95                      ColumnFamilyHandle* column_family, const Slice& key,
96                      const Slice& value) override {
97     if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
98       return Status::NotSupported(
99           "Blob DB doesn't support non-default column family.");
100     }
101     return Put(options, key, value);
102   }
103 
104   using ROCKSDB_NAMESPACE::StackableDB::Delete;
Delete(const WriteOptions & options,ColumnFamilyHandle * column_family,const Slice & key)105   virtual Status Delete(const WriteOptions& options,
106                         ColumnFamilyHandle* column_family,
107                         const Slice& key) override {
108     if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
109       return Status::NotSupported(
110           "Blob DB doesn't support non-default column family.");
111     }
112     assert(db_ != nullptr);
113     return db_->Delete(options, column_family, key);
114   }
115 
116   virtual Status PutWithTTL(const WriteOptions& options, const Slice& key,
117                             const Slice& value, uint64_t ttl) = 0;
PutWithTTL(const WriteOptions & options,ColumnFamilyHandle * column_family,const Slice & key,const Slice & value,uint64_t ttl)118   virtual Status PutWithTTL(const WriteOptions& options,
119                             ColumnFamilyHandle* column_family, const Slice& key,
120                             const Slice& value, uint64_t ttl) {
121     if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
122       return Status::NotSupported(
123           "Blob DB doesn't support non-default column family.");
124     }
125     return PutWithTTL(options, key, value, ttl);
126   }
127 
128   // Put with expiration. Key with expiration time equal to
129   // std::numeric_limits<uint64_t>::max() means the key don't expire.
130   virtual Status PutUntil(const WriteOptions& options, const Slice& key,
131                           const Slice& value, uint64_t expiration) = 0;
PutUntil(const WriteOptions & options,ColumnFamilyHandle * column_family,const Slice & key,const Slice & value,uint64_t expiration)132   virtual Status PutUntil(const WriteOptions& options,
133                           ColumnFamilyHandle* column_family, const Slice& key,
134                           const Slice& value, uint64_t expiration) {
135     if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
136       return Status::NotSupported(
137           "Blob DB doesn't support non-default column family.");
138     }
139     return PutUntil(options, key, value, expiration);
140   }
141 
142   using ROCKSDB_NAMESPACE::StackableDB::Get;
143   virtual Status Get(const ReadOptions& options,
144                      ColumnFamilyHandle* column_family, const Slice& key,
145                      PinnableSlice* value) override = 0;
146 
147   // Get value and expiration.
148   virtual Status Get(const ReadOptions& options,
149                      ColumnFamilyHandle* column_family, const Slice& key,
150                      PinnableSlice* value, uint64_t* expiration) = 0;
Get(const ReadOptions & options,const Slice & key,PinnableSlice * value,uint64_t * expiration)151   virtual Status Get(const ReadOptions& options, const Slice& key,
152                      PinnableSlice* value, uint64_t* expiration) {
153     return Get(options, DefaultColumnFamily(), key, value, expiration);
154   }
155 
156   using ROCKSDB_NAMESPACE::StackableDB::MultiGet;
157   virtual std::vector<Status> MultiGet(
158       const ReadOptions& options,
159       const std::vector<Slice>& keys,
160       std::vector<std::string>* values) override = 0;
MultiGet(const ReadOptions & options,const std::vector<ColumnFamilyHandle * > & column_families,const std::vector<Slice> & keys,std::vector<std::string> * values)161   virtual std::vector<Status> MultiGet(
162       const ReadOptions& options,
163       const std::vector<ColumnFamilyHandle*>& column_families,
164       const std::vector<Slice>& keys,
165       std::vector<std::string>* values) override {
166     for (auto column_family : column_families) {
167       if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
168         return std::vector<Status>(
169             column_families.size(),
170             Status::NotSupported(
171                 "Blob DB doesn't support non-default column family."));
172       }
173     }
174     return MultiGet(options, keys, values);
175   }
176   virtual void MultiGet(const ReadOptions& /*options*/,
177                         ColumnFamilyHandle* /*column_family*/,
178                         const size_t num_keys, const Slice* /*keys*/,
179                         PinnableSlice* /*values*/, Status* statuses,
180                         const bool /*sorted_input*/ = false) override {
181     for (size_t i = 0; i < num_keys; ++i) {
182       statuses[i] = Status::NotSupported(
183           "Blob DB doesn't support batched MultiGet");
184     }
185   }
186 
187   using ROCKSDB_NAMESPACE::StackableDB::SingleDelete;
SingleDelete(const WriteOptions &,ColumnFamilyHandle *,const Slice &)188   virtual Status SingleDelete(const WriteOptions& /*wopts*/,
189                               ColumnFamilyHandle* /*column_family*/,
190                               const Slice& /*key*/) override {
191     return Status::NotSupported("Not supported operation in blob db.");
192   }
193 
194   using ROCKSDB_NAMESPACE::StackableDB::Merge;
Merge(const WriteOptions &,ColumnFamilyHandle *,const Slice &,const Slice &)195   virtual Status Merge(const WriteOptions& /*options*/,
196                        ColumnFamilyHandle* /*column_family*/,
197                        const Slice& /*key*/, const Slice& /*value*/) override {
198     return Status::NotSupported("Not supported operation in blob db.");
199   }
200 
201   virtual Status Write(const WriteOptions& opts,
202                        WriteBatch* updates) override = 0;
203   using ROCKSDB_NAMESPACE::StackableDB::NewIterator;
204   virtual Iterator* NewIterator(const ReadOptions& options) override = 0;
NewIterator(const ReadOptions & options,ColumnFamilyHandle * column_family)205   virtual Iterator* NewIterator(const ReadOptions& options,
206                                 ColumnFamilyHandle* column_family) override {
207     if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
208       // Blob DB doesn't support non-default column family.
209       return nullptr;
210     }
211     return NewIterator(options);
212   }
213 
214   Status CompactFiles(
215       const CompactionOptions& compact_options,
216       const std::vector<std::string>& input_file_names, const int output_level,
217       const int output_path_id = -1,
218       std::vector<std::string>* const output_file_names = nullptr,
219       CompactionJobInfo* compaction_job_info = nullptr) override = 0;
220   Status CompactFiles(
221       const CompactionOptions& compact_options,
222       ColumnFamilyHandle* column_family,
223       const std::vector<std::string>& input_file_names, const int output_level,
224       const int output_path_id = -1,
225       std::vector<std::string>* const output_file_names = nullptr,
226       CompactionJobInfo* compaction_job_info = nullptr) override {
227     if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
228       return Status::NotSupported(
229           "Blob DB doesn't support non-default column family.");
230     }
231 
232     return CompactFiles(compact_options, input_file_names, output_level,
233                         output_path_id, output_file_names, compaction_job_info);
234   }
235 
236   using ROCKSDB_NAMESPACE::StackableDB::Close;
237   virtual Status Close() override = 0;
238 
239   // Opening blob db.
240   static Status Open(const Options& options, const BlobDBOptions& bdb_options,
241                      const std::string& dbname, BlobDB** blob_db);
242 
243   static Status Open(const DBOptions& db_options,
244                      const BlobDBOptions& bdb_options,
245                      const std::string& dbname,
246                      const std::vector<ColumnFamilyDescriptor>& column_families,
247                      std::vector<ColumnFamilyHandle*>* handles,
248                      BlobDB** blob_db);
249 
250   virtual BlobDBOptions GetBlobDBOptions() const = 0;
251 
252   virtual Status SyncBlobFiles() = 0;
253 
~BlobDB()254   virtual ~BlobDB() {}
255 
256  protected:
257   explicit BlobDB();
258 };
259 
260 // Destroy the content of the database.
261 Status DestroyBlobDB(const std::string& dbname, const Options& options,
262                      const BlobDBOptions& bdb_options);
263 
264 }  // namespace blob_db
265 }  // namespace ROCKSDB_NAMESPACE
266 #endif  // ROCKSDB_LITE
267