1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8 
9 #pragma once
10 
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <map>
14 #include <memory>
15 #include <string>
16 #include <unordered_map>
17 #include <vector>
18 #include "rocksdb/iterator.h"
19 #include "rocksdb/listener.h"
20 #include "rocksdb/metadata.h"
21 #include "rocksdb/options.h"
22 #include "rocksdb/snapshot.h"
23 #include "rocksdb/sst_file_writer.h"
24 #include "rocksdb/thread_status.h"
25 #include "rocksdb/transaction_log.h"
26 #include "rocksdb/types.h"
27 #include "rocksdb/version.h"
28 
29 #ifdef _WIN32
30 // Windows API macro interference
31 #undef DeleteFile
32 #endif
33 
// ROCKSDB_DEPRECATED_FUNC marks a function declaration as deprecated.
// It expands to the GCC/Clang attribute or to the MSVC declspec. On any other
// toolchain it expands to nothing, so annotated declarations still compile.
#if defined(__GNUC__) || defined(__clang__)
#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
#elif defined(_WIN32)
#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
#else
#define ROCKSDB_DEPRECATED_FUNC
#endif
39 
40 namespace ROCKSDB_NAMESPACE {
41 
42 struct ColumnFamilyOptions;
43 struct CompactionOptions;
44 struct CompactRangeOptions;
45 struct DBOptions;
46 struct ExternalSstFileInfo;
47 struct FlushOptions;
48 struct Options;
49 struct ReadOptions;
50 struct TableProperties;
51 struct WriteOptions;
52 #ifdef ROCKSDB_LITE
53 class CompactionJobInfo;
54 #endif
55 class Env;
56 class EventListener;
57 class FileSystem;
58 #ifndef ROCKSDB_LITE
59 class Replayer;
60 #endif
61 class StatsHistoryIterator;
62 #ifndef ROCKSDB_LITE
63 class TraceReader;
64 class TraceWriter;
65 #endif
66 class WriteBatch;
67 
68 extern const std::string kDefaultColumnFamilyName;
69 extern const std::string kPersistentStatsColumnFamilyName;
70 struct ColumnFamilyDescriptor {
71   std::string name;
72   ColumnFamilyOptions options;
ColumnFamilyDescriptorColumnFamilyDescriptor73   ColumnFamilyDescriptor()
74       : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
ColumnFamilyDescriptorColumnFamilyDescriptor75   ColumnFamilyDescriptor(const std::string& _name,
76                          const ColumnFamilyOptions& _options)
77       : name(_name), options(_options) {}
78 };
79 
80 class ColumnFamilyHandle {
81  public:
~ColumnFamilyHandle()82   virtual ~ColumnFamilyHandle() {}
83   // Returns the name of the column family associated with the current handle.
84   virtual const std::string& GetName() const = 0;
85   // Returns the ID of the column family associated with the current handle.
86   virtual uint32_t GetID() const = 0;
87   // Fills "*desc" with the up-to-date descriptor of the column family
88   // associated with this handle. Since it fills "*desc" with the up-to-date
89   // information, this call might internally lock and release DB mutex to
90   // access the up-to-date CF options.  In addition, all the pointer-typed
91   // options cannot be referenced any longer than the original options exist.
92   //
93   // Note that this function is not supported in RocksDBLite.
94   virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
95   // Returns the comparator of the column family associated with the
96   // current handle.
97   virtual const Comparator* GetComparator() const = 0;
98 };
99 
100 static const int kMajorVersion = __ROCKSDB_MAJOR__;
101 static const int kMinorVersion = __ROCKSDB_MINOR__;
102 
103 // A range of keys
104 struct Range {
105   Slice start;
106   Slice limit;
107 
RangeRange108   Range() {}
RangeRange109   Range(const Slice& s, const Slice& l) : start(s), limit(l) {}
110 };
111 
112 struct RangePtr {
113   const Slice* start;
114   const Slice* limit;
115 
RangePtrRangePtr116   RangePtr() : start(nullptr), limit(nullptr) {}
RangePtrRangePtr117   RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {}
118 };
119 
120 // It is valid that files_checksums and files_checksum_func_names are both
121 // empty (no checksum information is provided for ingestion). Otherwise,
122 // their sizes should be the same as external_files. The file order should
123 // be the same in three vectors and guaranteed by the caller.
124 // Note that, we assume the temperatures of this batch of files to be
125 // ingested are the same.
126 struct IngestExternalFileArg {
127   ColumnFamilyHandle* column_family = nullptr;
128   std::vector<std::string> external_files;
129   IngestExternalFileOptions options;
130   std::vector<std::string> files_checksums;
131   std::vector<std::string> files_checksum_func_names;
132   Temperature file_temperature = Temperature::kUnknown;
133 };
134 
// Options passed to DB::GetMergeOperands().
struct GetMergeOperandsOptions {
  // Upper bound on the number of merge operands the caller expects; the
  // caller-allocated output array must have at least this many elements.
  int expected_max_number_of_operands{0};
};
138 
// A collection of table properties objects, where
//  key: the table's file name.
//  value: the table properties object of the given table.
142 using TablePropertiesCollection =
143     std::unordered_map<std::string, std::shared_ptr<const TableProperties>>;
144 
145 // A DB is a persistent, versioned ordered map from keys to values.
146 // A DB is safe for concurrent access from multiple threads without
147 // any external synchronization.
148 // DB is an abstract base class with one primary implementation (DBImpl)
149 // and a number of wrapper implementations.
150 class DB {
151  public:
152   // Open the database with the specified "name" for reads and writes.
153   // Stores a pointer to a heap-allocated database in *dbptr and returns
154   // OK on success.
155   // Stores nullptr in *dbptr and returns a non-OK status on error, including
156   // if the DB is already open (read-write) by another DB object. (This
157   // guarantee depends on options.env->LockFile(), which might not provide
158   // this guarantee in a custom Env implementation.)
159   //
160   // Caller must delete *dbptr when it is no longer needed.
161   static Status Open(const Options& options, const std::string& name,
162                      DB** dbptr);
163 
164   // Open the database for read only. All DB interfaces
165   // that modify data, like put/delete, will return error.
166   // If the db is opened in read only mode, then no compactions
167   // will happen.
168   //
169   // While a given DB can be simultaneously open via OpenForReadOnly
170   // by any number of readers, if a DB is simultaneously open by Open
171   // and OpenForReadOnly, the read-only instance has undefined behavior
172   // (though can often succeed if quickly closed) and the read-write
173   // instance is unaffected. See also OpenAsSecondary.
174   //
175   // Not supported in ROCKSDB_LITE, in which case the function will
176   // return Status::NotSupported.
177   static Status OpenForReadOnly(const Options& options, const std::string& name,
178                                 DB** dbptr,
179                                 bool error_if_wal_file_exists = false);
180 
181   // Open the database for read only with column families. When opening DB with
182   // read only, you can specify only a subset of column families in the
183   // database that should be opened. However, you always need to specify default
184   // column family. The default column family name is 'default' and it's stored
185   // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName
186   //
187   // While a given DB can be simultaneously open via OpenForReadOnly
188   // by any number of readers, if a DB is simultaneously open by Open
189   // and OpenForReadOnly, the read-only instance has undefined behavior
190   // (though can often succeed if quickly closed) and the read-write
191   // instance is unaffected. See also OpenAsSecondary.
192   //
193   // Not supported in ROCKSDB_LITE, in which case the function will
194   // return Status::NotSupported.
195   static Status OpenForReadOnly(
196       const DBOptions& db_options, const std::string& name,
197       const std::vector<ColumnFamilyDescriptor>& column_families,
198       std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
199       bool error_if_wal_file_exists = false);
200 
201   // The following OpenAsSecondary functions create a secondary instance that
202   // can dynamically tail the MANIFEST of a primary that must have already been
203   // created. User can call TryCatchUpWithPrimary to make the secondary
204   // instance catch up with primary (WAL tailing is NOT supported now) whenever
205   // the user feels necessary. Column families created by the primary after the
206   // secondary instance starts are currently ignored by the secondary instance.
207   // Column families opened by secondary and dropped by the primary will be
208   // dropped by secondary as well. However the user of the secondary instance
209   // can still access the data of such dropped column family as long as they
210   // do not destroy the corresponding column family handle.
211   // WAL tailing is not supported at present, but will arrive soon.
212   //
213   // The options argument specifies the options to open the secondary instance.
214   // The name argument specifies the name of the primary db that you have used
215   // to open the primary instance.
216   // The secondary_path argument points to a directory where the secondary
217   // instance stores its info log.
218   // The dbptr is an out-arg corresponding to the opened secondary instance.
219   // The pointer points to a heap-allocated database, and the user should
220   // delete it after use.
221   // Open DB as secondary instance with only the default column family.
222   // Return OK on success, non-OK on failures.
223   static Status OpenAsSecondary(const Options& options, const std::string& name,
224                                 const std::string& secondary_path, DB** dbptr);
225 
226   // Open DB as secondary instance with column families. You can open a subset
227   // of column families in secondary mode.
228   // The db_options specify the database specific options.
229   // The name argument specifies the name of the primary db that you have used
230   // to open the primary instance.
231   // The secondary_path argument points to a directory where the secondary
232   // instance stores its info log.
233   // The column_families argument specifies a list of column families to open.
234   // If any of the column families does not exist, the function returns non-OK
235   // status.
236   // The handles is an out-arg corresponding to the opened database column
237   // family handles.
238   // The dbptr is an out-arg corresponding to the opened secondary instance.
239   // The pointer points to a heap-allocated database, and the caller should
240   // delete it after use. Before deleting the dbptr, the user should also
241   // delete the pointers stored in handles vector.
242   // Return OK on success, on-OK on failures.
243   static Status OpenAsSecondary(
244       const DBOptions& db_options, const std::string& name,
245       const std::string& secondary_path,
246       const std::vector<ColumnFamilyDescriptor>& column_families,
247       std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
248 
249   // Open DB with column families.
250   // db_options specify database specific options
251   // column_families is the vector of all column families in the database,
252   // containing column family name and options. You need to open ALL column
253   // families in the database. To get the list of column families, you can use
254   // ListColumnFamilies(). Also, you can open only a subset of column families
255   // for read-only access.
256   // The default column family name is 'default' and it's stored
257   // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName.
258   // If everything is OK, handles will on return be the same size
259   // as column_families --- handles[i] will be a handle that you
260   // will use to operate on column family column_family[i].
261   // Before delete DB, you have to close All column families by calling
262   // DestroyColumnFamilyHandle() with all the handles.
263   static Status Open(const DBOptions& db_options, const std::string& name,
264                      const std::vector<ColumnFamilyDescriptor>& column_families,
265                      std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
266 
267   // Open DB and run the compaction.
268   // It's a read-only operation, the result won't be installed to the DB, it
269   // will be output to the `output_directory`. The API should only be used with
270   // `options.CompactionService` to run compaction triggered by
271   // `CompactionService`.
272   static Status OpenAndCompact(
273       const std::string& name, const std::string& output_directory,
274       const std::string& input, std::string* output,
275       const CompactionServiceOptionsOverride& override_options);
276 
Resume()277   virtual Status Resume() { return Status::NotSupported(); }
278 
279   // Close the DB by releasing resources, closing files etc. This should be
280   // called before calling the destructor so that the caller can get back a
281   // status in case there are any errors. This will not fsync the WAL files.
282   // If syncing is required, the caller must first call SyncWAL(), or Write()
283   // using an empty write batch with WriteOptions.sync=true.
284   // Regardless of the return status, the DB must be freed.
285   // If the return status is Aborted(), closing fails because there is
286   // unreleased snapshot in the system. In this case, users can release
287   // the unreleased snapshots and try again and expect it to succeed. For
288   // other status, re-calling Close() will be no-op and return the original
289   // close status. If the return status is NotSupported(), then the DB
290   // implementation does cleanup in the destructor
Close()291   virtual Status Close() { return Status::NotSupported(); }
292 
293   // ListColumnFamilies will open the DB specified by argument name
294   // and return the list of all column families in that DB
295   // through column_families argument. The ordering of
296   // column families in column_families is unspecified.
297   static Status ListColumnFamilies(const DBOptions& db_options,
298                                    const std::string& name,
299                                    std::vector<std::string>* column_families);
300 
301   // Abstract class ctor
DB()302   DB() {}
303   // No copying allowed
304   DB(const DB&) = delete;
305   void operator=(const DB&) = delete;
306 
307   virtual ~DB();
308 
309   // Create a column_family and return the handle of column family
310   // through the argument handle.
311   virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
312                                     const std::string& column_family_name,
313                                     ColumnFamilyHandle** handle);
314 
315   // Bulk create column families with the same column family options.
316   // Return the handles of the column families through the argument handles.
317   // In case of error, the request may succeed partially, and handles will
318   // contain column family handles that it managed to create, and have size
319   // equal to the number of created column families.
320   virtual Status CreateColumnFamilies(
321       const ColumnFamilyOptions& options,
322       const std::vector<std::string>& column_family_names,
323       std::vector<ColumnFamilyHandle*>* handles);
324 
325   // Bulk create column families.
326   // Return the handles of the column families through the argument handles.
327   // In case of error, the request may succeed partially, and handles will
328   // contain column family handles that it managed to create, and have size
329   // equal to the number of created column families.
330   virtual Status CreateColumnFamilies(
331       const std::vector<ColumnFamilyDescriptor>& column_families,
332       std::vector<ColumnFamilyHandle*>* handles);
333 
334   // Drop a column family specified by column_family handle. This call
335   // only records a drop record in the manifest and prevents the column
336   // family from flushing and compacting.
337   virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
338 
339   // Bulk drop column families. This call only records drop records in the
340   // manifest and prevents the column families from flushing and compacting.
341   // In case of error, the request may succeed partially. User may call
342   // ListColumnFamilies to check the result.
343   virtual Status DropColumnFamilies(
344       const std::vector<ColumnFamilyHandle*>& column_families);
345 
346   // Close a column family specified by column_family handle and destroy
347   // the column family handle specified to avoid double deletion. This call
348   // deletes the column family handle by default. Use this method to
349   // close column family instead of deleting column family handle directly
350   virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family);
351 
352   // Set the database entry for "key" to "value".
353   // If "key" already exists, it will be overwritten.
354   // Returns OK on success, and a non-OK status on error.
355   // Note: consider setting options.sync = true.
356   virtual Status Put(const WriteOptions& options,
357                      ColumnFamilyHandle* column_family, const Slice& key,
358                      const Slice& value) = 0;
Put(const WriteOptions & options,const Slice & key,const Slice & value)359   virtual Status Put(const WriteOptions& options, const Slice& key,
360                      const Slice& value) {
361     return Put(options, DefaultColumnFamily(), key, value);
362   }
363 
364   // Remove the database entry (if any) for "key".  Returns OK on
365   // success, and a non-OK status on error.  It is not an error if "key"
366   // did not exist in the database.
367   // Note: consider setting options.sync = true.
368   virtual Status Delete(const WriteOptions& options,
369                         ColumnFamilyHandle* column_family,
370                         const Slice& key) = 0;
Delete(const WriteOptions & options,const Slice & key)371   virtual Status Delete(const WriteOptions& options, const Slice& key) {
372     return Delete(options, DefaultColumnFamily(), key);
373   }
374 
375   // Remove the database entry for "key". Requires that the key exists
376   // and was not overwritten. Returns OK on success, and a non-OK status
377   // on error.  It is not an error if "key" did not exist in the database.
378   //
379   // If a key is overwritten (by calling Put() multiple times), then the result
380   // of calling SingleDelete() on this key is undefined.  SingleDelete() only
381   // behaves correctly if there has been only one Put() for this key since the
382   // previous call to SingleDelete() for this key.
383   //
384   // This feature is currently an experimental performance optimization
385   // for a very specific workload.  It is up to the caller to ensure that
386   // SingleDelete is only used for a key that is not deleted using Delete() or
387   // written using Merge().  Mixing SingleDelete operations with Deletes and
388   // Merges can result in undefined behavior.
389   //
390   // Note: consider setting options.sync = true.
391   virtual Status SingleDelete(const WriteOptions& options,
392                               ColumnFamilyHandle* column_family,
393                               const Slice& key) = 0;
SingleDelete(const WriteOptions & options,const Slice & key)394   virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
395     return SingleDelete(options, DefaultColumnFamily(), key);
396   }
397 
398   // Removes the database entries in the range ["begin_key", "end_key"), i.e.,
399   // including "begin_key" and excluding "end_key". Returns OK on success, and
400   // a non-OK status on error. It is not an error if the database does not
401   // contain any existing data in the range ["begin_key", "end_key").
402   //
403   // If "end_key" comes before "start_key" according to the user's comparator,
404   // a `Status::InvalidArgument` is returned.
405   //
406   // This feature is now usable in production, with the following caveats:
407   // 1) Accumulating many range tombstones in the memtable will degrade read
408   // performance; this can be avoided by manually flushing occasionally.
409   // 2) Limiting the maximum number of open files in the presence of range
410   // tombstones can degrade read performance. To avoid this problem, set
411   // max_open_files to -1 whenever possible.
412   virtual Status DeleteRange(const WriteOptions& options,
413                              ColumnFamilyHandle* column_family,
414                              const Slice& begin_key, const Slice& end_key);
415 
416   // Merge the database entry for "key" with "value".  Returns OK on success,
417   // and a non-OK status on error. The semantics of this operation is
418   // determined by the user provided merge_operator when opening DB.
419   // Note: consider setting options.sync = true.
420   virtual Status Merge(const WriteOptions& options,
421                        ColumnFamilyHandle* column_family, const Slice& key,
422                        const Slice& value) = 0;
Merge(const WriteOptions & options,const Slice & key,const Slice & value)423   virtual Status Merge(const WriteOptions& options, const Slice& key,
424                        const Slice& value) {
425     return Merge(options, DefaultColumnFamily(), key, value);
426   }
427 
428   // Apply the specified updates to the database.
429   // If `updates` contains no update, WAL will still be synced if
430   // options.sync=true.
431   // Returns OK on success, non-OK on failure.
432   // Note: consider setting options.sync = true.
433   virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
434 
435   // If the database contains an entry for "key" store the
436   // corresponding value in *value and return OK.
437   //
438   // If timestamp is enabled and a non-null timestamp pointer is passed in,
439   // timestamp is returned.
440   //
441   // If there is no entry for "key" leave *value unchanged and return
442   // a status for which Status::IsNotFound() returns true.
443   //
444   // May return some other Status on an error.
Get(const ReadOptions & options,ColumnFamilyHandle * column_family,const Slice & key,std::string * value)445   virtual inline Status Get(const ReadOptions& options,
446                             ColumnFamilyHandle* column_family, const Slice& key,
447                             std::string* value) {
448     assert(value != nullptr);
449     PinnableSlice pinnable_val(value);
450     assert(!pinnable_val.IsPinned());
451     auto s = Get(options, column_family, key, &pinnable_val);
452     if (s.ok() && pinnable_val.IsPinned()) {
453       value->assign(pinnable_val.data(), pinnable_val.size());
454     }  // else value is already assigned
455     return s;
456   }
457   virtual Status Get(const ReadOptions& options,
458                      ColumnFamilyHandle* column_family, const Slice& key,
459                      PinnableSlice* value) = 0;
Get(const ReadOptions & options,const Slice & key,std::string * value)460   virtual Status Get(const ReadOptions& options, const Slice& key,
461                      std::string* value) {
462     return Get(options, DefaultColumnFamily(), key, value);
463   }
464 
465   // Get() methods that return timestamp. Derived DB classes don't need to worry
466   // about this group of methods if they don't care about timestamp feature.
Get(const ReadOptions & options,ColumnFamilyHandle * column_family,const Slice & key,std::string * value,std::string * timestamp)467   virtual inline Status Get(const ReadOptions& options,
468                             ColumnFamilyHandle* column_family, const Slice& key,
469                             std::string* value, std::string* timestamp) {
470     assert(value != nullptr);
471     PinnableSlice pinnable_val(value);
472     assert(!pinnable_val.IsPinned());
473     auto s = Get(options, column_family, key, &pinnable_val, timestamp);
474     if (s.ok() && pinnable_val.IsPinned()) {
475       value->assign(pinnable_val.data(), pinnable_val.size());
476     }  // else value is already assigned
477     return s;
478   }
Get(const ReadOptions &,ColumnFamilyHandle *,const Slice &,PinnableSlice *,std::string *)479   virtual Status Get(const ReadOptions& /*options*/,
480                      ColumnFamilyHandle* /*column_family*/,
481                      const Slice& /*key*/, PinnableSlice* /*value*/,
482                      std::string* /*timestamp*/) {
483     return Status::NotSupported(
484         "Get() that returns timestamp is not implemented.");
485   }
Get(const ReadOptions & options,const Slice & key,std::string * value,std::string * timestamp)486   virtual Status Get(const ReadOptions& options, const Slice& key,
487                      std::string* value, std::string* timestamp) {
488     return Get(options, DefaultColumnFamily(), key, value, timestamp);
489   }
490 
491   // Returns all the merge operands corresponding to the key. If the
492   // number of merge operands in DB is greater than
493   // merge_operands_options.expected_max_number_of_operands
494   // no merge operands are returned and status is Incomplete. Merge operands
495   // returned are in the order of insertion.
496   // merge_operands- Points to an array of at-least
497   //             merge_operands_options.expected_max_number_of_operands and the
498   //             caller is responsible for allocating it. If the status
499   //             returned is Incomplete then number_of_operands will contain
500   //             the total number of merge operands found in DB for key.
501   virtual Status GetMergeOperands(
502       const ReadOptions& options, ColumnFamilyHandle* column_family,
503       const Slice& key, PinnableSlice* merge_operands,
504       GetMergeOperandsOptions* get_merge_operands_options,
505       int* number_of_operands) = 0;
506 
507   // Consistent Get of many keys across column families without the need
508   // for an explicit snapshot. NOTE: the implementation of this MultiGet API
509   // does not have the performance benefits of the void-returning MultiGet
510   // functions.
511   //
512   // If keys[i] does not exist in the database, then the i'th returned
513   // status will be one for which Status::IsNotFound() is true, and
514   // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
515   // the i'th returned status will have Status::ok() true, and (*values)[i]
516   // will store the value associated with keys[i].
517   //
518   // (*values) will always be resized to be the same size as (keys).
519   // Similarly, the number of returned statuses will be the number of keys.
520   // Note: keys will not be "de-duplicated". Duplicate keys will return
521   // duplicate values in order.
522   virtual std::vector<Status> MultiGet(
523       const ReadOptions& options,
524       const std::vector<ColumnFamilyHandle*>& column_family,
525       const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
MultiGet(const ReadOptions & options,const std::vector<Slice> & keys,std::vector<std::string> * values)526   virtual std::vector<Status> MultiGet(const ReadOptions& options,
527                                        const std::vector<Slice>& keys,
528                                        std::vector<std::string>* values) {
529     return MultiGet(
530         options,
531         std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
532         keys, values);
533   }
534 
MultiGet(const ReadOptions &,const std::vector<ColumnFamilyHandle * > &,const std::vector<Slice> & keys,std::vector<std::string> *,std::vector<std::string> *)535   virtual std::vector<Status> MultiGet(
536       const ReadOptions& /*options*/,
537       const std::vector<ColumnFamilyHandle*>& /*column_family*/,
538       const std::vector<Slice>& keys, std::vector<std::string>* /*values*/,
539       std::vector<std::string>* /*timestamps*/) {
540     return std::vector<Status>(
541         keys.size(), Status::NotSupported(
542                          "MultiGet() returning timestamps not implemented."));
543   }
MultiGet(const ReadOptions & options,const std::vector<Slice> & keys,std::vector<std::string> * values,std::vector<std::string> * timestamps)544   virtual std::vector<Status> MultiGet(const ReadOptions& options,
545                                        const std::vector<Slice>& keys,
546                                        std::vector<std::string>* values,
547                                        std::vector<std::string>* timestamps) {
548     return MultiGet(
549         options,
550         std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
551         keys, values, timestamps);
552   }
553 
554   // Overloaded MultiGet API that improves performance by batching operations
555   // in the read path for greater efficiency. Currently, only the block based
556   // table format with full filters are supported. Other table formats such
557   // as plain table, block based table with block based filters and
558   // partitioned indexes will still work, but will not get any performance
559   // benefits.
560   // Parameters -
561   // options - ReadOptions
562   // column_family - ColumnFamilyHandle* that the keys belong to. All the keys
563   //                 passed to the API are restricted to a single column family
564   // num_keys - Number of keys to lookup
565   // keys - Pointer to C style array of key Slices with num_keys elements
566   // values - Pointer to C style array of PinnableSlices with num_keys elements
567   // statuses - Pointer to C style array of Status with num_keys elements
568   // sorted_input - If true, it means the input keys are already sorted by key
569   //                order, so the MultiGet() API doesn't have to sort them
570   //                again. If false, the keys will be copied and sorted
571   //                internally by the API - the input array will not be
572   //                modified
573   virtual void MultiGet(const ReadOptions& options,
574                         ColumnFamilyHandle* column_family,
575                         const size_t num_keys, const Slice* keys,
576                         PinnableSlice* values, Status* statuses,
577                         const bool /*sorted_input*/ = false) {
578     std::vector<ColumnFamilyHandle*> cf;
579     std::vector<Slice> user_keys;
580     std::vector<Status> status;
581     std::vector<std::string> vals;
582 
583     for (size_t i = 0; i < num_keys; ++i) {
584       cf.emplace_back(column_family);
585       user_keys.emplace_back(keys[i]);
586     }
587     status = MultiGet(options, cf, user_keys, &vals);
588     std::copy(status.begin(), status.end(), statuses);
589     for (auto& value : vals) {
590       values->PinSelf(value);
591       values++;
592     }
593   }
594 
595   virtual void MultiGet(const ReadOptions& options,
596                         ColumnFamilyHandle* column_family,
597                         const size_t num_keys, const Slice* keys,
598                         PinnableSlice* values, std::string* timestamps,
599                         Status* statuses, const bool /*sorted_input*/ = false) {
600     std::vector<ColumnFamilyHandle*> cf;
601     std::vector<Slice> user_keys;
602     std::vector<Status> status;
603     std::vector<std::string> vals;
604     std::vector<std::string> tss;
605 
606     for (size_t i = 0; i < num_keys; ++i) {
607       cf.emplace_back(column_family);
608       user_keys.emplace_back(keys[i]);
609     }
610     status = MultiGet(options, cf, user_keys, &vals, &tss);
611     std::copy(status.begin(), status.end(), statuses);
612     std::copy(tss.begin(), tss.end(), timestamps);
613     for (auto& value : vals) {
614       values->PinSelf(value);
615       values++;
616     }
617   }
618 
  // Overloaded MultiGet API that improves performance by batching operations
  // in the read path for greater efficiency. Currently, only the block based
  // table format with full filters is supported. Other table formats such
  // as plain table, block based table with block based filters and
  // partitioned indexes will still work, but will not get any performance
  // benefits.
  // Parameters -
  // options - ReadOptions
  // column_families - Pointer to C style array of ColumnFamilyHandle* with
  //                   num_keys elements; column_families[i] is the column
  //                   family that keys[i] is looked up in
  // num_keys - Number of keys to lookup
  // keys - Pointer to C style array of key Slices with num_keys elements
  // values - Pointer to C style array of PinnableSlices with num_keys elements
  // statuses - Pointer to C style array of Status with num_keys elements
  // sorted_input - If true, it means the input keys are already sorted by key
  //                order, so the MultiGet() API doesn't have to sort them
  //                again. If false, the keys will be copied and sorted
  //                internally by the API - the input array will not be
  //                modified
638   virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
639                         ColumnFamilyHandle** column_families, const Slice* keys,
640                         PinnableSlice* values, Status* statuses,
641                         const bool /*sorted_input*/ = false) {
642     std::vector<ColumnFamilyHandle*> cf;
643     std::vector<Slice> user_keys;
644     std::vector<Status> status;
645     std::vector<std::string> vals;
646 
647     for (size_t i = 0; i < num_keys; ++i) {
648       cf.emplace_back(column_families[i]);
649       user_keys.emplace_back(keys[i]);
650     }
651     status = MultiGet(options, cf, user_keys, &vals);
652     std::copy(status.begin(), status.end(), statuses);
653     for (auto& value : vals) {
654       values->PinSelf(value);
655       values++;
656     }
657   }
658   virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
659                         ColumnFamilyHandle** column_families, const Slice* keys,
660                         PinnableSlice* values, std::string* timestamps,
661                         Status* statuses, const bool /*sorted_input*/ = false) {
662     std::vector<ColumnFamilyHandle*> cf;
663     std::vector<Slice> user_keys;
664     std::vector<Status> status;
665     std::vector<std::string> vals;
666     std::vector<std::string> tss;
667 
668     for (size_t i = 0; i < num_keys; ++i) {
669       cf.emplace_back(column_families[i]);
670       user_keys.emplace_back(keys[i]);
671     }
672     status = MultiGet(options, cf, user_keys, &vals, &tss);
673     std::copy(status.begin(), status.end(), statuses);
674     std::copy(tss.begin(), tss.end(), timestamps);
675     for (auto& value : vals) {
676       values->PinSelf(value);
677       values++;
678     }
679   }
680 
681   // If the key definitely does not exist in the database, then this method
682   // returns false, else true. If the caller wants to obtain value when the key
683   // is found in memory, a bool for 'value_found' must be passed. 'value_found'
684   // will be true on return if value has been set properly.
685   // This check is potentially lighter-weight than invoking DB::Get(). One way
686   // to make this lighter weight is to avoid doing any IOs.
687   // Default implementation here returns true and sets 'value_found' to false
688   virtual bool KeyMayExist(const ReadOptions& /*options*/,
689                            ColumnFamilyHandle* /*column_family*/,
690                            const Slice& /*key*/, std::string* /*value*/,
691                            std::string* /*timestamp*/,
692                            bool* value_found = nullptr) {
693     if (value_found != nullptr) {
694       *value_found = false;
695     }
696     return true;
697   }
698 
699   virtual bool KeyMayExist(const ReadOptions& options,
700                            ColumnFamilyHandle* column_family, const Slice& key,
701                            std::string* value, bool* value_found = nullptr) {
702     return KeyMayExist(options, column_family, key, value,
703                        /*timestamp=*/nullptr, value_found);
704   }
705 
706   virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
707                            std::string* value, bool* value_found = nullptr) {
708     return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
709   }
710 
711   virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
712                            std::string* value, std::string* timestamp,
713                            bool* value_found = nullptr) {
714     return KeyMayExist(options, DefaultColumnFamily(), key, value, timestamp,
715                        value_found);
716   }
717 
718   // Return a heap-allocated iterator over the contents of the database.
719   // The result of NewIterator() is initially invalid (caller must
720   // call one of the Seek methods on the iterator before using it).
721   //
722   // Caller should delete the iterator when it is no longer needed.
723   // The returned iterator should be deleted before this db is deleted.
  // This overload takes the column family explicitly and is pure virtual, so
  // it must be provided by concrete DB implementations.
  virtual Iterator* NewIterator(const ReadOptions& options,
                                ColumnFamilyHandle* column_family) = 0;
NewIterator(const ReadOptions & options)726   virtual Iterator* NewIterator(const ReadOptions& options) {
727     return NewIterator(options, DefaultColumnFamily());
728   }
729   // Returns iterators from a consistent database state across multiple
730   // column families. Iterators are heap allocated and need to be deleted
731   // before the db is deleted
  // column_families - the column families to create iterators for
  // iterators - output vector that receives the created iterators
  // Pure virtual; must be provided by concrete DB implementations.
  virtual Status NewIterators(
      const ReadOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_families,
      std::vector<Iterator*>* iterators) = 0;
736 
737   // Return a handle to the current DB state.  Iterators created with
738   // this handle will all observe a stable snapshot of the current DB
739   // state.  The caller must call ReleaseSnapshot(result) when the
740   // snapshot is no longer needed.
741   //
742   // nullptr will be returned if the DB fails to take a snapshot or does
743   // not support snapshot.
  // Pure virtual; must be provided by concrete DB implementations.
  virtual const Snapshot* GetSnapshot() = 0;
745 
746   // Release a previously acquired snapshot.  The caller must not
747   // use "snapshot" after this call.
  // Pure virtual; must be provided by concrete DB implementations.
  virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
749 
750 #ifndef ROCKSDB_LITE
751   // Contains all valid property arguments for GetProperty() or
752   // GetMapProperty(). Each is a "string" property for retrieval with
753   // GetProperty() unless noted as a "map" property, for GetMapProperty().
754   //
755   // NOTE: Property names cannot end in numbers since those are interpreted as
756   //       arguments, e.g., see kNumFilesAtLevelPrefix.
  struct Properties {
    //  "rocksdb.num-files-at-level<N>" - returns string containing the number
    //      of files at level <N>, where <N> is an ASCII representation of a
    //      level number (e.g., "0").
    static const std::string kNumFilesAtLevelPrefix;

    //  "rocksdb.compression-ratio-at-level<N>" - returns string containing the
    //      compression ratio of data at level <N>, where <N> is an ASCII
    //      representation of a level number (e.g., "0"). Here, compression
    //      ratio is defined as uncompressed data size / compressed file size.
    //      Returns "-1.0" if no open files at level <N>.
    static const std::string kCompressionRatioAtLevelPrefix;

    //  "rocksdb.stats" - returns a multi-line string containing the data
    //      described by kCFStats followed by the data described by kDBStats.
    static const std::string kStats;

    //  "rocksdb.sstables" - returns a multi-line string summarizing current
    //      SST files.
    static const std::string kSSTables;

    //  "rocksdb.cfstats" - Raw data from "rocksdb.cfstats-no-file-histogram"
    //      and "rocksdb.cf-file-histogram" as a "map" property.
    static const std::string kCFStats;

    //  "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with
    //      general column family stats per-level over db's lifetime ("L<n>"),
    //      aggregated over db's lifetime ("Sum"), and aggregated over the
    //      interval since the last retrieval ("Int").
    static const std::string kCFStatsNoFileHistogram;

    //  "rocksdb.cf-file-histogram" - prints out how many file reads go to
    //      every level, as well as the histogram of latency of single
    //      requests.
    static const std::string kCFFileHistogram;

    //  "rocksdb.dbstats" - As a string property, returns a multi-line string
    //      with general database stats, both cumulative (over the db's
    //      lifetime) and interval (since the last retrieval of kDBStats).
    //      As a map property, returns cumulative stats only and does not
    //      update the baseline for the interval stats.
    static const std::string kDBStats;

    //  "rocksdb.levelstats" - returns multi-line string containing the number
    //      of files per level and total size of each level (MB).
    static const std::string kLevelStats;

    //  "rocksdb.block-cache-entry-stats" - returns a multi-line string or
    //      map with statistics on block cache usage.
    static const std::string kBlockCacheEntryStats;

    //  "rocksdb.num-immutable-mem-table" - returns number of immutable
    //      memtables that have not yet been flushed.
    static const std::string kNumImmutableMemTable;

    //  "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
    //      memtables that have already been flushed.
    static const std::string kNumImmutableMemTableFlushed;

    //  "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
    //      pending; otherwise, returns 0.
    static const std::string kMemTableFlushPending;

    //  "rocksdb.num-running-flushes" - returns the number of currently running
    //      flushes.
    static const std::string kNumRunningFlushes;

    //  "rocksdb.compaction-pending" - returns 1 if at least one compaction is
    //      pending; otherwise, returns 0.
    static const std::string kCompactionPending;

    //  "rocksdb.num-running-compactions" - returns the number of currently
    //      running compactions.
    static const std::string kNumRunningCompactions;

    //  "rocksdb.background-errors" - returns accumulated number of background
    //      errors.
    static const std::string kBackgroundErrors;

    //  "rocksdb.cur-size-active-mem-table" - returns approximate size of active
    //      memtable (bytes).
    static const std::string kCurSizeActiveMemTable;

    //  "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
    //      and unflushed immutable memtables (bytes).
    static const std::string kCurSizeAllMemTables;

    //  "rocksdb.size-all-mem-tables" - returns approximate size of active,
    //      unflushed immutable, and pinned immutable memtables (bytes).
    static const std::string kSizeAllMemTables;

    //  "rocksdb.num-entries-active-mem-table" - returns total number of entries
    //      in the active memtable.
    static const std::string kNumEntriesActiveMemTable;

    //  "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
    //      in the unflushed immutable memtables.
    static const std::string kNumEntriesImmMemTables;

    //  "rocksdb.num-deletes-active-mem-table" - returns total number of delete
    //      entries in the active memtable.
    static const std::string kNumDeletesActiveMemTable;

    //  "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
    //      entries in the unflushed immutable memtables.
    static const std::string kNumDeletesImmMemTables;

    //  "rocksdb.estimate-num-keys" - returns estimated number of total keys in
    //      the active and unflushed immutable memtables and storage.
    static const std::string kEstimateNumKeys;

    //  "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
    //      reading SST tables, excluding memory used in block cache (e.g.,
    //      filter and index blocks).
    static const std::string kEstimateTableReadersMem;

    //  "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
    //      files is enabled; otherwise, returns a non-zero number.
    static const std::string kIsFileDeletionsEnabled;

    //  "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
    //      database.
    static const std::string kNumSnapshots;

    //  "rocksdb.oldest-snapshot-time" - returns number representing unix
    //      timestamp of oldest unreleased snapshot.
    static const std::string kOldestSnapshotTime;

    //  "rocksdb.oldest-snapshot-sequence" - returns number representing
    //      sequence number of oldest unreleased snapshot.
    static const std::string kOldestSnapshotSequence;

    //  "rocksdb.num-live-versions" - returns number of live versions. `Version`
    //      is an internal data structure. See version_set.h for details. More
    //      live versions often mean more SST files are held from being deleted,
    //      by iterators or unfinished compactions.
    static const std::string kNumLiveVersions;

    //  "rocksdb.current-super-version-number" - returns number of current LSM
    //      version. It is a uint64_t integer number, incremented after there
    //      is any change to the LSM tree. The number is not preserved after
    //      restarting the DB. After DB restart, it will start from 0 again.
    static const std::string kCurrentSuperVersionNumber;

    //  "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
    //      live data in bytes. For BlobDB, it also includes the exact value of
    //      live bytes in the blob files of the version.
    static const std::string kEstimateLiveDataSize;

    //  "rocksdb.min-log-number-to-keep" - returns the minimum log number of
    //      the log files that should be kept.
    static const std::string kMinLogNumberToKeep;

    //  "rocksdb.min-obsolete-sst-number-to-keep" - returns the minimum file
    //      number for an obsolete SST to be kept. The max value of `uint64_t`
    //      will be returned if all obsolete files can be deleted.
    static const std::string kMinObsoleteSstNumberToKeep;

    //  "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
    //      files.
    //  WARNING: may slow down online queries if there are too many files.
    static const std::string kTotalSstFilesSize;

    //  "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST
    //      files belonging to the latest LSM tree.
    static const std::string kLiveSstFilesSize;

    //  "rocksdb.live_sst_files_size_at_temperature" - returns total size
    //      (bytes) of all SST files at a certain file temperature.
    static const std::string kLiveSstFilesSizeAtTemperature;

    //  "rocksdb.base-level" - returns number of level to which L0 data will be
    //      compacted.
    static const std::string kBaseLevel;

    //  "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
    //      number of bytes compaction needs to rewrite to get all levels down
    //      to under target size. Not valid for other compactions than level-
    //      based.
    static const std::string kEstimatePendingCompactionBytes;

    //  "rocksdb.aggregated-table-properties" - returns a string or map
    //      representation of the aggregated table properties of the target
    //      column family. Only properties that make sense for aggregation
    //      are included.
    static const std::string kAggregatedTableProperties;

    //  "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
    //      one but only returns the aggregated table properties of the
    //      specified level "N" at the target column family.
    static const std::string kAggregatedTablePropertiesAtLevel;

    //  "rocksdb.actual-delayed-write-rate" - returns the current actual delayed
    //      write rate. 0 means no delay.
    static const std::string kActualDelayedWriteRate;

    //  "rocksdb.is-write-stopped" - returns 1 if write has been stopped.
    static const std::string kIsWriteStopped;

    //  "rocksdb.estimate-oldest-key-time" - returns an estimation of
    //      oldest key timestamp in the DB. Currently only available for
    //      FIFO compaction with
    //      compaction_options_fifo.allow_compaction = false.
    static const std::string kEstimateOldestKeyTime;

    //  "rocksdb.block-cache-capacity" - returns block cache capacity.
    static const std::string kBlockCacheCapacity;

    //  "rocksdb.block-cache-usage" - returns the memory size for the entries
    //      residing in block cache.
    static const std::string kBlockCacheUsage;

    //  "rocksdb.block-cache-pinned-usage" - returns the memory size for the
    //      entries being pinned.
    static const std::string kBlockCachePinnedUsage;

    //  "rocksdb.options-statistics" - returns multi-line string
    //      of options.statistics
    static const std::string kOptionsStatistics;

    //  "rocksdb.num-blob-files" - returns number of blob files in the current
    //      version.
    static const std::string kNumBlobFiles;

    //  "rocksdb.blob-stats" - returns the total number and size of all blob
    //      files, and total amount of garbage (bytes) in the blob files in
    //      the current version.
    static const std::string kBlobStats;

    //  "rocksdb.total-blob-file-size" - returns the total size of all blob
    //      files over all versions.
    static const std::string kTotalBlobFileSize;

    //  "rocksdb.live-blob-file-size" - returns the total size of all blob
    //      files in the current version.
    static const std::string kLiveBlobFileSize;
  };
993 #endif /* ROCKSDB_LITE */
994 
995   // DB implementations export properties about their state via this method.
996   // If "property" is a valid "string" property understood by this DB
997   // implementation (see Properties struct above for valid options), fills
998   // "*value" with its current value and returns true.  Otherwise, returns
999   // false.
  // This overload takes an explicit column family handle and is pure
  // virtual, so it must be provided by concrete DB implementations.
  virtual bool GetProperty(ColumnFamilyHandle* column_family,
                           const Slice& property, std::string* value) = 0;
GetProperty(const Slice & property,std::string * value)1002   virtual bool GetProperty(const Slice& property, std::string* value) {
1003     return GetProperty(DefaultColumnFamily(), property, value);
1004   }
1005 
1006   // Like GetProperty but for valid "map" properties. (Some properties can be
1007   // accessed as either "string" properties or "map" properties.)
  // This overload takes an explicit column family handle and is pure
  // virtual, so it must be provided by concrete DB implementations.
  virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
                              const Slice& property,
                              std::map<std::string, std::string>* value) = 0;
GetMapProperty(const Slice & property,std::map<std::string,std::string> * value)1011   virtual bool GetMapProperty(const Slice& property,
1012                               std::map<std::string, std::string>* value) {
1013     return GetMapProperty(DefaultColumnFamily(), property, value);
1014   }
1015 
1016   // Similar to GetProperty(), but only works for a subset of properties whose
1017   // return value is an integer. Return the value by integer. Supported
1018   // properties:
1019   //  "rocksdb.num-immutable-mem-table"
1020   //  "rocksdb.mem-table-flush-pending"
1021   //  "rocksdb.compaction-pending"
1022   //  "rocksdb.background-errors"
1023   //  "rocksdb.cur-size-active-mem-table"
1024   //  "rocksdb.cur-size-all-mem-tables"
1025   //  "rocksdb.size-all-mem-tables"
1026   //  "rocksdb.num-entries-active-mem-table"
1027   //  "rocksdb.num-entries-imm-mem-tables"
1028   //  "rocksdb.num-deletes-active-mem-table"
1029   //  "rocksdb.num-deletes-imm-mem-tables"
1030   //  "rocksdb.estimate-num-keys"
1031   //  "rocksdb.estimate-table-readers-mem"
1032   //  "rocksdb.is-file-deletions-enabled"
1033   //  "rocksdb.num-snapshots"
1034   //  "rocksdb.oldest-snapshot-time"
1035   //  "rocksdb.num-live-versions"
1036   //  "rocksdb.current-super-version-number"
1037   //  "rocksdb.estimate-live-data-size"
1038   //  "rocksdb.min-log-number-to-keep"
1039   //  "rocksdb.min-obsolete-sst-number-to-keep"
1040   //  "rocksdb.total-sst-files-size"
1041   //  "rocksdb.live-sst-files-size"
1042   //  "rocksdb.base-level"
1043   //  "rocksdb.estimate-pending-compaction-bytes"
1044   //  "rocksdb.num-running-compactions"
1045   //  "rocksdb.num-running-flushes"
1046   //  "rocksdb.actual-delayed-write-rate"
1047   //  "rocksdb.is-write-stopped"
1048   //  "rocksdb.estimate-oldest-key-time"
1049   //  "rocksdb.block-cache-capacity"
1050   //  "rocksdb.block-cache-usage"
1051   //  "rocksdb.block-cache-pinned-usage"
1052   //
1053   //  Properties dedicated for BlobDB:
1054   //  "rocksdb.num-blob-files"
1055   //  "rocksdb.total-blob-file-size"
1056   //  "rocksdb.live-blob-file-size"
  //
  // This overload takes an explicit column family handle and is pure
  // virtual, so it must be provided by concrete DB implementations.
  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
                              const Slice& property, uint64_t* value) = 0;
GetIntProperty(const Slice & property,uint64_t * value)1059   virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
1060     return GetIntProperty(DefaultColumnFamily(), property, value);
1061   }
1062 
1063   // Reset internal stats for DB and all column families.
1064   // Note this doesn't reset options.statistics as it is not owned by
1065   // DB.
ResetStats()1066   virtual Status ResetStats() {
1067     return Status::NotSupported("Not implemented");
1068   }
1069 
1070   // Same as GetIntProperty(), but this one returns the aggregated int
1071   // property from all column families.
  // Pure virtual; must be provided by concrete DB implementations.
  virtual bool GetAggregatedIntProperty(const Slice& property,
                                        uint64_t* value) = 0;
1074 
1075   // Flags for DB::GetSizeApproximation that specify whether memtable
1076   // stats should be included, or file stats approximation or both
1077   enum SizeApproximationFlags : uint8_t {
1078     NONE = 0,
1079     INCLUDE_MEMTABLES = 1 << 0,
1080     INCLUDE_FILES = 1 << 1
1081   };
1082 
1083   // For each i in [0,n-1], store in "sizes[i]", the approximate
1084   // file system space used by keys in "[range[i].start .. range[i].limit)"
1085   // in a single column family.
1086   //
1087   // Note that the returned sizes measure file system space usage, so
1088   // if the user data compresses by a factor of ten, the returned
1089   // sizes will be one-tenth the size of the corresponding user data size.
  //
  // ranges - C style array of n Range entries
  // sizes - C style array of n elements receiving the approximate sizes
  // Pure virtual; must be provided by concrete DB implementations.
  virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
                                     ColumnFamilyHandle* column_family,
                                     const Range* ranges, int n,
                                     uint64_t* sizes) = 0;
1094 
  // Simpler versions of the GetApproximateSizes() method above.
  // The include_flags argument must be of type DB::SizeApproximationFlags
  // and cannot be NONE.
1098   virtual Status GetApproximateSizes(ColumnFamilyHandle* column_family,
1099                                      const Range* ranges, int n,
1100                                      uint64_t* sizes,
1101                                      uint8_t include_flags = INCLUDE_FILES) {
1102     SizeApproximationOptions options;
1103     options.include_memtabtles =
1104         (include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) != 0;
1105     options.include_files =
1106         (include_flags & SizeApproximationFlags::INCLUDE_FILES) != 0;
1107     return GetApproximateSizes(options, column_family, ranges, n, sizes);
1108   }
1109   virtual Status GetApproximateSizes(const Range* ranges, int n,
1110                                      uint64_t* sizes,
1111                                      uint8_t include_flags = INCLUDE_FILES) {
1112     return GetApproximateSizes(DefaultColumnFamily(), ranges, n, sizes,
1113                                include_flags);
1114   }
1115 
1116   // The method is similar to GetApproximateSizes, except it
1117   // returns approximate number of records in memtables.
  // count, size - output parameters
  // Pure virtual; must be provided by concrete DB implementations.
  virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
                                           const Range& range,
                                           uint64_t* const count,
                                           uint64_t* const size) = 0;
GetApproximateMemTableStats(const Range & range,uint64_t * const count,uint64_t * const size)1122   virtual void GetApproximateMemTableStats(const Range& range,
1123                                            uint64_t* const count,
1124                                            uint64_t* const size) {
1125     GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size);
1126   }
1127 
1128   // Deprecated versions of GetApproximateSizes
GetApproximateSizes(const Range * range,int n,uint64_t * sizes,bool include_memtable)1129   ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
1130       const Range* range, int n, uint64_t* sizes, bool include_memtable) {
1131     uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
1132     if (include_memtable) {
1133       include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
1134     }
1135     GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
1136   }
GetApproximateSizes(ColumnFamilyHandle * column_family,const Range * range,int n,uint64_t * sizes,bool include_memtable)1137   ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
1138       ColumnFamilyHandle* column_family, const Range* range, int n,
1139       uint64_t* sizes, bool include_memtable) {
1140     uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
1141     if (include_memtable) {
1142       include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
1143     }
1144     GetApproximateSizes(column_family, range, n, sizes, include_flags);
1145   }
1146 
1147   // Compact the underlying storage for the key range [*begin,*end].
1148   // The actual compaction interval might be superset of [*begin, *end].
1149   // In particular, deleted and overwritten versions are discarded,
1150   // and the data is rearranged to reduce the cost of operations
1151   // needed to access the data.  This operation should typically only
1152   // be invoked by users who understand the underlying implementation.
1153   // This call blocks until the operation completes successfully, fails,
1154   // or is aborted (Status::Incomplete). See DisableManualCompaction.
1155   //
1156   // begin==nullptr is treated as a key before all keys in the database.
1157   // end==nullptr is treated as a key after all keys in the database.
1158   // Therefore the following call will compact the entire database:
1159   //    db->CompactRange(options, nullptr, nullptr);
1160   // Note that after the entire database is compacted, all data are pushed
1161   // down to the last level containing any data. If the total data size after
1162   // compaction is reduced, that level might not be appropriate for hosting all
1163   // the files. In this case, client could set options.change_level to true, to
1164   // move the files back to the minimum level capable of holding the data set
1165   // or a given level (specified by non-negative options.target_level).
  //
  // This overload takes the column family explicitly and is pure virtual, so
  // it must be provided by concrete DB implementations.
  virtual Status CompactRange(const CompactRangeOptions& options,
                              ColumnFamilyHandle* column_family,
                              const Slice* begin, const Slice* end) = 0;
CompactRange(const CompactRangeOptions & options,const Slice * begin,const Slice * end)1169   virtual Status CompactRange(const CompactRangeOptions& options,
1170                               const Slice* begin, const Slice* end) {
1171     return CompactRange(options, DefaultColumnFamily(), begin, end);
1172   }
1173 
1174   ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
1175       ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end,
1176       bool change_level = false, int target_level = -1,
1177       uint32_t target_path_id = 0) {
1178     CompactRangeOptions options;
1179     options.change_level = change_level;
1180     options.target_level = target_level;
1181     options.target_path_id = target_path_id;
1182     return CompactRange(options, column_family, begin, end);
1183   }
1184 
1185   ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
1186       const Slice* begin, const Slice* end, bool change_level = false,
1187       int target_level = -1, uint32_t target_path_id = 0) {
1188     CompactRangeOptions options;
1189     options.change_level = change_level;
1190     options.target_level = target_level;
1191     options.target_path_id = target_path_id;
1192     return CompactRange(options, DefaultColumnFamily(), begin, end);
1193   }
1194 
SetOptions(ColumnFamilyHandle *,const std::unordered_map<std::string,std::string> &)1195   virtual Status SetOptions(
1196       ColumnFamilyHandle* /*column_family*/,
1197       const std::unordered_map<std::string, std::string>& /*new_options*/) {
1198     return Status::NotSupported("Not implemented");
1199   }
SetOptions(const std::unordered_map<std::string,std::string> & new_options)1200   virtual Status SetOptions(
1201       const std::unordered_map<std::string, std::string>& new_options) {
1202     return SetOptions(DefaultColumnFamily(), new_options);
1203   }
1204 
  // Takes a map of option name/value strings, like SetOptions(), but without
  // a column family argument. Pure virtual; must be provided by concrete DB
  // implementations.
  virtual Status SetDBOptions(
      const std::unordered_map<std::string, std::string>& new_options) = 0;
1207 
1208   // CompactFiles() inputs a list of files specified by file numbers and
1209   // compacts them to the specified level. A small difference compared to
1210   // CompactRange() is that CompactFiles() performs the compaction job
1211   // using the CURRENT thread, so is not considered a "background" job.
1212   //
1213   // @see GetDataBaseMetaData
1214   // @see GetColumnFamilyMetaData
  //
  // output_path_id defaults to -1; output_file_names and compaction_job_info
  // are optional and default to nullptr.
  // Pure virtual; must be provided by concrete DB implementations.
  virtual Status CompactFiles(
      const CompactionOptions& compact_options,
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& input_file_names, const int output_level,
      const int output_path_id = -1,
      std::vector<std::string>* const output_file_names = nullptr,
      CompactionJobInfo* compaction_job_info = nullptr) = 0;
1222 
1223   virtual Status CompactFiles(
1224       const CompactionOptions& compact_options,
1225       const std::vector<std::string>& input_file_names, const int output_level,
1226       const int output_path_id = -1,
1227       std::vector<std::string>* const output_file_names = nullptr,
1228       CompactionJobInfo* compaction_job_info = nullptr) {
1229     return CompactFiles(compact_options, DefaultColumnFamily(),
1230                         input_file_names, output_level, output_path_id,
1231                         output_file_names, compaction_job_info);
1232   }
1233 
1234   // This function will wait until all currently running background processes
1235   // finish. After it returns, no background process will be run until
1236   // ContinueBackgroundWork is called, once for each preceding OK-returning
1237   // call to PauseBackgroundWork.
1238   virtual Status PauseBackgroundWork() = 0;
1239   virtual Status ContinueBackgroundWork() = 0;
1240 
1241   // This function will enable automatic compactions for the given column
1242   // families if they were previously disabled. The function will first set the
1243   // disable_auto_compactions option for each column family to 'false', after
1244   // which it will schedule a flush/compaction.
1245   //
1246   // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
1247   // does NOT schedule a flush/compaction afterwards, and only changes the
1248   // parameter itself within the column family option.
1249   //
1250   virtual Status EnableAutoCompaction(
1251       const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;
1252 
1253   // After this function call, CompactRange() or CompactFiles() will not
1254   // run compactions and fail. Calling this function will tell outstanding
1255   // manual compactions to abort and will wait for them to finish or abort
1256   // before returning.
1257   virtual void DisableManualCompaction() = 0;
1258   // Re-enable CompactRange() and ComapctFiles() that are disabled by
1259   // DisableManualCompaction(). This function must be called as many times
1260   // as DisableManualCompaction() has been called in order to re-enable
1261   // manual compactions, and must not be called more times than
1262   // DisableManualCompaction() has been called.
1263   virtual void EnableManualCompaction() = 0;
1264 
1265   // Number of levels used for this DB.
1266   virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
NumberLevels()1267   virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
1268 
1269   // Maximum level to which a new compacted memtable is pushed if it
1270   // does not create overlap.
1271   virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
MaxMemCompactionLevel()1272   virtual int MaxMemCompactionLevel() {
1273     return MaxMemCompactionLevel(DefaultColumnFamily());
1274   }
1275 
1276   // Number of files in level-0 that would stop writes.
1277   virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
Level0StopWriteTrigger()1278   virtual int Level0StopWriteTrigger() {
1279     return Level0StopWriteTrigger(DefaultColumnFamily());
1280   }
1281 
1282   // Get DB name -- the exact same name that was provided as an argument to
1283   // DB::Open()
1284   virtual const std::string& GetName() const = 0;
1285 
1286   // Get Env object from the DB
1287   virtual Env* GetEnv() const = 0;
1288 
1289   virtual FileSystem* GetFileSystem() const;
1290 
1291   // Get DB Options that we use.  During the process of opening the
1292   // column family, the options provided when calling DB::Open() or
1293   // DB::CreateColumnFamily() will have been "sanitized" and transformed
1294   // in an implementation-defined manner.
1295   virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
GetOptions()1296   virtual Options GetOptions() const {
1297     return GetOptions(DefaultColumnFamily());
1298   }
1299 
1300   virtual DBOptions GetDBOptions() const = 0;
1301 
1302   // Flush all mem-table data.
1303   // Flush a single column family, even when atomic flush is enabled. To flush
1304   // multiple column families, use Flush(options, column_families).
1305   virtual Status Flush(const FlushOptions& options,
1306                        ColumnFamilyHandle* column_family) = 0;
Flush(const FlushOptions & options)1307   virtual Status Flush(const FlushOptions& options) {
1308     return Flush(options, DefaultColumnFamily());
1309   }
1310   // Flushes multiple column families.
1311   // If atomic flush is not enabled, Flush(options, column_families) is
1312   // equivalent to calling Flush(options, column_family) multiple times.
1313   // If atomic flush is enabled, Flush(options, column_families) will flush all
1314   // column families specified in 'column_families' up to the latest sequence
1315   // number at the time when flush is requested.
1316   // Note that RocksDB 5.15 and earlier may not be able to open later versions
1317   // with atomic flush enabled.
1318   virtual Status Flush(
1319       const FlushOptions& options,
1320       const std::vector<ColumnFamilyHandle*>& column_families) = 0;
1321 
1322   // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL
1323   // afterwards.
FlushWAL(bool)1324   virtual Status FlushWAL(bool /*sync*/) {
1325     return Status::NotSupported("FlushWAL not implemented");
1326   }
1327   // Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
1328   // same as Write() with sync=true: in the latter case the changes won't be
1329   // visible until the sync is done.
1330   // Currently only works if allow_mmap_writes = false in Options.
1331   virtual Status SyncWAL() = 0;
1332 
1333   // Lock the WAL. Also flushes the WAL after locking.
LockWAL()1334   virtual Status LockWAL() {
1335     return Status::NotSupported("LockWAL not implemented");
1336   }
1337 
1338   // Unlock the WAL.
UnlockWAL()1339   virtual Status UnlockWAL() {
1340     return Status::NotSupported("UnlockWAL not implemented");
1341   }
1342 
1343   // The sequence number of the most recent transaction.
1344   virtual SequenceNumber GetLatestSequenceNumber() const = 0;
1345 
1346   // Instructs DB to preserve deletes with sequence numbers >= passed seqnum.
1347   // Has no effect if DBOptions.preserve_deletes is set to false.
1348   // This function assumes that user calls this function with monotonically
1349   // increasing seqnums (otherwise we can't guarantee that a particular delete
1350   // hasn't been already processed); returns true if the value was successfully
1351   // updated, false if user attempted to call if with seqnum <= current value.
1352   virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0;
1353 
1354   // Prevent file deletions. Compactions will continue to occur,
1355   // but no obsolete files will be deleted. Calling this multiple
1356   // times have the same effect as calling it once.
1357   virtual Status DisableFileDeletions() = 0;
1358 
1359   // Allow compactions to delete obsolete files.
1360   // If force == true, the call to EnableFileDeletions() will guarantee that
1361   // file deletions are enabled after the call, even if DisableFileDeletions()
1362   // was called multiple times before.
1363   // If force == false, EnableFileDeletions will only enable file deletion
1364   // after it's been called at least as many times as DisableFileDeletions(),
1365   // enabling the two methods to be called by two threads concurrently without
1366   // synchronization -- i.e., file deletions will be enabled only after both
1367   // threads call EnableFileDeletions()
1368   virtual Status EnableFileDeletions(bool force = true) = 0;
1369 
1370 #ifndef ROCKSDB_LITE
1371   // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
1372 
1373   // Retrieve the list of all files in the database. The files are
1374   // relative to the dbname and are not absolute paths. Despite being relative
1375   // paths, the file names begin with "/". The valid size of the manifest file
1376   // is returned in manifest_file_size. The manifest file is an ever growing
1377   // file, but only the portion specified by manifest_file_size is valid for
1378   // this snapshot. Setting flush_memtable to true does Flush before recording
1379   // the live files. Setting flush_memtable to false is useful when we don't
1380   // want to wait for flush which may have to wait for compaction to complete
1381   // taking an indeterminate time.
1382   //
1383   // In case you have multiple column families, even if flush_memtable is true,
1384   // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
1385   // for new data that arrived to already-flushed column families while other
1386   // column families were flushing
1387   virtual Status GetLiveFiles(std::vector<std::string>&,
1388                               uint64_t* manifest_file_size,
1389                               bool flush_memtable = true) = 0;
1390 
1391   // Retrieve the sorted list of all wal files with earliest file first
1392   virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
1393 
1394   // Retrieve information about the current wal file
1395   //
1396   // Note that the log might have rolled after this call in which case
1397   // the current_log_file would not point to the current log file.
1398   //
1399   // Additionally, for the sake of optimization current_log_file->StartSequence
1400   // would always be set to 0
1401   virtual Status GetCurrentWalFile(
1402       std::unique_ptr<LogFile>* current_log_file) = 0;
1403 
1404   // Retrieves the creation time of the oldest file in the DB.
1405   // This API only works if max_open_files = -1, if it is not then
1406   // Status returned is Status::NotSupported()
1407   // The file creation time is set using the env provided to the DB.
1408   // If the DB was created from a very old release then its possible that
1409   // the SST files might not have file_creation_time property and even after
1410   // moving to a newer release its possible that some files never got compacted
1411   // and may not have file_creation_time property. In both the cases
1412   // file_creation_time is considered 0 which means this API will return
1413   // creation_time = 0 as there wouldn't be a timestamp lower than 0.
1414   virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) = 0;
1415 
1416   // Note: this API is not yet consistent with WritePrepared transactions.
1417   // Sets iter to an iterator that is positioned at a write-batch containing
1418   // seq_number. If the sequence number is non existent, it returns an iterator
1419   // at the first available seq_no after the requested seq_no
1420   // Returns Status::OK if iterator is valid
1421   // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
1422   // use this api, else the WAL files will get
1423   // cleared aggressively and the iterator might keep getting invalid before
1424   // an update is read.
1425   virtual Status GetUpdatesSince(
1426       SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
1427       const TransactionLogIterator::ReadOptions& read_options =
1428           TransactionLogIterator::ReadOptions()) = 0;
1429 
1430 // Windows API macro interference
1431 #undef DeleteFile
1432   // WARNING: This API is planned for removal in RocksDB 7.0 since it does not
1433   // operate at the proper level of abstraction for a key-value store, and its
1434   // contract/restrictions are poorly documented. For example, it returns non-OK
1435   // `Status` for non-bottommost files and files undergoing compaction. Since we
1436   // do not plan to maintain it, the contract will likely remain underspecified
1437   // until its removal. Any user is encouraged to read the implementation
1438   // carefully and migrate away from it when possible.
1439   //
1440   // Delete the file name from the db directory and update the internal state to
1441   // reflect that. Supports deletion of sst and log files only. 'name' must be
1442   // path relative to the db directory. eg. 000001.sst, /archive/000003.log
1443   virtual Status DeleteFile(std::string name) = 0;
1444 
1445   // Returns a list of all table files with their level, start key
1446   // and end key
GetLiveFilesMetaData(std::vector<LiveFileMetaData> *)1447   virtual void GetLiveFilesMetaData(
1448       std::vector<LiveFileMetaData>* /*metadata*/) {}
1449 
1450   // Return a list of all table and blob files checksum info.
1451   // Note: This function might be of limited use because it cannot be
1452   // synchronized with GetLiveFiles.
1453   virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
1454 
1455   // EXPERIMENTAL: This function is not yet feature-complete.
1456   // Get information about all live files that make up a DB, for making
1457   // live copies (Checkpoint, backups, etc.) or other storage-related purposes.
1458   // Use DisableFileDeletions() before and EnableFileDeletions() after to
1459   // preserve the files for live copy.
1460   virtual Status GetLiveFilesStorageInfo(
1461       const LiveFilesStorageInfoOptions& opts,
1462       std::vector<LiveFileStorageInfo>* files) = 0;
1463 
1464   // Obtains the meta data of the specified column family of the DB.
GetColumnFamilyMetaData(ColumnFamilyHandle *,ColumnFamilyMetaData *)1465   virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
1466                                        ColumnFamilyMetaData* /*metadata*/) {}
1467 
1468   // Get the metadata of the default column family.
GetColumnFamilyMetaData(ColumnFamilyMetaData * metadata)1469   void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) {
1470     GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
1471   }
1472 
1473   // Obtains the meta data of all column families for the DB.
1474   // The returned map contains one entry for each column family indexed by the
1475   // name of the column family.
GetAllColumnFamilyMetaData(std::vector<ColumnFamilyMetaData> *)1476   virtual void GetAllColumnFamilyMetaData(
1477       std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
1478 
1479   // IngestExternalFile() will load a list of external SST files (1) into the DB
1480   // Two primary modes are supported:
1481   // - Duplicate keys in the new files will overwrite exiting keys (default)
1482   // - Duplicate keys will be skipped (set ingest_behind=true)
1483   // In the first mode we will try to find the lowest possible level that
1484   // the file can fit in, and ingest the file into this level (2). A file that
1485   // have a key range that overlap with the memtable key range will require us
1486   // to Flush the memtable first before ingesting the file.
1487   // In the second mode we will always ingest in the bottom most level (see
1488   // docs to IngestExternalFileOptions::ingest_behind).
1489   //
1490   // (1) External SST files can be created using SstFileWriter
1491   // (2) We will try to ingest the files to the lowest possible level
1492   //     even if the file compression doesn't match the level compression
1493   // (3) If IngestExternalFileOptions->ingest_behind is set to true,
1494   //     we always ingest at the bottommost level, which should be reserved
1495   //     for this purpose (see DBOPtions::allow_ingest_behind flag).
1496   virtual Status IngestExternalFile(
1497       ColumnFamilyHandle* column_family,
1498       const std::vector<std::string>& external_files,
1499       const IngestExternalFileOptions& options) = 0;
1500 
IngestExternalFile(const std::vector<std::string> & external_files,const IngestExternalFileOptions & options)1501   virtual Status IngestExternalFile(
1502       const std::vector<std::string>& external_files,
1503       const IngestExternalFileOptions& options) {
1504     return IngestExternalFile(DefaultColumnFamily(), external_files, options);
1505   }
1506 
1507   // IngestExternalFiles() will ingest files for multiple column families, and
1508   // record the result atomically to the MANIFEST.
1509   // If this function returns OK, all column families' ingestion must succeed.
1510   // If this function returns NOK, or the process crashes, then non-of the
1511   // files will be ingested into the database after recovery.
1512   // Note that it is possible for application to observe a mixed state during
1513   // the execution of this function. If the user performs range scan over the
1514   // column families with iterators, iterator on one column family may return
1515   // ingested data, while iterator on other column family returns old data.
1516   // Users can use snapshot for a consistent view of data.
1517   // If your db ingests multiple SST files using this API, i.e. args.size()
1518   // > 1, then RocksDB 5.15 and earlier will not be able to open it.
1519   //
1520   // REQUIRES: each arg corresponds to a different column family: namely, for
1521   // 0 <= i < j < len(args), args[i].column_family != args[j].column_family.
1522   virtual Status IngestExternalFiles(
1523       const std::vector<IngestExternalFileArg>& args) = 0;
1524 
1525   // CreateColumnFamilyWithImport() will create a new column family with
1526   // column_family_name and import external SST files specified in metadata into
1527   // this column family.
1528   // (1) External SST files can be created using SstFileWriter.
1529   // (2) External SST files can be exported from a particular column family in
1530   //     an existing DB using Checkpoint::ExportColumnFamily.
1531   // Option in import_options specifies whether the external files are copied or
1532   // moved (default is copy). When option specifies copy, managing files at
1533   // external_file_path is caller's responsibility. When option specifies a
1534   // move, the call makes a best effort to delete the specified files at
1535   // external_file_path on successful return, logging any failure to delete
1536   // rather than returning in Status. Files are not modified on any error
1537   // return, and a best effort is made to remove any newly-created files.
1538   // On error return, column family handle returned will be nullptr.
1539   // ColumnFamily will be present on successful return and will not be present
1540   // on error return. ColumnFamily may be present on any crash during this call.
1541   virtual Status CreateColumnFamilyWithImport(
1542       const ColumnFamilyOptions& options, const std::string& column_family_name,
1543       const ImportColumnFamilyOptions& import_options,
1544       const ExportImportFilesMetaData& metadata,
1545       ColumnFamilyHandle** handle) = 0;
1546 
1547   // Verify the checksums of files in db. Currently the whole-file checksum of
1548   // table files are checked.
VerifyFileChecksums(const ReadOptions &)1549   virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) {
1550     return Status::NotSupported("File verification not supported");
1551   }
1552 
1553   // Verify the block checksums of files in db. The block checksums of table
1554   // files are checked.
1555   virtual Status VerifyChecksum(const ReadOptions& read_options) = 0;
1556 
VerifyChecksum()1557   virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }
1558 
1559   // AddFile() is deprecated, please use IngestExternalFile()
1560   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1561       ColumnFamilyHandle* column_family,
1562       const std::vector<std::string>& file_path_list, bool move_file = false,
1563       bool skip_snapshot_check = false) {
1564     IngestExternalFileOptions ifo;
1565     ifo.move_files = move_file;
1566     ifo.snapshot_consistency = !skip_snapshot_check;
1567     ifo.allow_global_seqno = false;
1568     ifo.allow_blocking_flush = false;
1569     return IngestExternalFile(column_family, file_path_list, ifo);
1570   }
1571 
1572   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1573       const std::vector<std::string>& file_path_list, bool move_file = false,
1574       bool skip_snapshot_check = false) {
1575     IngestExternalFileOptions ifo;
1576     ifo.move_files = move_file;
1577     ifo.snapshot_consistency = !skip_snapshot_check;
1578     ifo.allow_global_seqno = false;
1579     ifo.allow_blocking_flush = false;
1580     return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo);
1581   }
1582 
1583   // AddFile() is deprecated, please use IngestExternalFile()
1584   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1585       ColumnFamilyHandle* column_family, const std::string& file_path,
1586       bool move_file = false, bool skip_snapshot_check = false) {
1587     IngestExternalFileOptions ifo;
1588     ifo.move_files = move_file;
1589     ifo.snapshot_consistency = !skip_snapshot_check;
1590     ifo.allow_global_seqno = false;
1591     ifo.allow_blocking_flush = false;
1592     return IngestExternalFile(column_family, {file_path}, ifo);
1593   }
1594 
1595   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1596       const std::string& file_path, bool move_file = false,
1597       bool skip_snapshot_check = false) {
1598     IngestExternalFileOptions ifo;
1599     ifo.move_files = move_file;
1600     ifo.snapshot_consistency = !skip_snapshot_check;
1601     ifo.allow_global_seqno = false;
1602     ifo.allow_blocking_flush = false;
1603     return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo);
1604   }
1605 
1606   // Load table file with information "file_info" into "column_family"
1607   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1608       ColumnFamilyHandle* column_family,
1609       const std::vector<ExternalSstFileInfo>& file_info_list,
1610       bool move_file = false, bool skip_snapshot_check = false) {
1611     std::vector<std::string> external_files;
1612     for (const ExternalSstFileInfo& file_info : file_info_list) {
1613       external_files.push_back(file_info.file_path);
1614     }
1615     IngestExternalFileOptions ifo;
1616     ifo.move_files = move_file;
1617     ifo.snapshot_consistency = !skip_snapshot_check;
1618     ifo.allow_global_seqno = false;
1619     ifo.allow_blocking_flush = false;
1620     return IngestExternalFile(column_family, external_files, ifo);
1621   }
1622 
1623   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1624       const std::vector<ExternalSstFileInfo>& file_info_list,
1625       bool move_file = false, bool skip_snapshot_check = false) {
1626     std::vector<std::string> external_files;
1627     for (const ExternalSstFileInfo& file_info : file_info_list) {
1628       external_files.push_back(file_info.file_path);
1629     }
1630     IngestExternalFileOptions ifo;
1631     ifo.move_files = move_file;
1632     ifo.snapshot_consistency = !skip_snapshot_check;
1633     ifo.allow_global_seqno = false;
1634     ifo.allow_blocking_flush = false;
1635     return IngestExternalFile(DefaultColumnFamily(), external_files, ifo);
1636   }
1637 
1638   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1639       ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info,
1640       bool move_file = false, bool skip_snapshot_check = false) {
1641     IngestExternalFileOptions ifo;
1642     ifo.move_files = move_file;
1643     ifo.snapshot_consistency = !skip_snapshot_check;
1644     ifo.allow_global_seqno = false;
1645     ifo.allow_blocking_flush = false;
1646     return IngestExternalFile(column_family, {file_info->file_path}, ifo);
1647   }
1648 
1649   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1650       const ExternalSstFileInfo* file_info, bool move_file = false,
1651       bool skip_snapshot_check = false) {
1652     IngestExternalFileOptions ifo;
1653     ifo.move_files = move_file;
1654     ifo.snapshot_consistency = !skip_snapshot_check;
1655     ifo.allow_global_seqno = false;
1656     ifo.allow_blocking_flush = false;
1657     return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path},
1658                               ifo);
1659   }
1660 
1661 #endif  // ROCKSDB_LITE
1662 
1663   // Returns the unique ID which is read from IDENTITY file during the opening
1664   // of database by setting in the identity variable
1665   // Returns Status::OK if identity could be set properly
1666   virtual Status GetDbIdentity(std::string& identity) const = 0;
1667 
1668   // Return a unique identifier for each DB object that is opened
1669   // This DB session ID should be unique among all open DB instances on all
1670   // hosts, and should be unique among re-openings of the same or other DBs.
1671   // (Two open DBs have the same identity from other function GetDbIdentity when
1672   // one is physically copied from the other.)
1673   virtual Status GetDbSessionId(std::string& session_id) const = 0;
1674 
1675   // Returns default column family handle
1676   virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
1677 
1678 #ifndef ROCKSDB_LITE
1679 
1680   virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
1681                                           TablePropertiesCollection* props) = 0;
GetPropertiesOfAllTables(TablePropertiesCollection * props)1682   virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
1683     return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
1684   }
1685   virtual Status GetPropertiesOfTablesInRange(
1686       ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
1687       TablePropertiesCollection* props) = 0;
1688 
SuggestCompactRange(ColumnFamilyHandle *,const Slice *,const Slice *)1689   virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/,
1690                                      const Slice* /*begin*/,
1691                                      const Slice* /*end*/) {
1692     return Status::NotSupported("SuggestCompactRange() is not implemented.");
1693   }
1694 
PromoteL0(ColumnFamilyHandle *,int)1695   virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/,
1696                            int /*target_level*/) {
1697     return Status::NotSupported("PromoteL0() is not implemented.");
1698   }
1699 
1700   // Trace DB operations. Use EndTrace() to stop tracing.
StartTrace(const TraceOptions &,std::unique_ptr<TraceWriter> &&)1701   virtual Status StartTrace(const TraceOptions& /*options*/,
1702                             std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
1703     return Status::NotSupported("StartTrace() is not implemented.");
1704   }
1705 
EndTrace()1706   virtual Status EndTrace() {
1707     return Status::NotSupported("EndTrace() is not implemented.");
1708   }
1709 
1710   // IO Tracing operations. Use EndIOTrace() to stop tracing.
StartIOTrace(const TraceOptions &,std::unique_ptr<TraceWriter> &&)1711   virtual Status StartIOTrace(const TraceOptions& /*options*/,
1712                               std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
1713     return Status::NotSupported("StartIOTrace() is not implemented.");
1714   }
1715 
EndIOTrace()1716   virtual Status EndIOTrace() {
1717     return Status::NotSupported("EndIOTrace() is not implemented.");
1718   }
1719 
1720   // Trace block cache accesses. Use EndBlockCacheTrace() to stop tracing.
StartBlockCacheTrace(const TraceOptions &,std::unique_ptr<TraceWriter> &&)1721   virtual Status StartBlockCacheTrace(
1722       const TraceOptions& /*options*/,
1723       std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
1724     return Status::NotSupported("StartBlockCacheTrace() is not implemented.");
1725   }
1726 
EndBlockCacheTrace()1727   virtual Status EndBlockCacheTrace() {
1728     return Status::NotSupported("EndBlockCacheTrace() is not implemented.");
1729   }
1730 
1731   // Create a default trace replayer.
NewDefaultReplayer(const std::vector<ColumnFamilyHandle * > &,std::unique_ptr<TraceReader> &&,std::unique_ptr<Replayer> *)1732   virtual Status NewDefaultReplayer(
1733       const std::vector<ColumnFamilyHandle*>& /*handles*/,
1734       std::unique_ptr<TraceReader>&& /*reader*/,
1735       std::unique_ptr<Replayer>* /*replayer*/) {
1736     return Status::NotSupported("NewDefaultReplayer() is not implemented.");
1737   }
1738 
1739 #endif  // ROCKSDB_LITE
1740 
1741   // Needed for StackableDB
GetRootDB()1742   virtual DB* GetRootDB() { return this; }
1743 
1744   // Given a window [start_time, end_time), setup a StatsHistoryIterator
1745   // to access stats history. Note the start_time and end_time are epoch
1746   // time measured in seconds, and end_time is an exclusive bound.
GetStatsHistory(uint64_t,uint64_t,std::unique_ptr<StatsHistoryIterator> *)1747   virtual Status GetStatsHistory(
1748       uint64_t /*start_time*/, uint64_t /*end_time*/,
1749       std::unique_ptr<StatsHistoryIterator>* /*stats_iterator*/) {
1750     return Status::NotSupported("GetStatsHistory() is not implemented.");
1751   }
1752 
1753 #ifndef ROCKSDB_LITE
1754   // Make the secondary instance catch up with the primary by tailing and
1755   // replaying the MANIFEST and WAL of the primary.
1756   // Column families created by the primary after the secondary instance starts
1757   // will be ignored unless the secondary instance closes and restarts with the
1758   // newly created column families.
1759   // Column families that exist before secondary instance starts and dropped by
1760   // the primary afterwards will be marked as dropped. However, as long as the
1761   // secondary instance does not delete the corresponding column family
1762   // handles, the data of the column family is still accessible to the
1763   // secondary.
1764   // TODO: we will support WAL tailing soon.
TryCatchUpWithPrimary()1765   virtual Status TryCatchUpWithPrimary() {
1766     return Status::NotSupported("Supported only by secondary instance");
1767   }
1768 #endif  // !ROCKSDB_LITE
1769 };
1770 
1771 // Destroy the contents of the specified database.
1772 // Be very careful using this method.
1773 Status DestroyDB(const std::string& name, const Options& options,
1774                  const std::vector<ColumnFamilyDescriptor>& column_families =
1775                      std::vector<ColumnFamilyDescriptor>());
1776 
1777 #ifndef ROCKSDB_LITE
1778 // If a DB cannot be opened, you may attempt to call this method to
1779 // resurrect as much of the contents of the database as possible.
1780 // Some data may be lost, so be careful when calling this function
1781 // on a database that contains important information.
1782 //
1783 // With this API, we will warn and skip data associated with column families not
1784 // specified in column_families.
1785 //
1786 // @param column_families Descriptors for known column families
1787 Status RepairDB(const std::string& dbname, const DBOptions& db_options,
1788                 const std::vector<ColumnFamilyDescriptor>& column_families);
1789 
1790 // @param unknown_cf_opts Options for column families encountered during the
1791 //                        repair that were not specified in column_families.
1792 Status RepairDB(const std::string& dbname, const DBOptions& db_options,
1793                 const std::vector<ColumnFamilyDescriptor>& column_families,
1794                 const ColumnFamilyOptions& unknown_cf_opts);
1795 
1796 // @param options These options will be used for the database and for ALL column
1797 //                families encountered during the repair
1798 Status RepairDB(const std::string& dbname, const Options& options);
1799 
1800 #endif
1801 
1802 }  // namespace ROCKSDB_NAMESPACE
1803