1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8 
9 #pragma once
10 
#include <stdint.h>
#include <stdio.h>

#include <algorithm>
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "rocksdb/iterator.h"
#include "rocksdb/listener.h"
#include "rocksdb/metadata.h"
#include "rocksdb/options.h"
#include "rocksdb/snapshot.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/thread_status.h"
#include "rocksdb/transaction_log.h"
#include "rocksdb/types.h"
#include "rocksdb/version.h"
28 
29 #ifdef _WIN32
30 // Windows API macro interference
31 #undef DeleteFile
32 #endif
33 
// ROCKSDB_DEPRECATED_FUNC marks a declaration as deprecated using whichever
// compiler-specific spelling is available. On toolchains that provide neither
// spelling it expands to nothing, so annotated declarations still compile.
#if defined(__GNUC__) || defined(__clang__)
#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
#elif defined(_WIN32)
#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
#else
#define ROCKSDB_DEPRECATED_FUNC
#endif
39 
40 namespace ROCKSDB_NAMESPACE {
41 
42 struct Options;
43 struct DBOptions;
44 struct ColumnFamilyOptions;
45 struct ReadOptions;
46 struct WriteOptions;
47 struct FlushOptions;
48 struct CompactionOptions;
49 struct CompactRangeOptions;
50 struct TableProperties;
51 struct ExternalSstFileInfo;
52 class WriteBatch;
53 class Env;
54 class EventListener;
55 class StatsHistoryIterator;
56 class TraceWriter;
57 #ifdef ROCKSDB_LITE
58 class CompactionJobInfo;
59 #endif
60 class FileSystem;
61 
62 extern const std::string kDefaultColumnFamilyName;
63 extern const std::string kPersistentStatsColumnFamilyName;
64 struct ColumnFamilyDescriptor {
65   std::string name;
66   ColumnFamilyOptions options;
ColumnFamilyDescriptorColumnFamilyDescriptor67   ColumnFamilyDescriptor()
68       : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
ColumnFamilyDescriptorColumnFamilyDescriptor69   ColumnFamilyDescriptor(const std::string& _name,
70                          const ColumnFamilyOptions& _options)
71       : name(_name), options(_options) {}
72 };
73 
74 class ColumnFamilyHandle {
75  public:
~ColumnFamilyHandle()76   virtual ~ColumnFamilyHandle() {}
77   // Returns the name of the column family associated with the current handle.
78   virtual const std::string& GetName() const = 0;
79   // Returns the ID of the column family associated with the current handle.
80   virtual uint32_t GetID() const = 0;
81   // Fills "*desc" with the up-to-date descriptor of the column family
82   // associated with this handle. Since it fills "*desc" with the up-to-date
83   // information, this call might internally lock and release DB mutex to
84   // access the up-to-date CF options.  In addition, all the pointer-typed
85   // options cannot be referenced any longer than the original options exist.
86   //
87   // Note that this function is not supported in RocksDBLite.
88   virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
89   // Returns the comparator of the column family associated with the
90   // current handle.
91   virtual const Comparator* GetComparator() const = 0;
92 };
93 
// Major and minor version numbers, taken from the __ROCKSDB_MAJOR__ and
// __ROCKSDB_MINOR__ macros.
// NOTE(review): the macros are presumably defined by rocksdb/version.h --
// confirm.
static const int kMajorVersion = __ROCKSDB_MAJOR__;
static const int kMinorVersion = __ROCKSDB_MINOR__;
96 
97 // A range of keys
98 struct Range {
99   Slice start;
100   Slice limit;
101 
RangeRange102   Range() {}
RangeRange103   Range(const Slice& s, const Slice& l) : start(s), limit(l) {}
104 };
105 
106 struct RangePtr {
107   const Slice* start;
108   const Slice* limit;
109 
RangePtrRangePtr110   RangePtr() : start(nullptr), limit(nullptr) {}
RangePtrRangePtr111   RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {}
112 };
113 
// Arguments describing one set of external files to ingest.
//
// It is valid that files_checksums and files_checksum_func_names are both
// empty (no checksum information is provided for ingestion). Otherwise,
// their sizes should be the same as external_files. The file order should
// be the same in three vectors and guaranteed by the caller.
struct IngestExternalFileArg {
  // NOTE(review): presumably the column family the files are ingested into;
  // null unless the caller sets it -- confirm against the ingestion API.
  ColumnFamilyHandle* column_family = nullptr;
  // The external files. NOTE(review): presumably file paths -- confirm.
  std::vector<std::string> external_files;
  IngestExternalFileOptions options;
  // Optional; when non-empty, parallel to external_files.
  std::vector<std::string> files_checksums;
  // Optional; when non-empty, parallel to external_files.
  std::vector<std::string> files_checksum_func_names;
};
125 
// Options for DB::GetMergeOperands().
struct GetMergeOperandsOptions {
  // Number of merge operands the caller expects at most. The caller-allocated
  // merge_operands array passed to GetMergeOperands() must hold at least this
  // many elements; if the DB holds more operands for the key, none are
  // returned and the status is Incomplete.
  int expected_max_number_of_operands = 0;
};
129 
130 // A collections of table properties objects, where
131 //  key: is the table's file name.
132 //  value: the table properties object of the given table.
133 typedef std::unordered_map<std::string, std::shared_ptr<const TableProperties>>
134     TablePropertiesCollection;
135 
136 // A DB is a persistent, versioned ordered map from keys to values.
137 // A DB is safe for concurrent access from multiple threads without
138 // any external synchronization.
139 // DB is an abstract base class with one primary implementation (DBImpl)
140 // and a number of wrapper implementations.
141 class DB {
142  public:
143   // Open the database with the specified "name".
144   // Stores a pointer to a heap-allocated database in *dbptr and returns
145   // OK on success.
146   // Stores nullptr in *dbptr and returns a non-OK status on error.
147   // Caller should delete *dbptr when it is no longer needed.
148   static Status Open(const Options& options, const std::string& name,
149                      DB** dbptr);
150 
151   // Open the database for read only. All DB interfaces
152   // that modify data, like put/delete, will return error.
153   // If the db is opened in read only mode, then no compactions
154   // will happen.
155   //
156   // Not supported in ROCKSDB_LITE, in which case the function will
157   // return Status::NotSupported.
158   static Status OpenForReadOnly(const Options& options, const std::string& name,
159                                 DB** dbptr,
160                                 bool error_if_wal_file_exists = false);
161 
162   // Open the database for read only with column families. When opening DB with
163   // read only, you can specify only a subset of column families in the
164   // database that should be opened. However, you always need to specify default
165   // column family. The default column family name is 'default' and it's stored
166   // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName
167   //
168   // Not supported in ROCKSDB_LITE, in which case the function will
169   // return Status::NotSupported.
170   static Status OpenForReadOnly(
171       const DBOptions& db_options, const std::string& name,
172       const std::vector<ColumnFamilyDescriptor>& column_families,
173       std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
174       bool error_if_wal_file_exists = false);
175 
176   // The following OpenAsSecondary functions create a secondary instance that
177   // can dynamically tail the MANIFEST of a primary that must have already been
178   // created. User can call TryCatchUpWithPrimary to make the secondary
179   // instance catch up with primary (WAL tailing is NOT supported now) whenever
180   // the user feels necessary. Column families created by the primary after the
181   // secondary instance starts are currently ignored by the secondary instance.
182   // Column families opened by secondary and dropped by the primary will be
183   // dropped by secondary as well. However the user of the secondary instance
184   // can still access the data of such dropped column family as long as they
185   // do not destroy the corresponding column family handle.
186   // WAL tailing is not supported at present, but will arrive soon.
187   //
188   // The options argument specifies the options to open the secondary instance.
189   // The name argument specifies the name of the primary db that you have used
190   // to open the primary instance.
191   // The secondary_path argument points to a directory where the secondary
192   // instance stores its info log.
193   // The dbptr is an out-arg corresponding to the opened secondary instance.
194   // The pointer points to a heap-allocated database, and the user should
195   // delete it after use.
196   // Open DB as secondary instance with only the default column family.
197   // Return OK on success, non-OK on failures.
198   static Status OpenAsSecondary(const Options& options, const std::string& name,
199                                 const std::string& secondary_path, DB** dbptr);
200 
201   // Open DB as secondary instance with column families. You can open a subset
202   // of column families in secondary mode.
203   // The db_options specify the database specific options.
204   // The name argument specifies the name of the primary db that you have used
205   // to open the primary instance.
206   // The secondary_path argument points to a directory where the secondary
207   // instance stores its info log.
208   // The column_families argument specifies a list of column families to open.
209   // If any of the column families does not exist, the function returns non-OK
210   // status.
211   // The handles is an out-arg corresponding to the opened database column
212   // family handles.
213   // The dbptr is an out-arg corresponding to the opened secondary instance.
214   // The pointer points to a heap-allocated database, and the caller should
215   // delete it after use. Before deleting the dbptr, the user should also
216   // delete the pointers stored in handles vector.
  // Return OK on success, non-OK on failures.
218   static Status OpenAsSecondary(
219       const DBOptions& db_options, const std::string& name,
220       const std::string& secondary_path,
221       const std::vector<ColumnFamilyDescriptor>& column_families,
222       std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
223 
224   // Open DB with column families.
225   // db_options specify database specific options
226   // column_families is the vector of all column families in the database,
227   // containing column family name and options. You need to open ALL column
228   // families in the database. To get the list of column families, you can use
229   // ListColumnFamilies(). Also, you can open only a subset of column families
230   // for read-only access.
231   // The default column family name is 'default' and it's stored
232   // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName.
233   // If everything is OK, handles will on return be the same size
234   // as column_families --- handles[i] will be a handle that you
235   // will use to operate on column family column_family[i].
  // Before deleting the DB, you have to close all column families by calling
  // DestroyColumnFamilyHandle() with all the handles.
238   static Status Open(const DBOptions& db_options, const std::string& name,
239                      const std::vector<ColumnFamilyDescriptor>& column_families,
240                      std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
241 
242   // Open DB and run the compaction.
243   // It's a read-only operation, the result won't be installed to the DB, it
244   // will be output to the `output_directory`. The API should only be used with
245   // `options.CompactionService` to run compaction triggered by
246   // `CompactionService`.
247   static Status OpenAndCompact(
248       const std::string& name, const std::string& output_directory,
249       const std::string& input, std::string* output,
250       const CompactionServiceOptionsOverride& override_options);
251 
Resume()252   virtual Status Resume() { return Status::NotSupported(); }
253 
254   // Close the DB by releasing resources, closing files etc. This should be
255   // called before calling the destructor so that the caller can get back a
256   // status in case there are any errors. This will not fsync the WAL files.
257   // If syncing is required, the caller must first call SyncWAL(), or Write()
258   // using an empty write batch with WriteOptions.sync=true.
259   // Regardless of the return status, the DB must be freed.
260   // If the return status is Aborted(), closing fails because there is
261   // unreleased snapshot in the system. In this case, users can release
262   // the unreleased snapshots and try again and expect it to succeed. For
263   // other status, recalling Close() will be no-op.
264   // If the return status is NotSupported(), then the DB implementation does
265   // cleanup in the destructor
Close()266   virtual Status Close() { return Status::NotSupported(); }
267 
268   // ListColumnFamilies will open the DB specified by argument name
269   // and return the list of all column families in that DB
270   // through column_families argument. The ordering of
271   // column families in column_families is unspecified.
272   static Status ListColumnFamilies(const DBOptions& db_options,
273                                    const std::string& name,
274                                    std::vector<std::string>* column_families);
275 
276   // Abstract class ctor
DB()277   DB() {}
278   // No copying allowed
279   DB(const DB&) = delete;
280   void operator=(const DB&) = delete;
281 
282   virtual ~DB();
283 
284   // Create a column_family and return the handle of column family
285   // through the argument handle.
286   virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
287                                     const std::string& column_family_name,
288                                     ColumnFamilyHandle** handle);
289 
290   // Bulk create column families with the same column family options.
291   // Return the handles of the column families through the argument handles.
292   // In case of error, the request may succeed partially, and handles will
293   // contain column family handles that it managed to create, and have size
294   // equal to the number of created column families.
295   virtual Status CreateColumnFamilies(
296       const ColumnFamilyOptions& options,
297       const std::vector<std::string>& column_family_names,
298       std::vector<ColumnFamilyHandle*>* handles);
299 
300   // Bulk create column families.
301   // Return the handles of the column families through the argument handles.
302   // In case of error, the request may succeed partially, and handles will
303   // contain column family handles that it managed to create, and have size
304   // equal to the number of created column families.
305   virtual Status CreateColumnFamilies(
306       const std::vector<ColumnFamilyDescriptor>& column_families,
307       std::vector<ColumnFamilyHandle*>* handles);
308 
309   // Drop a column family specified by column_family handle. This call
310   // only records a drop record in the manifest and prevents the column
311   // family from flushing and compacting.
312   virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
313 
314   // Bulk drop column families. This call only records drop records in the
315   // manifest and prevents the column families from flushing and compacting.
316   // In case of error, the request may succeed partially. User may call
317   // ListColumnFamilies to check the result.
318   virtual Status DropColumnFamilies(
319       const std::vector<ColumnFamilyHandle*>& column_families);
320 
321   // Close a column family specified by column_family handle and destroy
322   // the column family handle specified to avoid double deletion. This call
323   // deletes the column family handle by default. Use this method to
324   // close column family instead of deleting column family handle directly
325   virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family);
326 
327   // Set the database entry for "key" to "value".
328   // If "key" already exists, it will be overwritten.
329   // Returns OK on success, and a non-OK status on error.
330   // Note: consider setting options.sync = true.
331   virtual Status Put(const WriteOptions& options,
332                      ColumnFamilyHandle* column_family, const Slice& key,
333                      const Slice& value) = 0;
Put(const WriteOptions & options,const Slice & key,const Slice & value)334   virtual Status Put(const WriteOptions& options, const Slice& key,
335                      const Slice& value) {
336     return Put(options, DefaultColumnFamily(), key, value);
337   }
338 
339   // Remove the database entry (if any) for "key".  Returns OK on
340   // success, and a non-OK status on error.  It is not an error if "key"
341   // did not exist in the database.
342   // Note: consider setting options.sync = true.
343   virtual Status Delete(const WriteOptions& options,
344                         ColumnFamilyHandle* column_family,
345                         const Slice& key) = 0;
Delete(const WriteOptions & options,const Slice & key)346   virtual Status Delete(const WriteOptions& options, const Slice& key) {
347     return Delete(options, DefaultColumnFamily(), key);
348   }
349 
350   // Remove the database entry for "key". Requires that the key exists
351   // and was not overwritten. Returns OK on success, and a non-OK status
352   // on error.  It is not an error if "key" did not exist in the database.
353   //
354   // If a key is overwritten (by calling Put() multiple times), then the result
355   // of calling SingleDelete() on this key is undefined.  SingleDelete() only
356   // behaves correctly if there has been only one Put() for this key since the
357   // previous call to SingleDelete() for this key.
358   //
359   // This feature is currently an experimental performance optimization
360   // for a very specific workload.  It is up to the caller to ensure that
361   // SingleDelete is only used for a key that is not deleted using Delete() or
362   // written using Merge().  Mixing SingleDelete operations with Deletes and
363   // Merges can result in undefined behavior.
364   //
365   // Note: consider setting options.sync = true.
366   virtual Status SingleDelete(const WriteOptions& options,
367                               ColumnFamilyHandle* column_family,
368                               const Slice& key) = 0;
SingleDelete(const WriteOptions & options,const Slice & key)369   virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
370     return SingleDelete(options, DefaultColumnFamily(), key);
371   }
372 
373   // Removes the database entries in the range ["begin_key", "end_key"), i.e.,
374   // including "begin_key" and excluding "end_key". Returns OK on success, and
375   // a non-OK status on error. It is not an error if the database does not
376   // contain any existing data in the range ["begin_key", "end_key").
377   //
  // If "end_key" comes before "begin_key" according to the user's comparator,
  // a `Status::InvalidArgument` is returned.
380   //
381   // This feature is now usable in production, with the following caveats:
382   // 1) Accumulating many range tombstones in the memtable will degrade read
383   // performance; this can be avoided by manually flushing occasionally.
384   // 2) Limiting the maximum number of open files in the presence of range
385   // tombstones can degrade read performance. To avoid this problem, set
386   // max_open_files to -1 whenever possible.
387   virtual Status DeleteRange(const WriteOptions& options,
388                              ColumnFamilyHandle* column_family,
389                              const Slice& begin_key, const Slice& end_key);
390 
391   // Merge the database entry for "key" with "value".  Returns OK on success,
392   // and a non-OK status on error. The semantics of this operation is
393   // determined by the user provided merge_operator when opening DB.
394   // Note: consider setting options.sync = true.
395   virtual Status Merge(const WriteOptions& options,
396                        ColumnFamilyHandle* column_family, const Slice& key,
397                        const Slice& value) = 0;
Merge(const WriteOptions & options,const Slice & key,const Slice & value)398   virtual Status Merge(const WriteOptions& options, const Slice& key,
399                        const Slice& value) {
400     return Merge(options, DefaultColumnFamily(), key, value);
401   }
402 
403   // Apply the specified updates to the database.
404   // If `updates` contains no update, WAL will still be synced if
405   // options.sync=true.
406   // Returns OK on success, non-OK on failure.
407   // Note: consider setting options.sync = true.
408   virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
409 
410   // If the database contains an entry for "key" store the
411   // corresponding value in *value and return OK.
412   //
413   // If timestamp is enabled and a non-null timestamp pointer is passed in,
414   // timestamp is returned.
415   //
416   // If there is no entry for "key" leave *value unchanged and return
417   // a status for which Status::IsNotFound() returns true.
418   //
419   // May return some other Status on an error.
Get(const ReadOptions & options,ColumnFamilyHandle * column_family,const Slice & key,std::string * value)420   virtual inline Status Get(const ReadOptions& options,
421                             ColumnFamilyHandle* column_family, const Slice& key,
422                             std::string* value) {
423     assert(value != nullptr);
424     PinnableSlice pinnable_val(value);
425     assert(!pinnable_val.IsPinned());
426     auto s = Get(options, column_family, key, &pinnable_val);
427     if (s.ok() && pinnable_val.IsPinned()) {
428       value->assign(pinnable_val.data(), pinnable_val.size());
429     }  // else value is already assigned
430     return s;
431   }
432   virtual Status Get(const ReadOptions& options,
433                      ColumnFamilyHandle* column_family, const Slice& key,
434                      PinnableSlice* value) = 0;
Get(const ReadOptions & options,const Slice & key,std::string * value)435   virtual Status Get(const ReadOptions& options, const Slice& key,
436                      std::string* value) {
437     return Get(options, DefaultColumnFamily(), key, value);
438   }
439 
440   // Get() methods that return timestamp. Derived DB classes don't need to worry
441   // about this group of methods if they don't care about timestamp feature.
Get(const ReadOptions & options,ColumnFamilyHandle * column_family,const Slice & key,std::string * value,std::string * timestamp)442   virtual inline Status Get(const ReadOptions& options,
443                             ColumnFamilyHandle* column_family, const Slice& key,
444                             std::string* value, std::string* timestamp) {
445     assert(value != nullptr);
446     PinnableSlice pinnable_val(value);
447     assert(!pinnable_val.IsPinned());
448     auto s = Get(options, column_family, key, &pinnable_val, timestamp);
449     if (s.ok() && pinnable_val.IsPinned()) {
450       value->assign(pinnable_val.data(), pinnable_val.size());
451     }  // else value is already assigned
452     return s;
453   }
Get(const ReadOptions &,ColumnFamilyHandle *,const Slice &,PinnableSlice *,std::string *)454   virtual Status Get(const ReadOptions& /*options*/,
455                      ColumnFamilyHandle* /*column_family*/,
456                      const Slice& /*key*/, PinnableSlice* /*value*/,
457                      std::string* /*timestamp*/) {
458     return Status::NotSupported(
459         "Get() that returns timestamp is not implemented.");
460   }
Get(const ReadOptions & options,const Slice & key,std::string * value,std::string * timestamp)461   virtual Status Get(const ReadOptions& options, const Slice& key,
462                      std::string* value, std::string* timestamp) {
463     return Get(options, DefaultColumnFamily(), key, value, timestamp);
464   }
465 
  // Returns all the merge operands corresponding to the key. If the
  // number of merge operands in DB is greater than
  // get_merge_operands_options->expected_max_number_of_operands,
  // no merge operands are returned and status is Incomplete. Merge operands
  // returned are in the order of insertion.
  // merge_operands - Points to an array of at least
  //             get_merge_operands_options->expected_max_number_of_operands
  //             elements; the caller is responsible for allocating it. If the
  //             status returned is Incomplete then number_of_operands will
  //             contain the total number of merge operands found in DB for key.
476   virtual Status GetMergeOperands(
477       const ReadOptions& options, ColumnFamilyHandle* column_family,
478       const Slice& key, PinnableSlice* merge_operands,
479       GetMergeOperandsOptions* get_merge_operands_options,
480       int* number_of_operands) = 0;
481 
482   // Consistent Get of many keys across column families without the need
483   // for an explicit snapshot. NOTE: the implementation of this MultiGet API
484   // does not have the performance benefits of the void-returning MultiGet
485   // functions.
486   //
487   // If keys[i] does not exist in the database, then the i'th returned
488   // status will be one for which Status::IsNotFound() is true, and
489   // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
490   // the i'th returned status will have Status::ok() true, and (*values)[i]
491   // will store the value associated with keys[i].
492   //
493   // (*values) will always be resized to be the same size as (keys).
494   // Similarly, the number of returned statuses will be the number of keys.
495   // Note: keys will not be "de-duplicated". Duplicate keys will return
496   // duplicate values in order.
497   virtual std::vector<Status> MultiGet(
498       const ReadOptions& options,
499       const std::vector<ColumnFamilyHandle*>& column_family,
500       const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
MultiGet(const ReadOptions & options,const std::vector<Slice> & keys,std::vector<std::string> * values)501   virtual std::vector<Status> MultiGet(const ReadOptions& options,
502                                        const std::vector<Slice>& keys,
503                                        std::vector<std::string>* values) {
504     return MultiGet(
505         options,
506         std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
507         keys, values);
508   }
509 
MultiGet(const ReadOptions &,const std::vector<ColumnFamilyHandle * > &,const std::vector<Slice> & keys,std::vector<std::string> *,std::vector<std::string> *)510   virtual std::vector<Status> MultiGet(
511       const ReadOptions& /*options*/,
512       const std::vector<ColumnFamilyHandle*>& /*column_family*/,
513       const std::vector<Slice>& keys, std::vector<std::string>* /*values*/,
514       std::vector<std::string>* /*timestamps*/) {
515     return std::vector<Status>(
516         keys.size(), Status::NotSupported(
517                          "MultiGet() returning timestamps not implemented."));
518   }
MultiGet(const ReadOptions & options,const std::vector<Slice> & keys,std::vector<std::string> * values,std::vector<std::string> * timestamps)519   virtual std::vector<Status> MultiGet(const ReadOptions& options,
520                                        const std::vector<Slice>& keys,
521                                        std::vector<std::string>* values,
522                                        std::vector<std::string>* timestamps) {
523     return MultiGet(
524         options,
525         std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
526         keys, values, timestamps);
527   }
528 
529   // Overloaded MultiGet API that improves performance by batching operations
530   // in the read path for greater efficiency. Currently, only the block based
531   // table format with full filters are supported. Other table formats such
532   // as plain table, block based table with block based filters and
533   // partitioned indexes will still work, but will not get any performance
534   // benefits.
535   // Parameters -
536   // options - ReadOptions
537   // column_family - ColumnFamilyHandle* that the keys belong to. All the keys
538   //                 passed to the API are restricted to a single column family
539   // num_keys - Number of keys to lookup
540   // keys - Pointer to C style array of key Slices with num_keys elements
541   // values - Pointer to C style array of PinnableSlices with num_keys elements
542   // statuses - Pointer to C style array of Status with num_keys elements
543   // sorted_input - If true, it means the input keys are already sorted by key
544   //                order, so the MultiGet() API doesn't have to sort them
545   //                again. If false, the keys will be copied and sorted
546   //                internally by the API - the input array will not be
547   //                modified
548   virtual void MultiGet(const ReadOptions& options,
549                         ColumnFamilyHandle* column_family,
550                         const size_t num_keys, const Slice* keys,
551                         PinnableSlice* values, Status* statuses,
552                         const bool /*sorted_input*/ = false) {
553     std::vector<ColumnFamilyHandle*> cf;
554     std::vector<Slice> user_keys;
555     std::vector<Status> status;
556     std::vector<std::string> vals;
557 
558     for (size_t i = 0; i < num_keys; ++i) {
559       cf.emplace_back(column_family);
560       user_keys.emplace_back(keys[i]);
561     }
562     status = MultiGet(options, cf, user_keys, &vals);
563     std::copy(status.begin(), status.end(), statuses);
564     for (auto& value : vals) {
565       values->PinSelf(value);
566       values++;
567     }
568   }
569 
570   virtual void MultiGet(const ReadOptions& options,
571                         ColumnFamilyHandle* column_family,
572                         const size_t num_keys, const Slice* keys,
573                         PinnableSlice* values, std::string* timestamps,
574                         Status* statuses, const bool /*sorted_input*/ = false) {
575     std::vector<ColumnFamilyHandle*> cf;
576     std::vector<Slice> user_keys;
577     std::vector<Status> status;
578     std::vector<std::string> vals;
579     std::vector<std::string> tss;
580 
581     for (size_t i = 0; i < num_keys; ++i) {
582       cf.emplace_back(column_family);
583       user_keys.emplace_back(keys[i]);
584     }
585     status = MultiGet(options, cf, user_keys, &vals, &tss);
586     std::copy(status.begin(), status.end(), statuses);
587     std::copy(tss.begin(), tss.end(), timestamps);
588     for (auto& value : vals) {
589       values->PinSelf(value);
590       values++;
591     }
592   }
593 
594   // Overloaded MultiGet API that improves performance by batching operations
595   // in the read path for greater efficiency. Currently, only the block based
596   // table format with full filters are supported. Other table formats such
597   // as plain table, block based table with block based filters and
598   // partitioned indexes will still work, but will not get any performance
599   // benefits.
600   // Parameters -
601   // options - ReadOptions
602   // column_family - ColumnFamilyHandle* that the keys belong to. All the keys
603   //                 passed to the API are restricted to a single column family
604   // num_keys - Number of keys to lookup
605   // keys - Pointer to C style array of key Slices with num_keys elements
606   // values - Pointer to C style array of PinnableSlices with num_keys elements
607   // statuses - Pointer to C style array of Status with num_keys elements
608   // sorted_input - If true, it means the input keys are already sorted by key
609   //                order, so the MultiGet() API doesn't have to sort them
610   //                again. If false, the keys will be copied and sorted
611   //                internally by the API - the input array will not be
612   //                modified
613   virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
614                         ColumnFamilyHandle** column_families, const Slice* keys,
615                         PinnableSlice* values, Status* statuses,
616                         const bool /*sorted_input*/ = false) {
617     std::vector<ColumnFamilyHandle*> cf;
618     std::vector<Slice> user_keys;
619     std::vector<Status> status;
620     std::vector<std::string> vals;
621 
622     for (size_t i = 0; i < num_keys; ++i) {
623       cf.emplace_back(column_families[i]);
624       user_keys.emplace_back(keys[i]);
625     }
626     status = MultiGet(options, cf, user_keys, &vals);
627     std::copy(status.begin(), status.end(), statuses);
628     for (auto& value : vals) {
629       values->PinSelf(value);
630       values++;
631     }
632   }
633   virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
634                         ColumnFamilyHandle** column_families, const Slice* keys,
635                         PinnableSlice* values, std::string* timestamps,
636                         Status* statuses, const bool /*sorted_input*/ = false) {
637     std::vector<ColumnFamilyHandle*> cf;
638     std::vector<Slice> user_keys;
639     std::vector<Status> status;
640     std::vector<std::string> vals;
641     std::vector<std::string> tss;
642 
643     for (size_t i = 0; i < num_keys; ++i) {
644       cf.emplace_back(column_families[i]);
645       user_keys.emplace_back(keys[i]);
646     }
647     status = MultiGet(options, cf, user_keys, &vals, &tss);
648     std::copy(status.begin(), status.end(), statuses);
649     std::copy(tss.begin(), tss.end(), timestamps);
650     for (auto& value : vals) {
651       values->PinSelf(value);
652       values++;
653     }
654   }
655 
656   // If the key definitely does not exist in the database, then this method
657   // returns false, else true. If the caller wants to obtain value when the key
658   // is found in memory, a bool for 'value_found' must be passed. 'value_found'
659   // will be true on return if value has been set properly.
660   // This check is potentially lighter-weight than invoking DB::Get(). One way
661   // to make this lighter weight is to avoid doing any IOs.
662   // Default implementation here returns true and sets 'value_found' to false
663   virtual bool KeyMayExist(const ReadOptions& /*options*/,
664                            ColumnFamilyHandle* /*column_family*/,
665                            const Slice& /*key*/, std::string* /*value*/,
666                            std::string* /*timestamp*/,
667                            bool* value_found = nullptr) {
668     if (value_found != nullptr) {
669       *value_found = false;
670     }
671     return true;
672   }
673 
674   virtual bool KeyMayExist(const ReadOptions& options,
675                            ColumnFamilyHandle* column_family, const Slice& key,
676                            std::string* value, bool* value_found = nullptr) {
677     return KeyMayExist(options, column_family, key, value,
678                        /*timestamp=*/nullptr, value_found);
679   }
680 
681   virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
682                            std::string* value, bool* value_found = nullptr) {
683     return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
684   }
685 
686   virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
687                            std::string* value, std::string* timestamp,
688                            bool* value_found = nullptr) {
689     return KeyMayExist(options, DefaultColumnFamily(), key, value, timestamp,
690                        value_found);
691   }
692 
  // Return a heap-allocated iterator over the contents of the database.
  // The result of NewIterator() is initially invalid (the caller must
  // call one of the Seek methods on the iterator before using it).
  //
  // The caller should delete the iterator when it is no longer needed.
  // The returned iterator should be deleted before this db is deleted.
  //
  // NOTE(review): presumably the iterator covers only `column_family` --
  // confirm against the implementation.
  virtual Iterator* NewIterator(const ReadOptions& options,
                                ColumnFamilyHandle* column_family) = 0;
NewIterator(const ReadOptions & options)701   virtual Iterator* NewIterator(const ReadOptions& options) {
702     return NewIterator(options, DefaultColumnFamily());
703   }
  // Returns iterators from a consistent database state across multiple
  // column families. The iterators are heap allocated and need to be deleted
  // before the db is deleted.
  // The iterators are handed back through the `iterators` output vector.
  virtual Status NewIterators(
      const ReadOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_families,
      std::vector<Iterator*>* iterators) = 0;
711 
  // Return a handle to the current DB state.  Iterators created with
  // this handle will all observe a stable snapshot of the current DB
  // state.  The caller must call ReleaseSnapshot(result) when the
  // snapshot is no longer needed.
  //
  // nullptr will be returned if the DB fails to take a snapshot or does
  // not support snapshots.
  virtual const Snapshot* GetSnapshot() = 0;
720 
  // Release a previously acquired snapshot (see GetSnapshot()).  The caller
  // must not use "snapshot" after this call.
  virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
724 
725 #ifndef ROCKSDB_LITE
  // Contains all valid property arguments for GetProperty() or
  // GetMapProperty(). Each is a "string" property for retrieval with
  // GetProperty() unless noted as a "map" property, for GetMapProperty().
  //
  // NOTE: Property names cannot end in numbers since those are interpreted as
  //       arguments, e.g., see kNumFilesAtLevelPrefix.
  struct Properties {
    //  "rocksdb.num-files-at-level<N>" - returns string containing the number
    //      of files at level <N>, where <N> is an ASCII representation of a
    //      level number (e.g., "0").
    static const std::string kNumFilesAtLevelPrefix;

    //  "rocksdb.compression-ratio-at-level<N>" - returns string containing the
    //      compression ratio of data at level <N>, where <N> is an ASCII
    //      representation of a level number (e.g., "0"). Here, compression
    //      ratio is defined as uncompressed data size / compressed file size.
    //      Returns "-1.0" if no open files at level <N>.
    static const std::string kCompressionRatioAtLevelPrefix;

    //  "rocksdb.stats" - returns a multi-line string containing the data
    //      described by kCFStats followed by the data described by kDBStats.
    static const std::string kStats;

    //  "rocksdb.sstables" - returns a multi-line string summarizing current
    //      SST files.
    static const std::string kSSTables;

    //  "rocksdb.cfstats" - as a "map" property, returns the raw data from
    //      "rocksdb.cfstats-no-file-histogram" and
    //      "rocksdb.cf-file-histogram".
    static const std::string kCFStats;

    //  "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with
    //      general column family stats per-level over db's lifetime ("L<n>"),
    //      aggregated over db's lifetime ("Sum"), and aggregated over the
    //      interval since the last retrieval ("Int").
    static const std::string kCFStatsNoFileHistogram;

    //  "rocksdb.cf-file-histogram" - reports how many file reads went to
    //      every level, as well as the histogram of latency of single
    //      requests.
    static const std::string kCFFileHistogram;

    //  "rocksdb.dbstats" - returns a multi-line string with general database
    //      stats, both cumulative (over the db's lifetime) and interval (since
    //      the last retrieval of kDBStats).
    static const std::string kDBStats;

    //  "rocksdb.levelstats" - returns multi-line string containing the number
    //      of files per level and total size of each level (MB).
    static const std::string kLevelStats;

    //  "rocksdb.block-cache-entry-stats" - returns a multi-line string or
    //      map with statistics on block cache usage.
    static const std::string kBlockCacheEntryStats;

    //  "rocksdb.num-immutable-mem-table" - returns number of immutable
    //      memtables that have not yet been flushed.
    static const std::string kNumImmutableMemTable;

    //  "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
    //      memtables that have already been flushed.
    static const std::string kNumImmutableMemTableFlushed;

    //  "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
    //      pending; otherwise, returns 0.
    static const std::string kMemTableFlushPending;

    //  "rocksdb.num-running-flushes" - returns the number of currently running
    //      flushes.
    static const std::string kNumRunningFlushes;

    //  "rocksdb.compaction-pending" - returns 1 if at least one compaction is
    //      pending; otherwise, returns 0.
    static const std::string kCompactionPending;

    //  "rocksdb.num-running-compactions" - returns the number of currently
    //      running compactions.
    static const std::string kNumRunningCompactions;

    //  "rocksdb.background-errors" - returns accumulated number of background
    //      errors.
    static const std::string kBackgroundErrors;

    //  "rocksdb.cur-size-active-mem-table" - returns approximate size of active
    //      memtable (bytes).
    static const std::string kCurSizeActiveMemTable;

    //  "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
    //      and unflushed immutable memtables (bytes).
    static const std::string kCurSizeAllMemTables;

    //  "rocksdb.size-all-mem-tables" - returns approximate size of active,
    //      unflushed immutable, and pinned immutable memtables (bytes).
    static const std::string kSizeAllMemTables;

    //  "rocksdb.num-entries-active-mem-table" - returns total number of entries
    //      in the active memtable.
    static const std::string kNumEntriesActiveMemTable;

    //  "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
    //      in the unflushed immutable memtables.
    static const std::string kNumEntriesImmMemTables;

    //  "rocksdb.num-deletes-active-mem-table" - returns total number of delete
    //      entries in the active memtable.
    static const std::string kNumDeletesActiveMemTable;

    //  "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
    //      entries in the unflushed immutable memtables.
    static const std::string kNumDeletesImmMemTables;

    //  "rocksdb.estimate-num-keys" - returns estimated number of total keys in
    //      the active and unflushed immutable memtables and storage.
    static const std::string kEstimateNumKeys;

    //  "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
    //      reading SST tables, excluding memory used in block cache (e.g.,
    //      filter and index blocks).
    static const std::string kEstimateTableReadersMem;

    //  "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
    //      files is enabled; otherwise, returns a non-zero number.
    static const std::string kIsFileDeletionsEnabled;

    //  "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
    //      database.
    static const std::string kNumSnapshots;

    //  "rocksdb.oldest-snapshot-time" - returns number representing unix
    //      timestamp of oldest unreleased snapshot.
    static const std::string kOldestSnapshotTime;

    //  "rocksdb.oldest-snapshot-sequence" - returns number representing
    //      sequence number of oldest unreleased snapshot.
    static const std::string kOldestSnapshotSequence;

    //  "rocksdb.num-live-versions" - returns number of live versions. `Version`
    //      is an internal data structure. See version_set.h for details. More
    //      live versions often mean more SST files are held from being deleted,
    //      by iterators or unfinished compactions.
    static const std::string kNumLiveVersions;

    //  "rocksdb.current-super-version-number" - returns number of current LSM
    //      version. It is a uint64_t integer number, incremented after there
    //      is any change to the LSM tree. The number is not preserved after
    //      restarting the DB. After DB restart, it will start from 0 again.
    static const std::string kCurrentSuperVersionNumber;

    //  "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
    //      live data in bytes.
    static const std::string kEstimateLiveDataSize;

    //  "rocksdb.min-log-number-to-keep" - returns the minimum log number of
    //      the log files that should be kept.
    static const std::string kMinLogNumberToKeep;

    //  "rocksdb.min-obsolete-sst-number-to-keep" - returns the minimum file
    //      number for an obsolete SST to be kept. The max value of `uint64_t`
    //      will be returned if all obsolete files can be deleted.
    static const std::string kMinObsoleteSstNumberToKeep;

    //  "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
    //      files.
    //  WARNING: may slow down online queries if there are too many files.
    static const std::string kTotalSstFilesSize;

    //  "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST
    //      files belonging to the latest LSM tree.
    static const std::string kLiveSstFilesSize;

    //  "rocksdb.base-level" - returns number of level to which L0 data will be
    //      compacted.
    static const std::string kBaseLevel;

    //  "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
    //      number of bytes compaction needs to rewrite to get all levels down
    //      to under target size. Not valid for compactions other than
    //      level-based.
    static const std::string kEstimatePendingCompactionBytes;

    //  "rocksdb.aggregated-table-properties" - returns a string or map
    //      representation of the aggregated table properties of the target
    //      column family. Only properties that make sense for aggregation
    //      are included.
    static const std::string kAggregatedTableProperties;

    //  "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
    //      one but only returns the aggregated table properties of the
    //      specified level "N" at the target column family.
    static const std::string kAggregatedTablePropertiesAtLevel;

    //  "rocksdb.actual-delayed-write-rate" - returns the current actual delayed
    //      write rate. 0 means no delay.
    static const std::string kActualDelayedWriteRate;

    //  "rocksdb.is-write-stopped" - returns 1 if write has been stopped.
    static const std::string kIsWriteStopped;

    //  "rocksdb.estimate-oldest-key-time" - returns an estimation of
    //      oldest key timestamp in the DB. Currently only available for
    //      FIFO compaction with
    //      compaction_options_fifo.allow_compaction = false.
    static const std::string kEstimateOldestKeyTime;

    //  "rocksdb.block-cache-capacity" - returns block cache capacity.
    static const std::string kBlockCacheCapacity;

    //  "rocksdb.block-cache-usage" - returns the memory size for the entries
    //      residing in block cache.
    static const std::string kBlockCacheUsage;

    //  "rocksdb.block-cache-pinned-usage" - returns the memory size for the
    //      entries being pinned.
    static const std::string kBlockCachePinnedUsage;

    //  "rocksdb.options-statistics" - returns multi-line string
    //      of options.statistics
    static const std::string kOptionsStatistics;
  };
944 #endif /* ROCKSDB_LITE */
945 
  // DB implementations export properties about their state via this method.
  // If "property" is a valid "string" property understood by this DB
  // implementation (see Properties struct above for valid options), fills
  // "*value" with its current value and returns true.  Otherwise, returns
  // false.
  virtual bool GetProperty(ColumnFamilyHandle* column_family,
                           const Slice& property, std::string* value) = 0;
GetProperty(const Slice & property,std::string * value)953   virtual bool GetProperty(const Slice& property, std::string* value) {
954     return GetProperty(DefaultColumnFamily(), property, value);
955   }
956 
  // Like GetProperty() but for valid "map" properties. (Some properties can
  // be accessed as either "string" properties or "map" properties.)
  // The result is delivered through the `value` map.
  virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
                              const Slice& property,
                              std::map<std::string, std::string>* value) = 0;
GetMapProperty(const Slice & property,std::map<std::string,std::string> * value)962   virtual bool GetMapProperty(const Slice& property,
963                               std::map<std::string, std::string>* value) {
964     return GetMapProperty(DefaultColumnFamily(), property, value);
965   }
966 
  // Similar to GetProperty(), but only works for the subset of properties
  // whose value is an integer. The value is returned as an integer through
  // "value". Supported properties:
  //  "rocksdb.num-immutable-mem-table"
  //  "rocksdb.mem-table-flush-pending"
  //  "rocksdb.compaction-pending"
  //  "rocksdb.background-errors"
  //  "rocksdb.cur-size-active-mem-table"
  //  "rocksdb.cur-size-all-mem-tables"
  //  "rocksdb.size-all-mem-tables"
  //  "rocksdb.num-entries-active-mem-table"
  //  "rocksdb.num-entries-imm-mem-tables"
  //  "rocksdb.num-deletes-active-mem-table"
  //  "rocksdb.num-deletes-imm-mem-tables"
  //  "rocksdb.estimate-num-keys"
  //  "rocksdb.estimate-table-readers-mem"
  //  "rocksdb.is-file-deletions-enabled"
  //  "rocksdb.num-snapshots"
  //  "rocksdb.oldest-snapshot-time"
  //  "rocksdb.num-live-versions"
  //  "rocksdb.current-super-version-number"
  //  "rocksdb.estimate-live-data-size"
  //  "rocksdb.min-log-number-to-keep"
  //  "rocksdb.min-obsolete-sst-number-to-keep"
  //  "rocksdb.total-sst-files-size"
  //  "rocksdb.live-sst-files-size"
  //  "rocksdb.base-level"
  //  "rocksdb.estimate-pending-compaction-bytes"
  //  "rocksdb.num-running-compactions"
  //  "rocksdb.num-running-flushes"
  //  "rocksdb.actual-delayed-write-rate"
  //  "rocksdb.is-write-stopped"
  //  "rocksdb.estimate-oldest-key-time"
  //  "rocksdb.block-cache-capacity"
  //  "rocksdb.block-cache-usage"
  //  "rocksdb.block-cache-pinned-usage"
  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
                              const Slice& property, uint64_t* value) = 0;
GetIntProperty(const Slice & property,uint64_t * value)1005   virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
1006     return GetIntProperty(DefaultColumnFamily(), property, value);
1007   }
1008 
1009   // Reset internal stats for DB and all column families.
1010   // Note this doesn't reset options.statistics as it is not owned by
1011   // DB.
ResetStats()1012   virtual Status ResetStats() {
1013     return Status::NotSupported("Not implemented");
1014   }
1015 
  // Same as GetIntProperty(), but this one returns the int property
  // aggregated over all column families.
  virtual bool GetAggregatedIntProperty(const Slice& property,
                                        uint64_t* value) = 0;
1020 
  // Flags for DB::GetApproximateSizes() that specify whether memtable
  // stats should be included, or file stats approximation, or both.
  // The values are distinct bits so they can be OR-ed into a uint8_t.
  enum SizeApproximationFlags : uint8_t {
    NONE = 0,
    INCLUDE_MEMTABLES = 1 << 0,
    INCLUDE_FILES = 1 << 1
  };
1028 
  // For each i in [0,n-1], store in "sizes[i]" the approximate
  // file system space used by keys in "[ranges[i].start .. ranges[i].limit)"
  // in a single column family.
  //
  // Note that the returned sizes measure file system space usage, so
  // if the user data compresses by a factor of ten, the returned
  // sizes will be one-tenth the size of the corresponding user data size.
  virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
                                     ColumnFamilyHandle* column_family,
                                     const Range* ranges, int n,
                                     uint64_t* sizes) = 0;
1040 
1041   // Simpler versions of the GetApproximateSizes() method above.
1042   // The include_flags argument must of type DB::SizeApproximationFlags
1043   // and can not be NONE.
1044   virtual Status GetApproximateSizes(ColumnFamilyHandle* column_family,
1045                                      const Range* ranges, int n,
1046                                      uint64_t* sizes,
1047                                      uint8_t include_flags = INCLUDE_FILES) {
1048     SizeApproximationOptions options;
1049     options.include_memtabtles =
1050         (include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) != 0;
1051     options.include_files =
1052         (include_flags & SizeApproximationFlags::INCLUDE_FILES) != 0;
1053     return GetApproximateSizes(options, column_family, ranges, n, sizes);
1054   }
1055   virtual Status GetApproximateSizes(const Range* ranges, int n,
1056                                      uint64_t* sizes,
1057                                      uint8_t include_flags = INCLUDE_FILES) {
1058     return GetApproximateSizes(DefaultColumnFamily(), ranges, n, sizes,
1059                                include_flags);
1060   }
1061 
  // This method is similar to GetApproximateSizes(), except that it
  // returns the approximate number of records in memtables.
  // Results are delivered through the `count` and `size` output pointers.
  virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
                                           const Range& range,
                                           uint64_t* const count,
                                           uint64_t* const size) = 0;
GetApproximateMemTableStats(const Range & range,uint64_t * const count,uint64_t * const size)1068   virtual void GetApproximateMemTableStats(const Range& range,
1069                                            uint64_t* const count,
1070                                            uint64_t* const size) {
1071     GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size);
1072   }
1073 
1074   // Deprecated versions of GetApproximateSizes
GetApproximateSizes(const Range * range,int n,uint64_t * sizes,bool include_memtable)1075   ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
1076       const Range* range, int n, uint64_t* sizes, bool include_memtable) {
1077     uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
1078     if (include_memtable) {
1079       include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
1080     }
1081     GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
1082   }
GetApproximateSizes(ColumnFamilyHandle * column_family,const Range * range,int n,uint64_t * sizes,bool include_memtable)1083   ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
1084       ColumnFamilyHandle* column_family, const Range* range, int n,
1085       uint64_t* sizes, bool include_memtable) {
1086     uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
1087     if (include_memtable) {
1088       include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
1089     }
1090     GetApproximateSizes(column_family, range, n, sizes, include_flags);
1091   }
1092 
  // Compact the underlying storage for the key range [*begin,*end].
  // The actual compaction interval might be a superset of [*begin, *end].
  // In particular, deleted and overwritten versions are discarded,
  // and the data is rearranged to reduce the cost of operations
  // needed to access the data.  This operation should typically only
  // be invoked by users who understand the underlying implementation.
  //
  // begin==nullptr is treated as a key before all keys in the database.
  // end==nullptr is treated as a key after all keys in the database.
  // Therefore the following call will compact the entire database:
  //    db->CompactRange(options, nullptr, nullptr);
  // Note that after the entire database is compacted, all data are pushed
  // down to the last level containing any data. If the total data size after
  // compaction is reduced, that level might not be appropriate for hosting all
  // the files. In this case, the client could set options.change_level to
  // true, to move the files back to the minimum level capable of holding the
  // data set or a given level (specified by non-negative
  // options.target_level).
  virtual Status CompactRange(const CompactRangeOptions& options,
                              ColumnFamilyHandle* column_family,
                              const Slice* begin, const Slice* end) = 0;
CompactRange(const CompactRangeOptions & options,const Slice * begin,const Slice * end)1113   virtual Status CompactRange(const CompactRangeOptions& options,
1114                               const Slice* begin, const Slice* end) {
1115     return CompactRange(options, DefaultColumnFamily(), begin, end);
1116   }
1117 
1118   ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
1119       ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end,
1120       bool change_level = false, int target_level = -1,
1121       uint32_t target_path_id = 0) {
1122     CompactRangeOptions options;
1123     options.change_level = change_level;
1124     options.target_level = target_level;
1125     options.target_path_id = target_path_id;
1126     return CompactRange(options, column_family, begin, end);
1127   }
1128 
1129   ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
1130       const Slice* begin, const Slice* end, bool change_level = false,
1131       int target_level = -1, uint32_t target_path_id = 0) {
1132     CompactRangeOptions options;
1133     options.change_level = change_level;
1134     options.target_level = target_level;
1135     options.target_path_id = target_path_id;
1136     return CompactRange(options, DefaultColumnFamily(), begin, end);
1137   }
1138 
SetOptions(ColumnFamilyHandle *,const std::unordered_map<std::string,std::string> &)1139   virtual Status SetOptions(
1140       ColumnFamilyHandle* /*column_family*/,
1141       const std::unordered_map<std::string, std::string>& /*new_options*/) {
1142     return Status::NotSupported("Not implemented");
1143   }
SetOptions(const std::unordered_map<std::string,std::string> & new_options)1144   virtual Status SetOptions(
1145       const std::unordered_map<std::string, std::string>& new_options) {
1146     return SetOptions(DefaultColumnFamily(), new_options);
1147   }
1148 
  // NOTE(review): presumably the DB-wide counterpart of SetOptions(), with
  // new_options mapping option names to their new values as strings --
  // confirm against the implementation.
  virtual Status SetDBOptions(
      const std::unordered_map<std::string, std::string>& new_options) = 0;
1151 
  // CompactFiles() takes a list of files specified by file numbers and
  // compacts them to the specified level. Note that the behavior is different
  // from CompactRange() in that CompactFiles() performs the compaction job
  // using the CURRENT thread.
  //
  // output_path_id, output_file_names and compaction_job_info are optional
  // and default to -1, nullptr and nullptr respectively.
  //
  // @see GetDataBaseMetaData
  // @see GetColumnFamilyMetaData
  virtual Status CompactFiles(
      const CompactionOptions& compact_options,
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& input_file_names, const int output_level,
      const int output_path_id = -1,
      std::vector<std::string>* const output_file_names = nullptr,
      CompactionJobInfo* compaction_job_info = nullptr) = 0;
1166 
1167   virtual Status CompactFiles(
1168       const CompactionOptions& compact_options,
1169       const std::vector<std::string>& input_file_names, const int output_level,
1170       const int output_path_id = -1,
1171       std::vector<std::string>* const output_file_names = nullptr,
1172       CompactionJobInfo* compaction_job_info = nullptr) {
1173     return CompactFiles(compact_options, DefaultColumnFamily(),
1174                         input_file_names, output_level, output_path_id,
1175                         output_file_names, compaction_job_info);
1176   }
1177 
  // This function will wait until all currently running background processes
  // finish. After it returns, no background process will be run until
  // ContinueBackgroundWork is called, once for each preceding OK-returning
  // call to PauseBackgroundWork.
  virtual Status PauseBackgroundWork() = 0;
  // Counterpart of PauseBackgroundWork(): must be called once for each
  // preceding OK-returning call to PauseBackgroundWork() before background
  // processes run again.
  virtual Status ContinueBackgroundWork() = 0;
1184 
  // This function will enable automatic compactions for the given column
  // families if they were previously disabled. The function will first set the
  // disable_auto_compactions option for each column family to 'false', after
  // which it will schedule a flush/compaction.
  //
  // NOTE: Setting disable_auto_compactions to 'false' through the SetOptions()
  // API does NOT schedule a flush/compaction afterwards, and only changes the
  // parameter itself within the column family option.
  //
  virtual Status EnableAutoCompaction(
      const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;
1196 
  // After this function call, CompactRange() or CompactFiles() will not
  // run compactions and will fail. The function will wait for all outstanding
  // manual compactions to finish before returning.
  virtual void DisableManualCompaction() = 0;
  // Re-enable CompactRange() and CompactFiles() that are disabled by
  // DisableManualCompaction(). In debug mode, it might hit an assertion if
  // no DisableManualCompaction() was previously called.
  virtual void EnableManualCompaction() = 0;
1205 
  // Number of levels used for this DB.
  // NOTE(review): presumably reported for `column_family` -- confirm against
  // the implementation.
  virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
NumberLevels()1208   virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
1209 
  // Maximum level to which a newly compacted memtable is pushed if it
  // does not create overlap.
  virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
MaxMemCompactionLevel()1213   virtual int MaxMemCompactionLevel() {
1214     return MaxMemCompactionLevel(DefaultColumnFamily());
1215   }
1216 
  // Number of files in level-0 that would stop writes.
  virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
Level0StopWriteTrigger()1219   virtual int Level0StopWriteTrigger() {
1220     return Level0StopWriteTrigger(DefaultColumnFamily());
1221   }
1222 
  // Returns the DB name -- the exact same name that was provided as an
  // argument to DB::Open().
  virtual const std::string& GetName() const = 0;
1226 
  // Returns the Env object used by the DB.
  virtual Env* GetEnv() const = 0;
1229 
  // Returns the FileSystem object associated with the DB.
  // Unlike the neighbouring accessors this is not pure virtual: a default
  // implementation is provided out of line.
  // NOTE(review): presumably the default derives the FileSystem from
  // GetEnv() -- confirm against the definition.
  virtual FileSystem* GetFileSystem() const;
1231 
1232   // Get DB Options that we use.  During the process of opening the
1233   // column family, the options provided when calling DB::Open() or
1234   // DB::CreateColumnFamily() will have been "sanitized" and transformed
1235   // in an implementation-defined manner.
1236   virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
GetOptions()1237   virtual Options GetOptions() const {
1238     return GetOptions(DefaultColumnFamily());
1239   }
1240 
  // Returns the DB-wide options (DBOptions) in use by this DB.
  // NOTE(review): presumably these are the sanitized options, as described
  // for GetOptions() -- confirm against the implementation.
  virtual DBOptions GetDBOptions() const = 0;
1242 
1243   // Flush all mem-table data.
1244   // Flush a single column family, even when atomic flush is enabled. To flush
1245   // multiple column families, use Flush(options, column_families).
1246   virtual Status Flush(const FlushOptions& options,
1247                        ColumnFamilyHandle* column_family) = 0;
Flush(const FlushOptions & options)1248   virtual Status Flush(const FlushOptions& options) {
1249     return Flush(options, DefaultColumnFamily());
1250   }
  // Flushes multiple column families.
  // If atomic flush is not enabled, Flush(options, column_families) is
  // equivalent to calling Flush(options, column_family) once for each entry.
  // If atomic flush is enabled, Flush(options, column_families) flushes all
  // column families specified in 'column_families' up to the latest sequence
  // number at the time the flush is requested.
  // Note that RocksDB 5.15 and earlier may not be able to open later versions
  // with atomic flush enabled.
  virtual Status Flush(
      const FlushOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_families) = 0;
1262 
  // Flushes the WAL memory buffer to the file. If sync is true, it calls
  // SyncWAL() afterwards.
  // This base implementation ignores its argument and returns
  // Status::NotSupported; implementations that support it override it.
  virtual Status FlushWAL(bool /*sync*/) {
    return Status::NotSupported("FlushWAL not implemented");
  }
  // Syncs the WAL. Note that Write() followed by SyncWAL() is not exactly the
  // same as Write() with sync=true: in the latter case the changes won't be
  // visible until the sync is done.
  // Currently only works if allow_mmap_writes = false in Options.
  virtual Status SyncWAL() = 0;
1273 
  // Locks the WAL. Also flushes the WAL after locking.
  // This base implementation returns Status::NotSupported.
  virtual Status LockWAL() {
    return Status::NotSupported("LockWAL not implemented");
  }
1278 
  // Unlocks the WAL (the counterpart of LockWAL()).
  // This base implementation returns Status::NotSupported.
  virtual Status UnlockWAL() {
    return Status::NotSupported("UnlockWAL not implemented");
  }
1283 
  // Returns the sequence number of the most recent transaction.
  virtual SequenceNumber GetLatestSequenceNumber() const = 0;
1286 
  // Instructs the DB to preserve deletes with sequence numbers >= the passed
  // seqnum. Has no effect if DBOptions.preserve_deletes is set to false.
  // This function assumes that the user calls it with monotonically
  // increasing seqnums (otherwise we can't guarantee that a particular delete
  // hasn't already been processed).
  // Returns true if the value was successfully updated, false if the user
  // attempted to call it with seqnum <= the current value.
  virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0;
1294 
  // Prevents file deletions. Compactions will continue to occur,
  // but no obsolete files will be deleted. Calling this multiple
  // times has the same effect as calling it once.
  virtual Status DisableFileDeletions() = 0;
1299 
  // Allows compactions to delete obsolete files.
  // If force == true, the call to EnableFileDeletions() guarantees that
  // file deletions are enabled after the call, even if DisableFileDeletions()
  // was called multiple times before.
  // If force == false, EnableFileDeletions() only enables file deletion
  // after it has been called at least as many times as
  // DisableFileDeletions(), enabling the two methods to be called by two
  // threads concurrently without synchronization -- i.e., file deletions will
  // be enabled only after both threads call EnableFileDeletions().
  virtual Status EnableFileDeletions(bool force = true) = 0;
1310 
1311 #ifndef ROCKSDB_LITE
1312   // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
1313 
  // Retrieves the list of all files in the database (via the first,
  // unnamed, output parameter). The files are relative to the dbname and are
  // not absolute paths. Despite being relative paths, the file names begin
  // with "/". The valid size of the manifest file is returned in
  // manifest_file_size. The manifest file is an ever growing file, but only
  // the portion specified by manifest_file_size is valid for this snapshot.
  // Setting flush_memtable to true does a Flush before recording the live
  // files. Setting flush_memtable to false is useful when we don't want to
  // wait for a flush, which may have to wait for compaction to complete and
  // so take an indeterminate time.
  //
  // In case you have multiple column families, even if flush_memtable is true,
  // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
  // for new data that arrived at already-flushed column families while other
  // column families were flushing.
  virtual Status GetLiveFiles(std::vector<std::string>&,
                              uint64_t* manifest_file_size,
                              bool flush_memtable = true) = 0;
1331 
  // Retrieves the sorted list of all WAL files, with the earliest file first.
  virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
1334 
  // Retrieves information about the current WAL file.
  //
  // Note that the log might have rolled after this call, in which case
  // current_log_file would no longer point to the current log file.
  //
  // Additionally, for the sake of optimization,
  // current_log_file->StartSequence would always be set to 0.
  virtual Status GetCurrentWalFile(
      std::unique_ptr<LogFile>* current_log_file) = 0;
1344 
  // Retrieves the creation time of the oldest file in the DB.
  // This API only works if max_open_files = -1; if it is not, the
  // Status returned is Status::NotSupported().
  // The file creation time is set using the env provided to the DB.
  // If the DB was created from a very old release then it's possible that
  // the SST files might not have the file_creation_time property, and even
  // after moving to a newer release it's possible that some files never got
  // compacted and may not have the file_creation_time property. In both cases
  // file_creation_time is considered 0, which means this API will return
  // creation_time = 0 as there wouldn't be a timestamp lower than 0.
  virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) = 0;
1356 
  // Note: this API is not yet consistent with WritePrepared transactions.
  // Sets iter to an iterator that is positioned at a write-batch containing
  // seq_number. If the sequence number does not exist, it returns an iterator
  // at the first available seq_no after the requested seq_no.
  // Returns Status::OK if the iterator is valid.
  // WAL_ttl_seconds or WAL_size_limit_MB must be set to large values to
  // use this API; otherwise the WAL files will get cleared aggressively and
  // the iterator might keep getting invalidated before an update is read.
  virtual Status GetUpdatesSince(
      SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
      const TransactionLogIterator::ReadOptions& read_options =
          TransactionLogIterator::ReadOptions()) = 0;
1370 
1371 // Windows API macro interference
1372 #undef DeleteFile
  // WARNING: This API is planned for removal in RocksDB 7.0 since it does not
  // operate at the proper level of abstraction for a key-value store, and its
  // contract/restrictions are poorly documented. For example, it returns non-OK
  // `Status` for non-bottommost files and files undergoing compaction. Since we
  // do not plan to maintain it, the contract will likely remain underspecified
  // until its removal. Any user is encouraged to read the implementation
  // carefully and migrate away from it when possible.
  //
  // Deletes the file 'name' from the db directory and updates the internal
  // state to reflect that. Supports deletion of sst and log files only.
  // 'name' must be a path relative to the db directory,
  // e.g. 000001.sst, /archive/000003.log
  virtual Status DeleteFile(std::string name) = 0;
1385 
  // Returns (through 'metadata') a list of all table files with their level,
  // start key and end key.
  // This base implementation is a no-op that leaves 'metadata' untouched.
  virtual void GetLiveFilesMetaData(
      std::vector<LiveFileMetaData>* /*metadata*/) {}
1390 
  // Returns (through 'checksum_list') the checksum info of all table and blob
  // files.
  // Note: This function might be of limited use because it cannot be
  // synchronized with GetLiveFiles.
  virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
1395 
  // Obtains the metadata of the specified column family of the DB.
  // This base implementation is a no-op that leaves 'metadata' untouched.
  virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
                                       ColumnFamilyMetaData* /*metadata*/) {}
1399 
1400   // Get the metadata of the default column family.
GetColumnFamilyMetaData(ColumnFamilyMetaData * metadata)1401   void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) {
1402     GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
1403   }
1404 
  // IngestExternalFile() will load a list of external SST files (1) into the DB
  // Two primary modes are supported:
  // - Duplicate keys in the new files will overwrite existing keys (default)
  // - Duplicate keys will be skipped (set ingest_behind=true)
  // In the first mode we will try to find the lowest possible level that
  // the file can fit in, and ingest the file into this level (2). A file that
  // has a key range that overlaps with the memtable key range will require us
  // to Flush the memtable first before ingesting the file.
  // In the second mode we will always ingest in the bottommost level (3) (see
  // docs to IngestExternalFileOptions::ingest_behind).
  //
  // (1) External SST files can be created using SstFileWriter
  // (2) We will try to ingest the files to the lowest possible level
  //     even if the file compression doesn't match the level compression
  // (3) If IngestExternalFileOptions->ingest_behind is set to true,
  //     we always ingest at the bottommost level, which should be reserved
  //     for this purpose (see DBOptions::allow_ingest_behind flag).
  virtual Status IngestExternalFile(
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& external_files,
      const IngestExternalFileOptions& options) = 0;
1426 
IngestExternalFile(const std::vector<std::string> & external_files,const IngestExternalFileOptions & options)1427   virtual Status IngestExternalFile(
1428       const std::vector<std::string>& external_files,
1429       const IngestExternalFileOptions& options) {
1430     return IngestExternalFile(DefaultColumnFamily(), external_files, options);
1431   }
1432 
  // IngestExternalFiles() will ingest files for multiple column families, and
  // record the result atomically to the MANIFEST.
  // If this function returns OK, all column families' ingestion must succeed.
  // If this function returns a non-OK status, or the process crashes, then
  // none of the files will be ingested into the database after recovery.
  // Note that it is possible for an application to observe a mixed state
  // during the execution of this function. If the user performs a range scan
  // over the column families with iterators, an iterator on one column family
  // may return ingested data, while an iterator on another column family
  // returns old data. Users can use a snapshot for a consistent view of data.
  // If your db ingests multiple SST files using this API, i.e. args.size()
  // > 1, then RocksDB 5.15 and earlier will not be able to open it.
  //
  // REQUIRES: each arg corresponds to a different column family: namely, for
  // 0 <= i < j < len(args), args[i].column_family != args[j].column_family.
  virtual Status IngestExternalFiles(
      const std::vector<IngestExternalFileArg>& args) = 0;
1450 
  // CreateColumnFamilyWithImport() will create a new column family with
  // column_family_name and import the external SST files specified in metadata
  // into this column family.
  // (1) External SST files can be created using SstFileWriter.
  // (2) External SST files can be exported from a particular column family in
  //     an existing DB.
  // The option in import_options specifies whether the external files are
  // copied or moved (default is copy). When the option specifies a copy,
  // managing the files at external_file_path is the caller's responsibility.
  // When the option specifies a move, the call ensures that the specified
  // files at external_file_path are deleted on successful return and that the
  // files are not modified on any error return.
  // On error return, the column family handle returned will be nullptr.
  // The column family will be present on successful return and will not be
  // present on error return. The column family may be present after any crash
  // during this call.
  virtual Status CreateColumnFamilyWithImport(
      const ColumnFamilyOptions& options, const std::string& column_family_name,
      const ImportColumnFamilyOptions& import_options,
      const ExportImportFilesMetaData& metadata,
      ColumnFamilyHandle** handle) = 0;
1471 
  // Verifies the checksums of files in the db. Currently the whole-file
  // checksums of table files are checked.
  // This base implementation ignores its argument and returns
  // Status::NotSupported.
  virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) {
    return Status::NotSupported("File verification not supported");
  }
1477 
1478   // Verify the block checksums of files in db. The block checksums of table
1479   // files are checked.
1480   virtual Status VerifyChecksum(const ReadOptions& read_options) = 0;
1481 
VerifyChecksum()1482   virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }
1483 
1484   // AddFile() is deprecated, please use IngestExternalFile()
1485   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1486       ColumnFamilyHandle* column_family,
1487       const std::vector<std::string>& file_path_list, bool move_file = false,
1488       bool skip_snapshot_check = false) {
1489     IngestExternalFileOptions ifo;
1490     ifo.move_files = move_file;
1491     ifo.snapshot_consistency = !skip_snapshot_check;
1492     ifo.allow_global_seqno = false;
1493     ifo.allow_blocking_flush = false;
1494     return IngestExternalFile(column_family, file_path_list, ifo);
1495   }
1496 
1497   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1498       const std::vector<std::string>& file_path_list, bool move_file = false,
1499       bool skip_snapshot_check = false) {
1500     IngestExternalFileOptions ifo;
1501     ifo.move_files = move_file;
1502     ifo.snapshot_consistency = !skip_snapshot_check;
1503     ifo.allow_global_seqno = false;
1504     ifo.allow_blocking_flush = false;
1505     return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo);
1506   }
1507 
1508   // AddFile() is deprecated, please use IngestExternalFile()
1509   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1510       ColumnFamilyHandle* column_family, const std::string& file_path,
1511       bool move_file = false, bool skip_snapshot_check = false) {
1512     IngestExternalFileOptions ifo;
1513     ifo.move_files = move_file;
1514     ifo.snapshot_consistency = !skip_snapshot_check;
1515     ifo.allow_global_seqno = false;
1516     ifo.allow_blocking_flush = false;
1517     return IngestExternalFile(column_family, {file_path}, ifo);
1518   }
1519 
1520   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1521       const std::string& file_path, bool move_file = false,
1522       bool skip_snapshot_check = false) {
1523     IngestExternalFileOptions ifo;
1524     ifo.move_files = move_file;
1525     ifo.snapshot_consistency = !skip_snapshot_check;
1526     ifo.allow_global_seqno = false;
1527     ifo.allow_blocking_flush = false;
1528     return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo);
1529   }
1530 
1531   // Load table file with information "file_info" into "column_family"
1532   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1533       ColumnFamilyHandle* column_family,
1534       const std::vector<ExternalSstFileInfo>& file_info_list,
1535       bool move_file = false, bool skip_snapshot_check = false) {
1536     std::vector<std::string> external_files;
1537     for (const ExternalSstFileInfo& file_info : file_info_list) {
1538       external_files.push_back(file_info.file_path);
1539     }
1540     IngestExternalFileOptions ifo;
1541     ifo.move_files = move_file;
1542     ifo.snapshot_consistency = !skip_snapshot_check;
1543     ifo.allow_global_seqno = false;
1544     ifo.allow_blocking_flush = false;
1545     return IngestExternalFile(column_family, external_files, ifo);
1546   }
1547 
1548   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1549       const std::vector<ExternalSstFileInfo>& file_info_list,
1550       bool move_file = false, bool skip_snapshot_check = false) {
1551     std::vector<std::string> external_files;
1552     for (const ExternalSstFileInfo& file_info : file_info_list) {
1553       external_files.push_back(file_info.file_path);
1554     }
1555     IngestExternalFileOptions ifo;
1556     ifo.move_files = move_file;
1557     ifo.snapshot_consistency = !skip_snapshot_check;
1558     ifo.allow_global_seqno = false;
1559     ifo.allow_blocking_flush = false;
1560     return IngestExternalFile(DefaultColumnFamily(), external_files, ifo);
1561   }
1562 
1563   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1564       ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info,
1565       bool move_file = false, bool skip_snapshot_check = false) {
1566     IngestExternalFileOptions ifo;
1567     ifo.move_files = move_file;
1568     ifo.snapshot_consistency = !skip_snapshot_check;
1569     ifo.allow_global_seqno = false;
1570     ifo.allow_blocking_flush = false;
1571     return IngestExternalFile(column_family, {file_info->file_path}, ifo);
1572   }
1573 
1574   ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
1575       const ExternalSstFileInfo* file_info, bool move_file = false,
1576       bool skip_snapshot_check = false) {
1577     IngestExternalFileOptions ifo;
1578     ifo.move_files = move_file;
1579     ifo.snapshot_consistency = !skip_snapshot_check;
1580     ifo.allow_global_seqno = false;
1581     ifo.allow_blocking_flush = false;
1582     return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path},
1583                               ifo);
1584   }
1585 
1586 #endif  // ROCKSDB_LITE
1587 
  // Stores in 'identity' the unique ID that is read from the IDENTITY file
  // during the opening of the database.
  // Returns Status::OK if 'identity' could be set properly.
  virtual Status GetDbIdentity(std::string& identity) const = 0;
1592 
  // Stores in 'session_id' a unique identifier for each DB object that is
  // opened. This DB session ID should be unique among all open DB instances
  // on all hosts, and should be unique among re-openings of the same or other
  // DBs. (Two open DBs have the same identity from the other function,
  // GetDbIdentity(), when one is physically copied from the other.)
  virtual Status GetDbSessionId(std::string& session_id) const = 0;
1599 
  // Returns the handle of the default column family. The convenience
  // overloads in this class that take no column family use this handle.
  virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
1602 
1603 #ifndef ROCKSDB_LITE
1604   virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
1605                                           TablePropertiesCollection* props) = 0;
GetPropertiesOfAllTables(TablePropertiesCollection * props)1606   virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
1607     return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
1608   }
  // Retrieves, through 'props', the table properties for tables of
  // 'column_family' selected by the given key ranges.
  // NOTE(review): presumably 'range' points to an array of 'n' Range entries
  // and tables overlapping any of them are reported -- confirm against the
  // implementation.
  virtual Status GetPropertiesOfTablesInRange(
      ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
      TablePropertiesCollection* props) = 0;
1612 
  // Optional API taking a column family and a [begin, end] pair of keys.
  // This base implementation ignores its arguments and returns
  // Status::NotSupported.
  // NOTE(review): presumably a hint that the given key range should be
  // compacted -- confirm against the overriding implementation.
  virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/,
                                     const Slice* /*begin*/,
                                     const Slice* /*end*/) {
    return Status::NotSupported("SuggestCompactRange() is not implemented.");
  }
1618 
  // Optional API taking a column family and a target level.
  // This base implementation ignores its arguments and returns
  // Status::NotSupported.
  // NOTE(review): presumably moves level-0 files to 'target_level' -- confirm
  // against the overriding implementation.
  virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/,
                           int /*target_level*/) {
    return Status::NotSupported("PromoteL0() is not implemented.");
  }
1623 
  // Starts tracing DB operations. Use EndTrace() to stop tracing.
  // The trace writer is passed by rvalue reference.
  // This base implementation ignores its arguments and returns
  // Status::NotSupported.
  virtual Status StartTrace(const TraceOptions& /*options*/,
                            std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
    return Status::NotSupported("StartTrace() is not implemented.");
  }
1629 
  // Stops the tracing of DB operations started by StartTrace().
  // This base implementation returns Status::NotSupported.
  virtual Status EndTrace() {
    return Status::NotSupported("EndTrace() is not implemented.");
  }
1633 
  // Starts IO tracing. Use EndIOTrace() to stop tracing.
  // The trace writer is passed by rvalue reference.
  // This base implementation ignores its arguments and returns
  // Status::NotSupported.
  virtual Status StartIOTrace(const TraceOptions& /*options*/,
                              std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
    return Status::NotSupported("StartIOTrace() is not implemented.");
  }
1639 
  // Stops the IO tracing started by StartIOTrace().
  // This base implementation returns Status::NotSupported.
  virtual Status EndIOTrace() {
    return Status::NotSupported("EndIOTrace() is not implemented.");
  }
1643 
  // Starts tracing block cache accesses. Use EndBlockCacheTrace() to stop
  // tracing. The trace writer is passed by rvalue reference.
  // This base implementation ignores its arguments and returns
  // Status::NotSupported.
  virtual Status StartBlockCacheTrace(
      const TraceOptions& /*options*/,
      std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
    return Status::NotSupported("StartBlockCacheTrace() is not implemented.");
  }
1650 
  // Stops the block cache tracing started by StartBlockCacheTrace().
  // This base implementation returns Status::NotSupported.
  virtual Status EndBlockCacheTrace() {
    return Status::NotSupported("EndBlockCacheTrace() is not implemented.");
  }
1654 #endif  // ROCKSDB_LITE
1655 
  // Needed for StackableDB. This base implementation returns this object
  // itself.
  // NOTE(review): presumably wrapper DBs override this to return the
  // innermost wrapped DB -- confirm against StackableDB.
  virtual DB* GetRootDB() { return this; }
1658 
  // Given a window [start_time, end_time), sets up a StatsHistoryIterator
  // to access stats history. Note that start_time and end_time are epoch
  // times measured in seconds, and end_time is an exclusive bound.
  // This base implementation ignores its arguments and returns
  // Status::NotSupported.
  virtual Status GetStatsHistory(
      uint64_t /*start_time*/, uint64_t /*end_time*/,
      std::unique_ptr<StatsHistoryIterator>* /*stats_iterator*/) {
    return Status::NotSupported("GetStatsHistory() is not implemented.");
  }
1667 
1668 #ifndef ROCKSDB_LITE
  // Makes the secondary instance catch up with the primary by tailing and
  // replaying the MANIFEST and WAL of the primary.
  // Column families created by the primary after the secondary instance starts
  // will be ignored unless the secondary instance closes and restarts with the
  // newly created column families.
  // Column families that exist before the secondary instance starts and are
  // dropped by the primary afterwards will be marked as dropped. However, as
  // long as the secondary instance does not delete the corresponding column
  // family handles, the data of the column family is still accessible to the
  // secondary.
  // This base implementation returns Status::NotSupported, since the
  // operation is supported only by a secondary instance.
  // TODO: we will support WAL tailing soon.
  virtual Status TryCatchUpWithPrimary() {
    return Status::NotSupported("Supported only by secondary instance");
  }
1683 #endif  // !ROCKSDB_LITE
1684 };
1685 
// Destroys the contents of the specified database.
// Be very careful using this method.
//
// @param name            Name of the database to destroy
// @param options         Options to use for the operation
// @param column_families Descriptors of column families; defaults to an
//                        empty list
Status DestroyDB(const std::string& name, const Options& options,
                 const std::vector<ColumnFamilyDescriptor>& column_families =
                     std::vector<ColumnFamilyDescriptor>());
1691 
1692 #ifndef ROCKSDB_LITE
// If a DB cannot be opened, you may attempt to call this method to
// resurrect as much of the contents of the database as possible.
// Some data may be lost, so be careful when calling this function
// on a database that contains important information.
//
// With this overload, we will warn about and skip data associated with
// column families not specified in column_families.
//
// @param dbname          Name of the database to repair
// @param db_options      DB-wide options to use for the repair
// @param column_families Descriptors for known column families
Status RepairDB(const std::string& dbname, const DBOptions& db_options,
                const std::vector<ColumnFamilyDescriptor>& column_families);
1704 
// Overload of RepairDB() that also accepts options for column families that
// are not listed in column_families.
//
// @param dbname          Name of the database to repair
// @param db_options      DB-wide options to use for the repair
// @param column_families Descriptors for known column families
// @param unknown_cf_opts Options for column families encountered during the
//                        repair that were not specified in column_families.
Status RepairDB(const std::string& dbname, const DBOptions& db_options,
                const std::vector<ColumnFamilyDescriptor>& column_families,
                const ColumnFamilyOptions& unknown_cf_opts);
1710 
// Overload of RepairDB() that takes a single Options object.
//
// @param dbname  Name of the database to repair
// @param options These options will be used for the database and for ALL column
//                families encountered during the repair
Status RepairDB(const std::string& dbname, const Options& options);
1714 
1715 #endif
1716 
1717 }  // namespace ROCKSDB_NAMESPACE
1718