1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 6 // Use of this source code is governed by a BSD-style license that can be 7 // found in the LICENSE file. See the AUTHORS file for names of contributors. 8 9 #pragma once 10 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <map> 14 #include <memory> 15 #include <string> 16 #include <unordered_map> 17 #include <vector> 18 #include "rocksdb/iterator.h" 19 #include "rocksdb/listener.h" 20 #include "rocksdb/metadata.h" 21 #include "rocksdb/options.h" 22 #include "rocksdb/snapshot.h" 23 #include "rocksdb/sst_file_writer.h" 24 #include "rocksdb/thread_status.h" 25 #include "rocksdb/transaction_log.h" 26 #include "rocksdb/types.h" 27 #include "rocksdb/version.h" 28 29 #ifdef _WIN32 30 // Windows API macro interference 31 #undef DeleteFile 32 #endif 33 34 #if defined(__GNUC__) || defined(__clang__) 35 #define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__)) 36 #elif _WIN32 37 #define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated) 38 #endif 39 40 namespace ROCKSDB_NAMESPACE { 41 42 struct ColumnFamilyOptions; 43 struct CompactionOptions; 44 struct CompactRangeOptions; 45 struct DBOptions; 46 struct ExternalSstFileInfo; 47 struct FlushOptions; 48 struct Options; 49 struct ReadOptions; 50 struct TableProperties; 51 struct WriteOptions; 52 #ifdef ROCKSDB_LITE 53 class CompactionJobInfo; 54 #endif 55 class Env; 56 class EventListener; 57 class FileSystem; 58 #ifndef ROCKSDB_LITE 59 class Replayer; 60 #endif 61 class StatsHistoryIterator; 62 #ifndef ROCKSDB_LITE 63 class TraceReader; 64 class TraceWriter; 65 #endif 66 class WriteBatch; 67 68 extern const std::string kDefaultColumnFamilyName; 69 extern const std::string 
kPersistentStatsColumnFamilyName; 70 struct ColumnFamilyDescriptor { 71 std::string name; 72 ColumnFamilyOptions options; ColumnFamilyDescriptorColumnFamilyDescriptor73 ColumnFamilyDescriptor() 74 : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {} ColumnFamilyDescriptorColumnFamilyDescriptor75 ColumnFamilyDescriptor(const std::string& _name, 76 const ColumnFamilyOptions& _options) 77 : name(_name), options(_options) {} 78 }; 79 80 class ColumnFamilyHandle { 81 public: ~ColumnFamilyHandle()82 virtual ~ColumnFamilyHandle() {} 83 // Returns the name of the column family associated with the current handle. 84 virtual const std::string& GetName() const = 0; 85 // Returns the ID of the column family associated with the current handle. 86 virtual uint32_t GetID() const = 0; 87 // Fills "*desc" with the up-to-date descriptor of the column family 88 // associated with this handle. Since it fills "*desc" with the up-to-date 89 // information, this call might internally lock and release DB mutex to 90 // access the up-to-date CF options. In addition, all the pointer-typed 91 // options cannot be referenced any longer than the original options exist. 92 // 93 // Note that this function is not supported in RocksDBLite. 94 virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0; 95 // Returns the comparator of the column family associated with the 96 // current handle. 
97 virtual const Comparator* GetComparator() const = 0; 98 }; 99 100 static const int kMajorVersion = __ROCKSDB_MAJOR__; 101 static const int kMinorVersion = __ROCKSDB_MINOR__; 102 103 // A range of keys 104 struct Range { 105 Slice start; 106 Slice limit; 107 RangeRange108 Range() {} RangeRange109 Range(const Slice& s, const Slice& l) : start(s), limit(l) {} 110 }; 111 112 struct RangePtr { 113 const Slice* start; 114 const Slice* limit; 115 RangePtrRangePtr116 RangePtr() : start(nullptr), limit(nullptr) {} RangePtrRangePtr117 RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {} 118 }; 119 120 // It is valid that files_checksums and files_checksum_func_names are both 121 // empty (no checksum information is provided for ingestion). Otherwise, 122 // their sizes should be the same as external_files. The file order should 123 // be the same in three vectors and guaranteed by the caller. 124 // Note that, we assume the temperatures of this batch of files to be 125 // ingested are the same. 126 struct IngestExternalFileArg { 127 ColumnFamilyHandle* column_family = nullptr; 128 std::vector<std::string> external_files; 129 IngestExternalFileOptions options; 130 std::vector<std::string> files_checksums; 131 std::vector<std::string> files_checksum_func_names; 132 Temperature file_temperature = Temperature::kUnknown; 133 }; 134 135 struct GetMergeOperandsOptions { 136 int expected_max_number_of_operands = 0; 137 }; 138 139 // A collections of table properties objects, where 140 // key: is the table's file name. 141 // value: the table properties object of the given table. 142 using TablePropertiesCollection = 143 std::unordered_map<std::string, std::shared_ptr<const TableProperties>>; 144 145 // A DB is a persistent, versioned ordered map from keys to values. 146 // A DB is safe for concurrent access from multiple threads without 147 // any external synchronization. 
148 // DB is an abstract base class with one primary implementation (DBImpl) 149 // and a number of wrapper implementations. 150 class DB { 151 public: 152 // Open the database with the specified "name" for reads and writes. 153 // Stores a pointer to a heap-allocated database in *dbptr and returns 154 // OK on success. 155 // Stores nullptr in *dbptr and returns a non-OK status on error, including 156 // if the DB is already open (read-write) by another DB object. (This 157 // guarantee depends on options.env->LockFile(), which might not provide 158 // this guarantee in a custom Env implementation.) 159 // 160 // Caller must delete *dbptr when it is no longer needed. 161 static Status Open(const Options& options, const std::string& name, 162 DB** dbptr); 163 164 // Open the database for read only. All DB interfaces 165 // that modify data, like put/delete, will return error. 166 // If the db is opened in read only mode, then no compactions 167 // will happen. 168 // 169 // While a given DB can be simultaneously open via OpenForReadOnly 170 // by any number of readers, if a DB is simultaneously open by Open 171 // and OpenForReadOnly, the read-only instance has undefined behavior 172 // (though can often succeed if quickly closed) and the read-write 173 // instance is unaffected. See also OpenAsSecondary. 174 // 175 // Not supported in ROCKSDB_LITE, in which case the function will 176 // return Status::NotSupported. 177 static Status OpenForReadOnly(const Options& options, const std::string& name, 178 DB** dbptr, 179 bool error_if_wal_file_exists = false); 180 181 // Open the database for read only with column families. When opening DB with 182 // read only, you can specify only a subset of column families in the 183 // database that should be opened. However, you always need to specify default 184 // column family. 
The default column family name is 'default' and it's stored 185 // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName 186 // 187 // While a given DB can be simultaneously open via OpenForReadOnly 188 // by any number of readers, if a DB is simultaneously open by Open 189 // and OpenForReadOnly, the read-only instance has undefined behavior 190 // (though can often succeed if quickly closed) and the read-write 191 // instance is unaffected. See also OpenAsSecondary. 192 // 193 // Not supported in ROCKSDB_LITE, in which case the function will 194 // return Status::NotSupported. 195 static Status OpenForReadOnly( 196 const DBOptions& db_options, const std::string& name, 197 const std::vector<ColumnFamilyDescriptor>& column_families, 198 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr, 199 bool error_if_wal_file_exists = false); 200 201 // The following OpenAsSecondary functions create a secondary instance that 202 // can dynamically tail the MANIFEST of a primary that must have already been 203 // created. User can call TryCatchUpWithPrimary to make the secondary 204 // instance catch up with primary (WAL tailing is NOT supported now) whenever 205 // the user feels necessary. Column families created by the primary after the 206 // secondary instance starts are currently ignored by the secondary instance. 207 // Column families opened by secondary and dropped by the primary will be 208 // dropped by secondary as well. However the user of the secondary instance 209 // can still access the data of such dropped column family as long as they 210 // do not destroy the corresponding column family handle. 211 // WAL tailing is not supported at present, but will arrive soon. 212 // 213 // The options argument specifies the options to open the secondary instance. 214 // The name argument specifies the name of the primary db that you have used 215 // to open the primary instance. 
216 // The secondary_path argument points to a directory where the secondary 217 // instance stores its info log. 218 // The dbptr is an out-arg corresponding to the opened secondary instance. 219 // The pointer points to a heap-allocated database, and the user should 220 // delete it after use. 221 // Open DB as secondary instance with only the default column family. 222 // Return OK on success, non-OK on failures. 223 static Status OpenAsSecondary(const Options& options, const std::string& name, 224 const std::string& secondary_path, DB** dbptr); 225 226 // Open DB as secondary instance with column families. You can open a subset 227 // of column families in secondary mode. 228 // The db_options specify the database specific options. 229 // The name argument specifies the name of the primary db that you have used 230 // to open the primary instance. 231 // The secondary_path argument points to a directory where the secondary 232 // instance stores its info log. 233 // The column_families argument specifies a list of column families to open. 234 // If any of the column families does not exist, the function returns non-OK 235 // status. 236 // The handles is an out-arg corresponding to the opened database column 237 // family handles. 238 // The dbptr is an out-arg corresponding to the opened secondary instance. 239 // The pointer points to a heap-allocated database, and the caller should 240 // delete it after use. Before deleting the dbptr, the user should also 241 // delete the pointers stored in handles vector. 242 // Return OK on success, on-OK on failures. 243 static Status OpenAsSecondary( 244 const DBOptions& db_options, const std::string& name, 245 const std::string& secondary_path, 246 const std::vector<ColumnFamilyDescriptor>& column_families, 247 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr); 248 249 // Open DB with column families. 
250 // db_options specify database specific options 251 // column_families is the vector of all column families in the database, 252 // containing column family name and options. You need to open ALL column 253 // families in the database. To get the list of column families, you can use 254 // ListColumnFamilies(). Also, you can open only a subset of column families 255 // for read-only access. 256 // The default column family name is 'default' and it's stored 257 // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName. 258 // If everything is OK, handles will on return be the same size 259 // as column_families --- handles[i] will be a handle that you 260 // will use to operate on column family column_family[i]. 261 // Before delete DB, you have to close All column families by calling 262 // DestroyColumnFamilyHandle() with all the handles. 263 static Status Open(const DBOptions& db_options, const std::string& name, 264 const std::vector<ColumnFamilyDescriptor>& column_families, 265 std::vector<ColumnFamilyHandle*>* handles, DB** dbptr); 266 267 // Open DB and run the compaction. 268 // It's a read-only operation, the result won't be installed to the DB, it 269 // will be output to the `output_directory`. The API should only be used with 270 // `options.CompactionService` to run compaction triggered by 271 // `CompactionService`. 272 static Status OpenAndCompact( 273 const std::string& name, const std::string& output_directory, 274 const std::string& input, std::string* output, 275 const CompactionServiceOptionsOverride& override_options); 276 Resume()277 virtual Status Resume() { return Status::NotSupported(); } 278 279 // Close the DB by releasing resources, closing files etc. This should be 280 // called before calling the destructor so that the caller can get back a 281 // status in case there are any errors. This will not fsync the WAL files. 
282 // If syncing is required, the caller must first call SyncWAL(), or Write() 283 // using an empty write batch with WriteOptions.sync=true. 284 // Regardless of the return status, the DB must be freed. 285 // If the return status is Aborted(), closing fails because there is 286 // unreleased snapshot in the system. In this case, users can release 287 // the unreleased snapshots and try again and expect it to succeed. For 288 // other status, re-calling Close() will be no-op and return the original 289 // close status. If the return status is NotSupported(), then the DB 290 // implementation does cleanup in the destructor Close()291 virtual Status Close() { return Status::NotSupported(); } 292 293 // ListColumnFamilies will open the DB specified by argument name 294 // and return the list of all column families in that DB 295 // through column_families argument. The ordering of 296 // column families in column_families is unspecified. 297 static Status ListColumnFamilies(const DBOptions& db_options, 298 const std::string& name, 299 std::vector<std::string>* column_families); 300 301 // Abstract class ctor DB()302 DB() {} 303 // No copying allowed 304 DB(const DB&) = delete; 305 void operator=(const DB&) = delete; 306 307 virtual ~DB(); 308 309 // Create a column_family and return the handle of column family 310 // through the argument handle. 311 virtual Status CreateColumnFamily(const ColumnFamilyOptions& options, 312 const std::string& column_family_name, 313 ColumnFamilyHandle** handle); 314 315 // Bulk create column families with the same column family options. 316 // Return the handles of the column families through the argument handles. 317 // In case of error, the request may succeed partially, and handles will 318 // contain column family handles that it managed to create, and have size 319 // equal to the number of created column families. 
320 virtual Status CreateColumnFamilies( 321 const ColumnFamilyOptions& options, 322 const std::vector<std::string>& column_family_names, 323 std::vector<ColumnFamilyHandle*>* handles); 324 325 // Bulk create column families. 326 // Return the handles of the column families through the argument handles. 327 // In case of error, the request may succeed partially, and handles will 328 // contain column family handles that it managed to create, and have size 329 // equal to the number of created column families. 330 virtual Status CreateColumnFamilies( 331 const std::vector<ColumnFamilyDescriptor>& column_families, 332 std::vector<ColumnFamilyHandle*>* handles); 333 334 // Drop a column family specified by column_family handle. This call 335 // only records a drop record in the manifest and prevents the column 336 // family from flushing and compacting. 337 virtual Status DropColumnFamily(ColumnFamilyHandle* column_family); 338 339 // Bulk drop column families. This call only records drop records in the 340 // manifest and prevents the column families from flushing and compacting. 341 // In case of error, the request may succeed partially. User may call 342 // ListColumnFamilies to check the result. 343 virtual Status DropColumnFamilies( 344 const std::vector<ColumnFamilyHandle*>& column_families); 345 346 // Close a column family specified by column_family handle and destroy 347 // the column family handle specified to avoid double deletion. This call 348 // deletes the column family handle by default. Use this method to 349 // close column family instead of deleting column family handle directly 350 virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family); 351 352 // Set the database entry for "key" to "value". 353 // If "key" already exists, it will be overwritten. 354 // Returns OK on success, and a non-OK status on error. 355 // Note: consider setting options.sync = true. 
356 virtual Status Put(const WriteOptions& options, 357 ColumnFamilyHandle* column_family, const Slice& key, 358 const Slice& value) = 0; Put(const WriteOptions & options,const Slice & key,const Slice & value)359 virtual Status Put(const WriteOptions& options, const Slice& key, 360 const Slice& value) { 361 return Put(options, DefaultColumnFamily(), key, value); 362 } 363 364 // Remove the database entry (if any) for "key". Returns OK on 365 // success, and a non-OK status on error. It is not an error if "key" 366 // did not exist in the database. 367 // Note: consider setting options.sync = true. 368 virtual Status Delete(const WriteOptions& options, 369 ColumnFamilyHandle* column_family, 370 const Slice& key) = 0; Delete(const WriteOptions & options,const Slice & key)371 virtual Status Delete(const WriteOptions& options, const Slice& key) { 372 return Delete(options, DefaultColumnFamily(), key); 373 } 374 375 // Remove the database entry for "key". Requires that the key exists 376 // and was not overwritten. Returns OK on success, and a non-OK status 377 // on error. It is not an error if "key" did not exist in the database. 378 // 379 // If a key is overwritten (by calling Put() multiple times), then the result 380 // of calling SingleDelete() on this key is undefined. SingleDelete() only 381 // behaves correctly if there has been only one Put() for this key since the 382 // previous call to SingleDelete() for this key. 383 // 384 // This feature is currently an experimental performance optimization 385 // for a very specific workload. It is up to the caller to ensure that 386 // SingleDelete is only used for a key that is not deleted using Delete() or 387 // written using Merge(). Mixing SingleDelete operations with Deletes and 388 // Merges can result in undefined behavior. 389 // 390 // Note: consider setting options.sync = true. 
391 virtual Status SingleDelete(const WriteOptions& options, 392 ColumnFamilyHandle* column_family, 393 const Slice& key) = 0; SingleDelete(const WriteOptions & options,const Slice & key)394 virtual Status SingleDelete(const WriteOptions& options, const Slice& key) { 395 return SingleDelete(options, DefaultColumnFamily(), key); 396 } 397 398 // Removes the database entries in the range ["begin_key", "end_key"), i.e., 399 // including "begin_key" and excluding "end_key". Returns OK on success, and 400 // a non-OK status on error. It is not an error if the database does not 401 // contain any existing data in the range ["begin_key", "end_key"). 402 // 403 // If "end_key" comes before "start_key" according to the user's comparator, 404 // a `Status::InvalidArgument` is returned. 405 // 406 // This feature is now usable in production, with the following caveats: 407 // 1) Accumulating many range tombstones in the memtable will degrade read 408 // performance; this can be avoided by manually flushing occasionally. 409 // 2) Limiting the maximum number of open files in the presence of range 410 // tombstones can degrade read performance. To avoid this problem, set 411 // max_open_files to -1 whenever possible. 412 virtual Status DeleteRange(const WriteOptions& options, 413 ColumnFamilyHandle* column_family, 414 const Slice& begin_key, const Slice& end_key); 415 416 // Merge the database entry for "key" with "value". Returns OK on success, 417 // and a non-OK status on error. The semantics of this operation is 418 // determined by the user provided merge_operator when opening DB. 419 // Note: consider setting options.sync = true. 
420 virtual Status Merge(const WriteOptions& options, 421 ColumnFamilyHandle* column_family, const Slice& key, 422 const Slice& value) = 0; Merge(const WriteOptions & options,const Slice & key,const Slice & value)423 virtual Status Merge(const WriteOptions& options, const Slice& key, 424 const Slice& value) { 425 return Merge(options, DefaultColumnFamily(), key, value); 426 } 427 428 // Apply the specified updates to the database. 429 // If `updates` contains no update, WAL will still be synced if 430 // options.sync=true. 431 // Returns OK on success, non-OK on failure. 432 // Note: consider setting options.sync = true. 433 virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0; 434 435 // If the database contains an entry for "key" store the 436 // corresponding value in *value and return OK. 437 // 438 // If timestamp is enabled and a non-null timestamp pointer is passed in, 439 // timestamp is returned. 440 // 441 // If there is no entry for "key" leave *value unchanged and return 442 // a status for which Status::IsNotFound() returns true. 443 // 444 // May return some other Status on an error. 
Get(const ReadOptions & options,ColumnFamilyHandle * column_family,const Slice & key,std::string * value)445 virtual inline Status Get(const ReadOptions& options, 446 ColumnFamilyHandle* column_family, const Slice& key, 447 std::string* value) { 448 assert(value != nullptr); 449 PinnableSlice pinnable_val(value); 450 assert(!pinnable_val.IsPinned()); 451 auto s = Get(options, column_family, key, &pinnable_val); 452 if (s.ok() && pinnable_val.IsPinned()) { 453 value->assign(pinnable_val.data(), pinnable_val.size()); 454 } // else value is already assigned 455 return s; 456 } 457 virtual Status Get(const ReadOptions& options, 458 ColumnFamilyHandle* column_family, const Slice& key, 459 PinnableSlice* value) = 0; Get(const ReadOptions & options,const Slice & key,std::string * value)460 virtual Status Get(const ReadOptions& options, const Slice& key, 461 std::string* value) { 462 return Get(options, DefaultColumnFamily(), key, value); 463 } 464 465 // Get() methods that return timestamp. Derived DB classes don't need to worry 466 // about this group of methods if they don't care about timestamp feature. 
Get(const ReadOptions & options,ColumnFamilyHandle * column_family,const Slice & key,std::string * value,std::string * timestamp)467 virtual inline Status Get(const ReadOptions& options, 468 ColumnFamilyHandle* column_family, const Slice& key, 469 std::string* value, std::string* timestamp) { 470 assert(value != nullptr); 471 PinnableSlice pinnable_val(value); 472 assert(!pinnable_val.IsPinned()); 473 auto s = Get(options, column_family, key, &pinnable_val, timestamp); 474 if (s.ok() && pinnable_val.IsPinned()) { 475 value->assign(pinnable_val.data(), pinnable_val.size()); 476 } // else value is already assigned 477 return s; 478 } Get(const ReadOptions &,ColumnFamilyHandle *,const Slice &,PinnableSlice *,std::string *)479 virtual Status Get(const ReadOptions& /*options*/, 480 ColumnFamilyHandle* /*column_family*/, 481 const Slice& /*key*/, PinnableSlice* /*value*/, 482 std::string* /*timestamp*/) { 483 return Status::NotSupported( 484 "Get() that returns timestamp is not implemented."); 485 } Get(const ReadOptions & options,const Slice & key,std::string * value,std::string * timestamp)486 virtual Status Get(const ReadOptions& options, const Slice& key, 487 std::string* value, std::string* timestamp) { 488 return Get(options, DefaultColumnFamily(), key, value, timestamp); 489 } 490 491 // Returns all the merge operands corresponding to the key. If the 492 // number of merge operands in DB is greater than 493 // merge_operands_options.expected_max_number_of_operands 494 // no merge operands are returned and status is Incomplete. Merge operands 495 // returned are in the order of insertion. 496 // merge_operands- Points to an array of at-least 497 // merge_operands_options.expected_max_number_of_operands and the 498 // caller is responsible for allocating it. If the status 499 // returned is Incomplete then number_of_operands will contain 500 // the total number of merge operands found in DB for key. 
501 virtual Status GetMergeOperands( 502 const ReadOptions& options, ColumnFamilyHandle* column_family, 503 const Slice& key, PinnableSlice* merge_operands, 504 GetMergeOperandsOptions* get_merge_operands_options, 505 int* number_of_operands) = 0; 506 507 // Consistent Get of many keys across column families without the need 508 // for an explicit snapshot. NOTE: the implementation of this MultiGet API 509 // does not have the performance benefits of the void-returning MultiGet 510 // functions. 511 // 512 // If keys[i] does not exist in the database, then the i'th returned 513 // status will be one for which Status::IsNotFound() is true, and 514 // (*values)[i] will be set to some arbitrary value (often ""). Otherwise, 515 // the i'th returned status will have Status::ok() true, and (*values)[i] 516 // will store the value associated with keys[i]. 517 // 518 // (*values) will always be resized to be the same size as (keys). 519 // Similarly, the number of returned statuses will be the number of keys. 520 // Note: keys will not be "de-duplicated". Duplicate keys will return 521 // duplicate values in order. 
522 virtual std::vector<Status> MultiGet( 523 const ReadOptions& options, 524 const std::vector<ColumnFamilyHandle*>& column_family, 525 const std::vector<Slice>& keys, std::vector<std::string>* values) = 0; MultiGet(const ReadOptions & options,const std::vector<Slice> & keys,std::vector<std::string> * values)526 virtual std::vector<Status> MultiGet(const ReadOptions& options, 527 const std::vector<Slice>& keys, 528 std::vector<std::string>* values) { 529 return MultiGet( 530 options, 531 std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()), 532 keys, values); 533 } 534 MultiGet(const ReadOptions &,const std::vector<ColumnFamilyHandle * > &,const std::vector<Slice> & keys,std::vector<std::string> *,std::vector<std::string> *)535 virtual std::vector<Status> MultiGet( 536 const ReadOptions& /*options*/, 537 const std::vector<ColumnFamilyHandle*>& /*column_family*/, 538 const std::vector<Slice>& keys, std::vector<std::string>* /*values*/, 539 std::vector<std::string>* /*timestamps*/) { 540 return std::vector<Status>( 541 keys.size(), Status::NotSupported( 542 "MultiGet() returning timestamps not implemented.")); 543 } MultiGet(const ReadOptions & options,const std::vector<Slice> & keys,std::vector<std::string> * values,std::vector<std::string> * timestamps)544 virtual std::vector<Status> MultiGet(const ReadOptions& options, 545 const std::vector<Slice>& keys, 546 std::vector<std::string>* values, 547 std::vector<std::string>* timestamps) { 548 return MultiGet( 549 options, 550 std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()), 551 keys, values, timestamps); 552 } 553 554 // Overloaded MultiGet API that improves performance by batching operations 555 // in the read path for greater efficiency. Currently, only the block based 556 // table format with full filters are supported. 
Other table formats such 557 // as plain table, block based table with block based filters and 558 // partitioned indexes will still work, but will not get any performance 559 // benefits. 560 // Parameters - 561 // options - ReadOptions 562 // column_family - ColumnFamilyHandle* that the keys belong to. All the keys 563 // passed to the API are restricted to a single column family 564 // num_keys - Number of keys to lookup 565 // keys - Pointer to C style array of key Slices with num_keys elements 566 // values - Pointer to C style array of PinnableSlices with num_keys elements 567 // statuses - Pointer to C style array of Status with num_keys elements 568 // sorted_input - If true, it means the input keys are already sorted by key 569 // order, so the MultiGet() API doesn't have to sort them 570 // again. If false, the keys will be copied and sorted 571 // internally by the API - the input array will not be 572 // modified 573 virtual void MultiGet(const ReadOptions& options, 574 ColumnFamilyHandle* column_family, 575 const size_t num_keys, const Slice* keys, 576 PinnableSlice* values, Status* statuses, 577 const bool /*sorted_input*/ = false) { 578 std::vector<ColumnFamilyHandle*> cf; 579 std::vector<Slice> user_keys; 580 std::vector<Status> status; 581 std::vector<std::string> vals; 582 583 for (size_t i = 0; i < num_keys; ++i) { 584 cf.emplace_back(column_family); 585 user_keys.emplace_back(keys[i]); 586 } 587 status = MultiGet(options, cf, user_keys, &vals); 588 std::copy(status.begin(), status.end(), statuses); 589 for (auto& value : vals) { 590 values->PinSelf(value); 591 values++; 592 } 593 } 594 595 virtual void MultiGet(const ReadOptions& options, 596 ColumnFamilyHandle* column_family, 597 const size_t num_keys, const Slice* keys, 598 PinnableSlice* values, std::string* timestamps, 599 Status* statuses, const bool /*sorted_input*/ = false) { 600 std::vector<ColumnFamilyHandle*> cf; 601 std::vector<Slice> user_keys; 602 std::vector<Status> status; 603 
std::vector<std::string> vals; 604 std::vector<std::string> tss; 605 606 for (size_t i = 0; i < num_keys; ++i) { 607 cf.emplace_back(column_family); 608 user_keys.emplace_back(keys[i]); 609 } 610 status = MultiGet(options, cf, user_keys, &vals, &tss); 611 std::copy(status.begin(), status.end(), statuses); 612 std::copy(tss.begin(), tss.end(), timestamps); 613 for (auto& value : vals) { 614 values->PinSelf(value); 615 values++; 616 } 617 } 618 619 // Overloaded MultiGet API that improves performance by batching operations 620 // in the read path for greater efficiency. Currently, only the block based 621 // table format with full filters are supported. Other table formats such 622 // as plain table, block based table with block based filters and 623 // partitioned indexes will still work, but will not get any performance 624 // benefits. 625 // Parameters - 626 // options - ReadOptions 627 // column_family - ColumnFamilyHandle* that the keys belong to. All the keys 628 // passed to the API are restricted to a single column family 629 // num_keys - Number of keys to lookup 630 // keys - Pointer to C style array of key Slices with num_keys elements 631 // values - Pointer to C style array of PinnableSlices with num_keys elements 632 // statuses - Pointer to C style array of Status with num_keys elements 633 // sorted_input - If true, it means the input keys are already sorted by key 634 // order, so the MultiGet() API doesn't have to sort them 635 // again. 
If false, the keys will be copied and sorted 636 // internally by the API - the input array will not be 637 // modified 638 virtual void MultiGet(const ReadOptions& options, const size_t num_keys, 639 ColumnFamilyHandle** column_families, const Slice* keys, 640 PinnableSlice* values, Status* statuses, 641 const bool /*sorted_input*/ = false) { 642 std::vector<ColumnFamilyHandle*> cf; 643 std::vector<Slice> user_keys; 644 std::vector<Status> status; 645 std::vector<std::string> vals; 646 647 for (size_t i = 0; i < num_keys; ++i) { 648 cf.emplace_back(column_families[i]); 649 user_keys.emplace_back(keys[i]); 650 } 651 status = MultiGet(options, cf, user_keys, &vals); 652 std::copy(status.begin(), status.end(), statuses); 653 for (auto& value : vals) { 654 values->PinSelf(value); 655 values++; 656 } 657 } 658 virtual void MultiGet(const ReadOptions& options, const size_t num_keys, 659 ColumnFamilyHandle** column_families, const Slice* keys, 660 PinnableSlice* values, std::string* timestamps, 661 Status* statuses, const bool /*sorted_input*/ = false) { 662 std::vector<ColumnFamilyHandle*> cf; 663 std::vector<Slice> user_keys; 664 std::vector<Status> status; 665 std::vector<std::string> vals; 666 std::vector<std::string> tss; 667 668 for (size_t i = 0; i < num_keys; ++i) { 669 cf.emplace_back(column_families[i]); 670 user_keys.emplace_back(keys[i]); 671 } 672 status = MultiGet(options, cf, user_keys, &vals, &tss); 673 std::copy(status.begin(), status.end(), statuses); 674 std::copy(tss.begin(), tss.end(), timestamps); 675 for (auto& value : vals) { 676 values->PinSelf(value); 677 values++; 678 } 679 } 680 681 // If the key definitely does not exist in the database, then this method 682 // returns false, else true. If the caller wants to obtain value when the key 683 // is found in memory, a bool for 'value_found' must be passed. 'value_found' 684 // will be true on return if value has been set properly. 
685 // This check is potentially lighter-weight than invoking DB::Get(). One way 686 // to make this lighter weight is to avoid doing any IOs. 687 // Default implementation here returns true and sets 'value_found' to false 688 virtual bool KeyMayExist(const ReadOptions& /*options*/, 689 ColumnFamilyHandle* /*column_family*/, 690 const Slice& /*key*/, std::string* /*value*/, 691 std::string* /*timestamp*/, 692 bool* value_found = nullptr) { 693 if (value_found != nullptr) { 694 *value_found = false; 695 } 696 return true; 697 } 698 699 virtual bool KeyMayExist(const ReadOptions& options, 700 ColumnFamilyHandle* column_family, const Slice& key, 701 std::string* value, bool* value_found = nullptr) { 702 return KeyMayExist(options, column_family, key, value, 703 /*timestamp=*/nullptr, value_found); 704 } 705 706 virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, 707 std::string* value, bool* value_found = nullptr) { 708 return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found); 709 } 710 711 virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, 712 std::string* value, std::string* timestamp, 713 bool* value_found = nullptr) { 714 return KeyMayExist(options, DefaultColumnFamily(), key, value, timestamp, 715 value_found); 716 } 717 718 // Return a heap-allocated iterator over the contents of the database. 719 // The result of NewIterator() is initially invalid (caller must 720 // call one of the Seek methods on the iterator before using it). 721 // 722 // Caller should delete the iterator when it is no longer needed. 723 // The returned iterator should be deleted before this db is deleted. 
virtual Iterator* NewIterator(const ReadOptions& options,
                              ColumnFamilyHandle* column_family) = 0;
// Convenience overload: same as above, using DefaultColumnFamily().
virtual Iterator* NewIterator(const ReadOptions& options) {
  return NewIterator(options, DefaultColumnFamily());
}
// Returns iterators from a consistent database state across multiple
// column families. Iterators are heap allocated and need to be deleted
// before the db is deleted
virtual Status NewIterators(
    const ReadOptions& options,
    const std::vector<ColumnFamilyHandle*>& column_families,
    std::vector<Iterator*>* iterators) = 0;

// Return a handle to the current DB state. Iterators created with
// this handle will all observe a stable snapshot of the current DB
// state. The caller must call ReleaseSnapshot(result) when the
// snapshot is no longer needed.
//
// nullptr will be returned if the DB fails to take a snapshot or does
// not support snapshot.
virtual const Snapshot* GetSnapshot() = 0;

// Release a previously acquired snapshot. The caller must not
// use "snapshot" after this call.
virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;

#ifndef ROCKSDB_LITE
// Contains all valid property arguments for GetProperty() or
// GetMapProperty(). Each is a "string" property for retrieval with
// GetProperty() unless noted as a "map" property, for GetMapProperty().
//
// NOTE: Property names cannot end in numbers since those are interpreted as
// arguments, e.g., see kNumFilesAtLevelPrefix.
struct Properties {
  //  "rocksdb.num-files-at-level<N>" - returns string containing the number
  //      of files at level <N>, where <N> is an ASCII representation of a
  //      level number (e.g., "0").
  static const std::string kNumFilesAtLevelPrefix;

  //  "rocksdb.compression-ratio-at-level<N>" - returns string containing the
  //      compression ratio of data at level <N>, where <N> is an ASCII
  //      representation of a level number (e.g., "0"). Here, compression
  //      ratio is defined as uncompressed data size / compressed file size.
  //      Returns "-1.0" if no open files at level <N>.
  static const std::string kCompressionRatioAtLevelPrefix;

  //  "rocksdb.stats" - returns a multi-line string containing the data
  //      described by kCFStats followed by the data described by kDBStats.
  static const std::string kStats;

  //  "rocksdb.sstables" - returns a multi-line string summarizing current
  //      SST files.
  static const std::string kSSTables;

  //  "rocksdb.cfstats" - Raw data from "rocksdb.cfstats-no-file-histogram"
  //      and "rocksdb.cf-file-histogram" as a "map" property.
  static const std::string kCFStats;

  //  "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with
  //      general column family stats per-level over db's lifetime ("L<n>"),
  //      aggregated over db's lifetime ("Sum"), and aggregated over the
  //      interval since the last retrieval ("Int").
  static const std::string kCFStatsNoFileHistogram;

  //  "rocksdb.cf-file-histogram" - print out how many file reads to every
  //      level, as well as the histogram of latency of single requests.
  static const std::string kCFFileHistogram;

  //  "rocksdb.dbstats" - As a string property, returns a multi-line string
  //      with general database stats, both cumulative (over the db's
  //      lifetime) and interval (since the last retrieval of kDBStats).
  //      As a map property, returns cumulative stats only and does not
  //      update the baseline for the interval stats.
  static const std::string kDBStats;

  //  "rocksdb.levelstats" - returns multi-line string containing the number
  //      of files per level and total size of each level (MB).
  static const std::string kLevelStats;

  //  "rocksdb.block-cache-entry-stats" - returns a multi-line string or
  //      map with statistics on block cache usage.
  static const std::string kBlockCacheEntryStats;

  //  "rocksdb.num-immutable-mem-table" - returns number of immutable
  //      memtables that have not yet been flushed.
  static const std::string kNumImmutableMemTable;

  //  "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
  //      memtables that have already been flushed.
  static const std::string kNumImmutableMemTableFlushed;

  //  "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
  //      pending; otherwise, returns 0.
  static const std::string kMemTableFlushPending;

  //  "rocksdb.num-running-flushes" - returns the number of currently running
  //      flushes.
  static const std::string kNumRunningFlushes;

  //  "rocksdb.compaction-pending" - returns 1 if at least one compaction is
  //      pending; otherwise, returns 0.
  static const std::string kCompactionPending;

  //  "rocksdb.num-running-compactions" - returns the number of currently
  //      running compactions.
  static const std::string kNumRunningCompactions;

  //  "rocksdb.background-errors" - returns accumulated number of background
  //      errors.
  static const std::string kBackgroundErrors;

  //  "rocksdb.cur-size-active-mem-table" - returns approximate size of active
  //      memtable (bytes).
  static const std::string kCurSizeActiveMemTable;

  //  "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
  //      and unflushed immutable memtables (bytes).
  static const std::string kCurSizeAllMemTables;

  //  "rocksdb.size-all-mem-tables" - returns approximate size of active,
  //      unflushed immutable, and pinned immutable memtables (bytes).
  static const std::string kSizeAllMemTables;

  //  "rocksdb.num-entries-active-mem-table" - returns total number of entries
  //      in the active memtable.
  static const std::string kNumEntriesActiveMemTable;

  //  "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
  //      in the unflushed immutable memtables.
  static const std::string kNumEntriesImmMemTables;

  //  "rocksdb.num-deletes-active-mem-table" - returns total number of delete
  //      entries in the active memtable.
  static const std::string kNumDeletesActiveMemTable;

  //  "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
  //      entries in the unflushed immutable memtables.
  static const std::string kNumDeletesImmMemTables;

  //  "rocksdb.estimate-num-keys" - returns estimated number of total keys in
  //      the active and unflushed immutable memtables and storage.
  static const std::string kEstimateNumKeys;

  //  "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
  //      reading SST tables, excluding memory used in block cache (e.g.,
  //      filter and index blocks).
  static const std::string kEstimateTableReadersMem;

  //  "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
  //      files is enabled; otherwise, returns a non-zero number.
  static const std::string kIsFileDeletionsEnabled;

  //  "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
  //      database.
  static const std::string kNumSnapshots;

  //  "rocksdb.oldest-snapshot-time" - returns number representing unix
  //      timestamp of oldest unreleased snapshot.
  static const std::string kOldestSnapshotTime;

  //  "rocksdb.oldest-snapshot-sequence" - returns number representing
  //      sequence number of oldest unreleased snapshot.
  static const std::string kOldestSnapshotSequence;

  //  "rocksdb.num-live-versions" - returns number of live versions. `Version`
  //      is an internal data structure. See version_set.h for details. More
  //      live versions often mean more SST files are held from being deleted,
  //      by iterators or unfinished compactions.
  static const std::string kNumLiveVersions;

  //  "rocksdb.current-super-version-number" - returns number of current LSM
  //      version. It is a uint64_t integer number, incremented after there is
  //      any change to the LSM tree. The number is not preserved after
  //      restarting the DB. After DB restart, it will start from 0 again.
  static const std::string kCurrentSuperVersionNumber;

  //  "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
  //      live data in bytes. For BlobDB, it also includes the exact value of
  //      live bytes in the blob files of the version.
  static const std::string kEstimateLiveDataSize;

  //  "rocksdb.min-log-number-to-keep" - return the minimum log number of the
  //      log files that should be kept.
  static const std::string kMinLogNumberToKeep;

  //  "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file
  //      number for an obsolete SST to be kept. The max value of `uint64_t`
  //      will be returned if all obsolete files can be deleted.
  static const std::string kMinObsoleteSstNumberToKeep;

  //  "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
  //      files.
  //  WARNING: may slow down online queries if there are too many files.
  static const std::string kTotalSstFilesSize;

  //  "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST
  //      files belonging to the latest LSM tree.
  static const std::string kLiveSstFilesSize;

  //  "rocksdb.live_sst_files_size_at_temperature" - returns total size (bytes)
  //      of SST files at all certain file temperature
  static const std::string kLiveSstFilesSizeAtTemperature;

  //  "rocksdb.base-level" - returns number of level to which L0 data will be
  //      compacted.
  static const std::string kBaseLevel;

  //  "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
  //      number of bytes compaction needs to rewrite to get all levels down
  //      to under target size. Not valid for other compactions than level-
  //      based.
  static const std::string kEstimatePendingCompactionBytes;

  //  "rocksdb.aggregated-table-properties" - returns a string or map
  //      representation of the aggregated table properties of the target
  //      column family. Only properties that make sense for aggregation
  //      are included.
  static const std::string kAggregatedTableProperties;

  //  "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
  //      one but only returns the aggregated table properties of the
  //      specified level "N" at the target column family.
  static const std::string kAggregatedTablePropertiesAtLevel;

  //  "rocksdb.actual-delayed-write-rate" - returns the current actual delayed
  //      write rate. 0 means no delay.
  static const std::string kActualDelayedWriteRate;

  //  "rocksdb.is-write-stopped" - Return 1 if write has been stopped.
  static const std::string kIsWriteStopped;

  //  "rocksdb.estimate-oldest-key-time" - returns an estimation of
  //      oldest key timestamp in the DB. Currently only available for
  //      FIFO compaction with
  //      compaction_options_fifo.allow_compaction = false.
  static const std::string kEstimateOldestKeyTime;

  //  "rocksdb.block-cache-capacity" - returns block cache capacity.
  static const std::string kBlockCacheCapacity;

  //  "rocksdb.block-cache-usage" - returns the memory size for the entries
  //      residing in block cache.
  static const std::string kBlockCacheUsage;

  // "rocksdb.block-cache-pinned-usage" - returns the memory size for the
  //      entries being pinned.
  static const std::string kBlockCachePinnedUsage;

  // "rocksdb.options-statistics" - returns multi-line string
  //      of options.statistics
  static const std::string kOptionsStatistics;

  // "rocksdb.num-blob-files" - returns number of blob files in the current
  //      version.
  static const std::string kNumBlobFiles;

  // "rocksdb.blob-stats" - return the total number and size of all blob
  //      files, and total amount of garbage (bytes) in the blob files in
  //      the current version.
  static const std::string kBlobStats;

  // "rocksdb.total-blob-file-size" - returns the total size of all blob
  //      files over all versions.
  static const std::string kTotalBlobFileSize;

  // "rocksdb.live-blob-file-size" - returns the total size of all blob
  //      files in the current version.
  static const std::string kLiveBlobFileSize;
};
#endif /* ROCKSDB_LITE */

// DB implementations export properties about their state via this method.
// If "property" is a valid "string" property understood by this DB
// implementation (see Properties struct above for valid options), fills
// "*value" with its current value and returns true. Otherwise, returns
// false.
virtual bool GetProperty(ColumnFamilyHandle* column_family,
                         const Slice& property, std::string* value) = 0;
// Convenience overload: same as above, using DefaultColumnFamily().
virtual bool GetProperty(const Slice& property, std::string* value) {
  return GetProperty(DefaultColumnFamily(), property, value);
}

// Like GetProperty but for valid "map" properties.
// (Some properties can be accessed as either "string" properties or "map"
// properties.)
virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
                            const Slice& property,
                            std::map<std::string, std::string>* value) = 0;
// Convenience overload: same as above, using DefaultColumnFamily().
virtual bool GetMapProperty(const Slice& property,
                            std::map<std::string, std::string>* value) {
  return GetMapProperty(DefaultColumnFamily(), property, value);
}

// Similar to GetProperty(), but only works for a subset of properties whose
// return value is an integer. Return the value by integer. Supported
// properties:
//  "rocksdb.num-immutable-mem-table"
//  "rocksdb.mem-table-flush-pending"
//  "rocksdb.compaction-pending"
//  "rocksdb.background-errors"
//  "rocksdb.cur-size-active-mem-table"
//  "rocksdb.cur-size-all-mem-tables"
//  "rocksdb.size-all-mem-tables"
//  "rocksdb.num-entries-active-mem-table"
//  "rocksdb.num-entries-imm-mem-tables"
//  "rocksdb.num-deletes-active-mem-table"
//  "rocksdb.num-deletes-imm-mem-tables"
//  "rocksdb.estimate-num-keys"
//  "rocksdb.estimate-table-readers-mem"
//  "rocksdb.is-file-deletions-enabled"
//  "rocksdb.num-snapshots"
//  "rocksdb.oldest-snapshot-time"
//  "rocksdb.num-live-versions"
//  "rocksdb.current-super-version-number"
//  "rocksdb.estimate-live-data-size"
//  "rocksdb.min-log-number-to-keep"
//  "rocksdb.min-obsolete-sst-number-to-keep"
//  "rocksdb.total-sst-files-size"
//  "rocksdb.live-sst-files-size"
//  "rocksdb.base-level"
//  "rocksdb.estimate-pending-compaction-bytes"
//  "rocksdb.num-running-compactions"
//  "rocksdb.num-running-flushes"
//  "rocksdb.actual-delayed-write-rate"
//  "rocksdb.is-write-stopped"
//  "rocksdb.estimate-oldest-key-time"
//  "rocksdb.block-cache-capacity"
//  "rocksdb.block-cache-usage"
//  "rocksdb.block-cache-pinned-usage"
//
//  Properties dedicated for BlobDB:
//  "rocksdb.num-blob-files"
//  "rocksdb.total-blob-file-size"
//  "rocksdb.live-blob-file-size"
virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
                            const Slice& property, uint64_t* value) = 0;
// Convenience overload: same as above, using DefaultColumnFamily().
virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
  return GetIntProperty(DefaultColumnFamily(), property, value);
}

// Reset internal stats for DB and all column families.
// Note this doesn't reset options.statistics as it is not owned by
// DB.
// The default implementation below does nothing and reports NotSupported.
virtual Status ResetStats() {
  return Status::NotSupported("Not implemented");
}

// Same as GetIntProperty(), but this one returns the aggregated int
// property from all column families.
virtual bool GetAggregatedIntProperty(const Slice& property,
                                      uint64_t* value) = 0;

// Flags for DB::GetApproximateSizes that specify whether memtable
// stats should be included, or file stats approximation or both.
// The values are distinct bits so that they can be OR-ed together.
enum SizeApproximationFlags : uint8_t {
  NONE = 0,
  INCLUDE_MEMTABLES = 1 << 0,
  INCLUDE_FILES = 1 << 1
};

// For each i in [0,n-1], store in "sizes[i]", the approximate
// file system space used by keys in "[range[i].start .. range[i].limit)"
// in a single column family.
//
// Note that the returned sizes measure file system space usage, so
// if the user data compresses by a factor of ten, the returned
// sizes will be one-tenth the size of the corresponding user data size.
virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
                                   ColumnFamilyHandle* column_family,
                                   const Range* ranges, int n,
                                   uint64_t* sizes) = 0;

// Simpler versions of the GetApproximateSizes() method above.
1096 // The include_flags argument must of type DB::SizeApproximationFlags 1097 // and can not be NONE. 1098 virtual Status GetApproximateSizes(ColumnFamilyHandle* column_family, 1099 const Range* ranges, int n, 1100 uint64_t* sizes, 1101 uint8_t include_flags = INCLUDE_FILES) { 1102 SizeApproximationOptions options; 1103 options.include_memtabtles = 1104 (include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) != 0; 1105 options.include_files = 1106 (include_flags & SizeApproximationFlags::INCLUDE_FILES) != 0; 1107 return GetApproximateSizes(options, column_family, ranges, n, sizes); 1108 } 1109 virtual Status GetApproximateSizes(const Range* ranges, int n, 1110 uint64_t* sizes, 1111 uint8_t include_flags = INCLUDE_FILES) { 1112 return GetApproximateSizes(DefaultColumnFamily(), ranges, n, sizes, 1113 include_flags); 1114 } 1115 1116 // The method is similar to GetApproximateSizes, except it 1117 // returns approximate number of records in memtables. 1118 virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, 1119 const Range& range, 1120 uint64_t* const count, 1121 uint64_t* const size) = 0; GetApproximateMemTableStats(const Range & range,uint64_t * const count,uint64_t * const size)1122 virtual void GetApproximateMemTableStats(const Range& range, 1123 uint64_t* const count, 1124 uint64_t* const size) { 1125 GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size); 1126 } 1127 1128 // Deprecated versions of GetApproximateSizes GetApproximateSizes(const Range * range,int n,uint64_t * sizes,bool include_memtable)1129 ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes( 1130 const Range* range, int n, uint64_t* sizes, bool include_memtable) { 1131 uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES; 1132 if (include_memtable) { 1133 include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES; 1134 } 1135 GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags); 1136 } 
GetApproximateSizes(ColumnFamilyHandle * column_family,const Range * range,int n,uint64_t * sizes,bool include_memtable)1137 ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes( 1138 ColumnFamilyHandle* column_family, const Range* range, int n, 1139 uint64_t* sizes, bool include_memtable) { 1140 uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES; 1141 if (include_memtable) { 1142 include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES; 1143 } 1144 GetApproximateSizes(column_family, range, n, sizes, include_flags); 1145 } 1146 1147 // Compact the underlying storage for the key range [*begin,*end]. 1148 // The actual compaction interval might be superset of [*begin, *end]. 1149 // In particular, deleted and overwritten versions are discarded, 1150 // and the data is rearranged to reduce the cost of operations 1151 // needed to access the data. This operation should typically only 1152 // be invoked by users who understand the underlying implementation. 1153 // This call blocks until the operation completes successfully, fails, 1154 // or is aborted (Status::Incomplete). See DisableManualCompaction. 1155 // 1156 // begin==nullptr is treated as a key before all keys in the database. 1157 // end==nullptr is treated as a key after all keys in the database. 1158 // Therefore the following call will compact the entire database: 1159 // db->CompactRange(options, nullptr, nullptr); 1160 // Note that after the entire database is compacted, all data are pushed 1161 // down to the last level containing any data. If the total data size after 1162 // compaction is reduced, that level might not be appropriate for hosting all 1163 // the files. In this case, client could set options.change_level to true, to 1164 // move the files back to the minimum level capable of holding the data set 1165 // or a given level (specified by non-negative options.target_level). 
1166 virtual Status CompactRange(const CompactRangeOptions& options, 1167 ColumnFamilyHandle* column_family, 1168 const Slice* begin, const Slice* end) = 0; CompactRange(const CompactRangeOptions & options,const Slice * begin,const Slice * end)1169 virtual Status CompactRange(const CompactRangeOptions& options, 1170 const Slice* begin, const Slice* end) { 1171 return CompactRange(options, DefaultColumnFamily(), begin, end); 1172 } 1173 1174 ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange( 1175 ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end, 1176 bool change_level = false, int target_level = -1, 1177 uint32_t target_path_id = 0) { 1178 CompactRangeOptions options; 1179 options.change_level = change_level; 1180 options.target_level = target_level; 1181 options.target_path_id = target_path_id; 1182 return CompactRange(options, column_family, begin, end); 1183 } 1184 1185 ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange( 1186 const Slice* begin, const Slice* end, bool change_level = false, 1187 int target_level = -1, uint32_t target_path_id = 0) { 1188 CompactRangeOptions options; 1189 options.change_level = change_level; 1190 options.target_level = target_level; 1191 options.target_path_id = target_path_id; 1192 return CompactRange(options, DefaultColumnFamily(), begin, end); 1193 } 1194 SetOptions(ColumnFamilyHandle *,const std::unordered_map<std::string,std::string> &)1195 virtual Status SetOptions( 1196 ColumnFamilyHandle* /*column_family*/, 1197 const std::unordered_map<std::string, std::string>& /*new_options*/) { 1198 return Status::NotSupported("Not implemented"); 1199 } SetOptions(const std::unordered_map<std::string,std::string> & new_options)1200 virtual Status SetOptions( 1201 const std::unordered_map<std::string, std::string>& new_options) { 1202 return SetOptions(DefaultColumnFamily(), new_options); 1203 } 1204 1205 virtual Status SetDBOptions( 1206 const std::unordered_map<std::string, std::string>& new_options) = 0; 1207 
// CompactFiles() inputs a list of files specified by file numbers and
// compacts them to the specified level. A small difference compared to
// CompactRange() is that CompactFiles() performs the compaction job
// using the CURRENT thread, so is not considered a "background" job.
//
// @see GetDataBaseMetaData
// @see GetColumnFamilyMetaData
virtual Status CompactFiles(
    const CompactionOptions& compact_options,
    ColumnFamilyHandle* column_family,
    const std::vector<std::string>& input_file_names, const int output_level,
    const int output_path_id = -1,
    std::vector<std::string>* const output_file_names = nullptr,
    CompactionJobInfo* compaction_job_info = nullptr) = 0;

// Convenience overload: same as above, using DefaultColumnFamily().
virtual Status CompactFiles(
    const CompactionOptions& compact_options,
    const std::vector<std::string>& input_file_names, const int output_level,
    const int output_path_id = -1,
    std::vector<std::string>* const output_file_names = nullptr,
    CompactionJobInfo* compaction_job_info = nullptr) {
  return CompactFiles(compact_options, DefaultColumnFamily(),
                      input_file_names, output_level, output_path_id,
                      output_file_names, compaction_job_info);
}

// This function will wait until all currently running background processes
// finish. After it returns, no background process will be run until
// ContinueBackgroundWork is called, once for each preceding OK-returning
// call to PauseBackgroundWork.
virtual Status PauseBackgroundWork() = 0;
virtual Status ContinueBackgroundWork() = 0;

// This function will enable automatic compactions for the given column
// families if they were previously disabled. The function will first set the
// disable_auto_compactions option for each column family to 'false', after
// which it will schedule a flush/compaction.
//
// NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
// does NOT schedule a flush/compaction afterwards, and only changes the
// parameter itself within the column family option.
//
virtual Status EnableAutoCompaction(
    const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;

// After this function call, CompactRange() or CompactFiles() will not
// run compactions and fail. Calling this function will tell outstanding
// manual compactions to abort and will wait for them to finish or abort
// before returning.
virtual void DisableManualCompaction() = 0;
// Re-enable CompactRange() and CompactFiles() that are disabled by
// DisableManualCompaction(). This function must be called as many times
// as DisableManualCompaction() has been called in order to re-enable
// manual compactions, and must not be called more times than
// DisableManualCompaction() has been called.
virtual void EnableManualCompaction() = 0;

// Number of levels used for this DB.
virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
// Convenience overload: same as above, using DefaultColumnFamily().
virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }

// Maximum level to which a new compacted memtable is pushed if it
// does not create overlap.
virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
// Convenience overload: same as above, using DefaultColumnFamily().
virtual int MaxMemCompactionLevel() {
  return MaxMemCompactionLevel(DefaultColumnFamily());
}

// Number of files in level-0 that would stop writes.
virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
// Convenience overload: same as above, using DefaultColumnFamily().
virtual int Level0StopWriteTrigger() {
  return Level0StopWriteTrigger(DefaultColumnFamily());
}

// Get DB name -- the exact same name that was provided as an argument to
// DB::Open()
virtual const std::string& GetName() const = 0;

// Get Env object from the DB
virtual Env* GetEnv() const = 0;

// Declared here without an inline body and not pure virtual; the definition
// is not in this part of the file.
virtual FileSystem* GetFileSystem() const;

// Get DB Options that we use.  During the process of opening the
// column family, the options provided when calling DB::Open() or
// DB::CreateColumnFamily() will have been "sanitized" and transformed
// in an implementation-defined manner.
virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
// Convenience overload: same as above, using DefaultColumnFamily().
virtual Options GetOptions() const {
  return GetOptions(DefaultColumnFamily());
}

virtual DBOptions GetDBOptions() const = 0;

// Flush all mem-table data.
// Flush a single column family, even when atomic flush is enabled. To flush
// multiple column families, use Flush(options, column_families).
virtual Status Flush(const FlushOptions& options,
                     ColumnFamilyHandle* column_family) = 0;
// Convenience overload: flushes DefaultColumnFamily().
virtual Status Flush(const FlushOptions& options) {
  return Flush(options, DefaultColumnFamily());
}
// Flushes multiple column families.
// If atomic flush is not enabled, Flush(options, column_families) is
// equivalent to calling Flush(options, column_family) multiple times.
// If atomic flush is enabled, Flush(options, column_families) will flush all
// column families specified in 'column_families' up to the latest sequence
// number at the time when flush is requested.
// Note that RocksDB 5.15 and earlier may not be able to open later versions
// with atomic flush enabled.
virtual Status Flush(
    const FlushOptions& options,
    const std::vector<ColumnFamilyHandle*>& column_families) = 0;

// Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL
// afterwards.
// The default implementation below does nothing and reports NotSupported.
virtual Status FlushWAL(bool /*sync*/) {
  return Status::NotSupported("FlushWAL not implemented");
}
// Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
// same as Write() with sync=true: in the latter case the changes won't be
// visible until the sync is done.
// Currently only works if allow_mmap_writes = false in Options.
virtual Status SyncWAL() = 0;

// Lock the WAL. Also flushes the WAL after locking.
// The default implementation below does nothing and reports NotSupported.
virtual Status LockWAL() {
  return Status::NotSupported("LockWAL not implemented");
}

// Unlock the WAL.
// The default implementation below does nothing and reports NotSupported.
virtual Status UnlockWAL() {
  return Status::NotSupported("UnlockWAL not implemented");
}

// The sequence number of the most recent transaction.
virtual SequenceNumber GetLatestSequenceNumber() const = 0;

// Instructs DB to preserve deletes with sequence numbers >= passed seqnum.
// Has no effect if DBOptions.preserve_deletes is set to false.
// This function assumes that user calls this function with monotonically
// increasing seqnums (otherwise we can't guarantee that a particular delete
// hasn't been already processed); returns true if the value was successfully
// updated, false if user attempted to call it with seqnum <= current value.
virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0;

// Prevent file deletions. Compactions will continue to occur,
// but no obsolete files will be deleted. Calling this multiple
// times has the same effect as calling it once.
virtual Status DisableFileDeletions() = 0;

// Allow compactions to delete obsolete files.
1360 // If force == true, the call to EnableFileDeletions() will guarantee that 1361 // file deletions are enabled after the call, even if DisableFileDeletions() 1362 // was called multiple times before. 1363 // If force == false, EnableFileDeletions will only enable file deletion 1364 // after it's been called at least as many times as DisableFileDeletions(), 1365 // enabling the two methods to be called by two threads concurrently without 1366 // synchronization -- i.e., file deletions will be enabled only after both 1367 // threads call EnableFileDeletions() 1368 virtual Status EnableFileDeletions(bool force = true) = 0; 1369 1370 #ifndef ROCKSDB_LITE 1371 // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup 1372 1373 // Retrieve the list of all files in the database. The files are 1374 // relative to the dbname and are not absolute paths. Despite being relative 1375 // paths, the file names begin with "/". The valid size of the manifest file 1376 // is returned in manifest_file_size. The manifest file is an ever growing 1377 // file, but only the portion specified by manifest_file_size is valid for 1378 // this snapshot. Setting flush_memtable to true does Flush before recording 1379 // the live files. Setting flush_memtable to false is useful when we don't 1380 // want to wait for flush which may have to wait for compaction to complete 1381 // taking an indeterminate time. 
1382 // 1383 // In case you have multiple column families, even if flush_memtable is true, 1384 // you still need to call GetSortedWalFiles after GetLiveFiles to compensate 1385 // for new data that arrived to already-flushed column families while other 1386 // column families were flushing 1387 virtual Status GetLiveFiles(std::vector<std::string>&, 1388 uint64_t* manifest_file_size, 1389 bool flush_memtable = true) = 0; 1390 1391 // Retrieve the sorted list of all wal files with earliest file first 1392 virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0; 1393 1394 // Retrieve information about the current wal file 1395 // 1396 // Note that the log might have rolled after this call in which case 1397 // the current_log_file would not point to the current log file. 1398 // 1399 // Additionally, for the sake of optimization current_log_file->StartSequence 1400 // would always be set to 0 1401 virtual Status GetCurrentWalFile( 1402 std::unique_ptr<LogFile>* current_log_file) = 0; 1403 1404 // Retrieves the creation time of the oldest file in the DB. 1405 // This API only works if max_open_files = -1, if it is not then 1406 // Status returned is Status::NotSupported() 1407 // The file creation time is set using the env provided to the DB. 1408 // If the DB was created from a very old release then its possible that 1409 // the SST files might not have file_creation_time property and even after 1410 // moving to a newer release its possible that some files never got compacted 1411 // and may not have file_creation_time property. In both the cases 1412 // file_creation_time is considered 0 which means this API will return 1413 // creation_time = 0 as there wouldn't be a timestamp lower than 0. 1414 virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) = 0; 1415 1416 // Note: this API is not yet consistent with WritePrepared transactions. 1417 // Sets iter to an iterator that is positioned at a write-batch containing 1418 // seq_number. 
If the sequence number is non existent, it returns an iterator 1419 // at the first available seq_no after the requested seq_no 1420 // Returns Status::OK if iterator is valid 1421 // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to 1422 // use this api, else the WAL files will get 1423 // cleared aggressively and the iterator might keep getting invalid before 1424 // an update is read. 1425 virtual Status GetUpdatesSince( 1426 SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter, 1427 const TransactionLogIterator::ReadOptions& read_options = 1428 TransactionLogIterator::ReadOptions()) = 0; 1429 1430 // Windows API macro interference 1431 #undef DeleteFile 1432 // WARNING: This API is planned for removal in RocksDB 7.0 since it does not 1433 // operate at the proper level of abstraction for a key-value store, and its 1434 // contract/restrictions are poorly documented. For example, it returns non-OK 1435 // `Status` for non-bottommost files and files undergoing compaction. Since we 1436 // do not plan to maintain it, the contract will likely remain underspecified 1437 // until its removal. Any user is encouraged to read the implementation 1438 // carefully and migrate away from it when possible. 1439 // 1440 // Delete the file name from the db directory and update the internal state to 1441 // reflect that. Supports deletion of sst and log files only. 'name' must be 1442 // path relative to the db directory. eg. 000001.sst, /archive/000003.log 1443 virtual Status DeleteFile(std::string name) = 0; 1444 1445 // Returns a list of all table files with their level, start key 1446 // and end key GetLiveFilesMetaData(std::vector<LiveFileMetaData> *)1447 virtual void GetLiveFilesMetaData( 1448 std::vector<LiveFileMetaData>* /*metadata*/) {} 1449 1450 // Return a list of all table and blob files checksum info. 1451 // Note: This function might be of limited use because it cannot be 1452 // synchronized with GetLiveFiles. 
1453 virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0; 1454 1455 // EXPERIMENTAL: This function is not yet feature-complete. 1456 // Get information about all live files that make up a DB, for making 1457 // live copies (Checkpoint, backups, etc.) or other storage-related purposes. 1458 // Use DisableFileDeletions() before and EnableFileDeletions() after to 1459 // preserve the files for live copy. 1460 virtual Status GetLiveFilesStorageInfo( 1461 const LiveFilesStorageInfoOptions& opts, 1462 std::vector<LiveFileStorageInfo>* files) = 0; 1463 1464 // Obtains the meta data of the specified column family of the DB. GetColumnFamilyMetaData(ColumnFamilyHandle *,ColumnFamilyMetaData *)1465 virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, 1466 ColumnFamilyMetaData* /*metadata*/) {} 1467 1468 // Get the metadata of the default column family. GetColumnFamilyMetaData(ColumnFamilyMetaData * metadata)1469 void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) { 1470 GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); 1471 } 1472 1473 // Obtains the meta data of all column families for the DB. 1474 // The returned map contains one entry for each column family indexed by the 1475 // name of the column family. GetAllColumnFamilyMetaData(std::vector<ColumnFamilyMetaData> *)1476 virtual void GetAllColumnFamilyMetaData( 1477 std::vector<ColumnFamilyMetaData>* /*metadata*/) {} 1478 1479 // IngestExternalFile() will load a list of external SST files (1) into the DB 1480 // Two primary modes are supported: 1481 // - Duplicate keys in the new files will overwrite exiting keys (default) 1482 // - Duplicate keys will be skipped (set ingest_behind=true) 1483 // In the first mode we will try to find the lowest possible level that 1484 // the file can fit in, and ingest the file into this level (2). 
A file that 1485 // have a key range that overlap with the memtable key range will require us 1486 // to Flush the memtable first before ingesting the file. 1487 // In the second mode we will always ingest in the bottom most level (see 1488 // docs to IngestExternalFileOptions::ingest_behind). 1489 // 1490 // (1) External SST files can be created using SstFileWriter 1491 // (2) We will try to ingest the files to the lowest possible level 1492 // even if the file compression doesn't match the level compression 1493 // (3) If IngestExternalFileOptions->ingest_behind is set to true, 1494 // we always ingest at the bottommost level, which should be reserved 1495 // for this purpose (see DBOPtions::allow_ingest_behind flag). 1496 virtual Status IngestExternalFile( 1497 ColumnFamilyHandle* column_family, 1498 const std::vector<std::string>& external_files, 1499 const IngestExternalFileOptions& options) = 0; 1500 IngestExternalFile(const std::vector<std::string> & external_files,const IngestExternalFileOptions & options)1501 virtual Status IngestExternalFile( 1502 const std::vector<std::string>& external_files, 1503 const IngestExternalFileOptions& options) { 1504 return IngestExternalFile(DefaultColumnFamily(), external_files, options); 1505 } 1506 1507 // IngestExternalFiles() will ingest files for multiple column families, and 1508 // record the result atomically to the MANIFEST. 1509 // If this function returns OK, all column families' ingestion must succeed. 1510 // If this function returns NOK, or the process crashes, then non-of the 1511 // files will be ingested into the database after recovery. 1512 // Note that it is possible for application to observe a mixed state during 1513 // the execution of this function. If the user performs range scan over the 1514 // column families with iterators, iterator on one column family may return 1515 // ingested data, while iterator on other column family returns old data. 
1516 // Users can use snapshot for a consistent view of data. 1517 // If your db ingests multiple SST files using this API, i.e. args.size() 1518 // > 1, then RocksDB 5.15 and earlier will not be able to open it. 1519 // 1520 // REQUIRES: each arg corresponds to a different column family: namely, for 1521 // 0 <= i < j < len(args), args[i].column_family != args[j].column_family. 1522 virtual Status IngestExternalFiles( 1523 const std::vector<IngestExternalFileArg>& args) = 0; 1524 1525 // CreateColumnFamilyWithImport() will create a new column family with 1526 // column_family_name and import external SST files specified in metadata into 1527 // this column family. 1528 // (1) External SST files can be created using SstFileWriter. 1529 // (2) External SST files can be exported from a particular column family in 1530 // an existing DB using Checkpoint::ExportColumnFamily. 1531 // Option in import_options specifies whether the external files are copied or 1532 // moved (default is copy). When option specifies copy, managing files at 1533 // external_file_path is caller's responsibility. When option specifies a 1534 // move, the call makes a best effort to delete the specified files at 1535 // external_file_path on successful return, logging any failure to delete 1536 // rather than returning in Status. Files are not modified on any error 1537 // return, and a best effort is made to remove any newly-created files. 1538 // On error return, column family handle returned will be nullptr. 1539 // ColumnFamily will be present on successful return and will not be present 1540 // on error return. ColumnFamily may be present on any crash during this call. 1541 virtual Status CreateColumnFamilyWithImport( 1542 const ColumnFamilyOptions& options, const std::string& column_family_name, 1543 const ImportColumnFamilyOptions& import_options, 1544 const ExportImportFilesMetaData& metadata, 1545 ColumnFamilyHandle** handle) = 0; 1546 1547 // Verify the checksums of files in db. 
Currently the whole-file checksum of 1548 // table files are checked. VerifyFileChecksums(const ReadOptions &)1549 virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) { 1550 return Status::NotSupported("File verification not supported"); 1551 } 1552 1553 // Verify the block checksums of files in db. The block checksums of table 1554 // files are checked. 1555 virtual Status VerifyChecksum(const ReadOptions& read_options) = 0; 1556 VerifyChecksum()1557 virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } 1558 1559 // AddFile() is deprecated, please use IngestExternalFile() 1560 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( 1561 ColumnFamilyHandle* column_family, 1562 const std::vector<std::string>& file_path_list, bool move_file = false, 1563 bool skip_snapshot_check = false) { 1564 IngestExternalFileOptions ifo; 1565 ifo.move_files = move_file; 1566 ifo.snapshot_consistency = !skip_snapshot_check; 1567 ifo.allow_global_seqno = false; 1568 ifo.allow_blocking_flush = false; 1569 return IngestExternalFile(column_family, file_path_list, ifo); 1570 } 1571 1572 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( 1573 const std::vector<std::string>& file_path_list, bool move_file = false, 1574 bool skip_snapshot_check = false) { 1575 IngestExternalFileOptions ifo; 1576 ifo.move_files = move_file; 1577 ifo.snapshot_consistency = !skip_snapshot_check; 1578 ifo.allow_global_seqno = false; 1579 ifo.allow_blocking_flush = false; 1580 return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo); 1581 } 1582 1583 // AddFile() is deprecated, please use IngestExternalFile() 1584 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( 1585 ColumnFamilyHandle* column_family, const std::string& file_path, 1586 bool move_file = false, bool skip_snapshot_check = false) { 1587 IngestExternalFileOptions ifo; 1588 ifo.move_files = move_file; 1589 ifo.snapshot_consistency = !skip_snapshot_check; 1590 ifo.allow_global_seqno = false; 1591 
ifo.allow_blocking_flush = false; 1592 return IngestExternalFile(column_family, {file_path}, ifo); 1593 } 1594 1595 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( 1596 const std::string& file_path, bool move_file = false, 1597 bool skip_snapshot_check = false) { 1598 IngestExternalFileOptions ifo; 1599 ifo.move_files = move_file; 1600 ifo.snapshot_consistency = !skip_snapshot_check; 1601 ifo.allow_global_seqno = false; 1602 ifo.allow_blocking_flush = false; 1603 return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo); 1604 } 1605 1606 // Load table file with information "file_info" into "column_family" 1607 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( 1608 ColumnFamilyHandle* column_family, 1609 const std::vector<ExternalSstFileInfo>& file_info_list, 1610 bool move_file = false, bool skip_snapshot_check = false) { 1611 std::vector<std::string> external_files; 1612 for (const ExternalSstFileInfo& file_info : file_info_list) { 1613 external_files.push_back(file_info.file_path); 1614 } 1615 IngestExternalFileOptions ifo; 1616 ifo.move_files = move_file; 1617 ifo.snapshot_consistency = !skip_snapshot_check; 1618 ifo.allow_global_seqno = false; 1619 ifo.allow_blocking_flush = false; 1620 return IngestExternalFile(column_family, external_files, ifo); 1621 } 1622 1623 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( 1624 const std::vector<ExternalSstFileInfo>& file_info_list, 1625 bool move_file = false, bool skip_snapshot_check = false) { 1626 std::vector<std::string> external_files; 1627 for (const ExternalSstFileInfo& file_info : file_info_list) { 1628 external_files.push_back(file_info.file_path); 1629 } 1630 IngestExternalFileOptions ifo; 1631 ifo.move_files = move_file; 1632 ifo.snapshot_consistency = !skip_snapshot_check; 1633 ifo.allow_global_seqno = false; 1634 ifo.allow_blocking_flush = false; 1635 return IngestExternalFile(DefaultColumnFamily(), external_files, ifo); 1636 } 1637 1638 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( 1639 
ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info, 1640 bool move_file = false, bool skip_snapshot_check = false) { 1641 IngestExternalFileOptions ifo; 1642 ifo.move_files = move_file; 1643 ifo.snapshot_consistency = !skip_snapshot_check; 1644 ifo.allow_global_seqno = false; 1645 ifo.allow_blocking_flush = false; 1646 return IngestExternalFile(column_family, {file_info->file_path}, ifo); 1647 } 1648 1649 ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( 1650 const ExternalSstFileInfo* file_info, bool move_file = false, 1651 bool skip_snapshot_check = false) { 1652 IngestExternalFileOptions ifo; 1653 ifo.move_files = move_file; 1654 ifo.snapshot_consistency = !skip_snapshot_check; 1655 ifo.allow_global_seqno = false; 1656 ifo.allow_blocking_flush = false; 1657 return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path}, 1658 ifo); 1659 } 1660 1661 #endif // ROCKSDB_LITE 1662 1663 // Returns the unique ID which is read from IDENTITY file during the opening 1664 // of database by setting in the identity variable 1665 // Returns Status::OK if identity could be set properly 1666 virtual Status GetDbIdentity(std::string& identity) const = 0; 1667 1668 // Return a unique identifier for each DB object that is opened 1669 // This DB session ID should be unique among all open DB instances on all 1670 // hosts, and should be unique among re-openings of the same or other DBs. 1671 // (Two open DBs have the same identity from other function GetDbIdentity when 1672 // one is physically copied from the other.) 
1673 virtual Status GetDbSessionId(std::string& session_id) const = 0; 1674 1675 // Returns default column family handle 1676 virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0; 1677 1678 #ifndef ROCKSDB_LITE 1679 1680 virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, 1681 TablePropertiesCollection* props) = 0; GetPropertiesOfAllTables(TablePropertiesCollection * props)1682 virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) { 1683 return GetPropertiesOfAllTables(DefaultColumnFamily(), props); 1684 } 1685 virtual Status GetPropertiesOfTablesInRange( 1686 ColumnFamilyHandle* column_family, const Range* range, std::size_t n, 1687 TablePropertiesCollection* props) = 0; 1688 SuggestCompactRange(ColumnFamilyHandle *,const Slice *,const Slice *)1689 virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/, 1690 const Slice* /*begin*/, 1691 const Slice* /*end*/) { 1692 return Status::NotSupported("SuggestCompactRange() is not implemented."); 1693 } 1694 PromoteL0(ColumnFamilyHandle *,int)1695 virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/, 1696 int /*target_level*/) { 1697 return Status::NotSupported("PromoteL0() is not implemented."); 1698 } 1699 1700 // Trace DB operations. Use EndTrace() to stop tracing. StartTrace(const TraceOptions &,std::unique_ptr<TraceWriter> &&)1701 virtual Status StartTrace(const TraceOptions& /*options*/, 1702 std::unique_ptr<TraceWriter>&& /*trace_writer*/) { 1703 return Status::NotSupported("StartTrace() is not implemented."); 1704 } 1705 EndTrace()1706 virtual Status EndTrace() { 1707 return Status::NotSupported("EndTrace() is not implemented."); 1708 } 1709 1710 // IO Tracing operations. Use EndIOTrace() to stop tracing. 
StartIOTrace(const TraceOptions &,std::unique_ptr<TraceWriter> &&)1711 virtual Status StartIOTrace(const TraceOptions& /*options*/, 1712 std::unique_ptr<TraceWriter>&& /*trace_writer*/) { 1713 return Status::NotSupported("StartIOTrace() is not implemented."); 1714 } 1715 EndIOTrace()1716 virtual Status EndIOTrace() { 1717 return Status::NotSupported("EndIOTrace() is not implemented."); 1718 } 1719 1720 // Trace block cache accesses. Use EndBlockCacheTrace() to stop tracing. StartBlockCacheTrace(const TraceOptions &,std::unique_ptr<TraceWriter> &&)1721 virtual Status StartBlockCacheTrace( 1722 const TraceOptions& /*options*/, 1723 std::unique_ptr<TraceWriter>&& /*trace_writer*/) { 1724 return Status::NotSupported("StartBlockCacheTrace() is not implemented."); 1725 } 1726 EndBlockCacheTrace()1727 virtual Status EndBlockCacheTrace() { 1728 return Status::NotSupported("EndBlockCacheTrace() is not implemented."); 1729 } 1730 1731 // Create a default trace replayer. NewDefaultReplayer(const std::vector<ColumnFamilyHandle * > &,std::unique_ptr<TraceReader> &&,std::unique_ptr<Replayer> *)1732 virtual Status NewDefaultReplayer( 1733 const std::vector<ColumnFamilyHandle*>& /*handles*/, 1734 std::unique_ptr<TraceReader>&& /*reader*/, 1735 std::unique_ptr<Replayer>* /*replayer*/) { 1736 return Status::NotSupported("NewDefaultReplayer() is not implemented."); 1737 } 1738 1739 #endif // ROCKSDB_LITE 1740 1741 // Needed for StackableDB GetRootDB()1742 virtual DB* GetRootDB() { return this; } 1743 1744 // Given a window [start_time, end_time), setup a StatsHistoryIterator 1745 // to access stats history. Note the start_time and end_time are epoch 1746 // time measured in seconds, and end_time is an exclusive bound. 
GetStatsHistory(uint64_t,uint64_t,std::unique_ptr<StatsHistoryIterator> *)1747 virtual Status GetStatsHistory( 1748 uint64_t /*start_time*/, uint64_t /*end_time*/, 1749 std::unique_ptr<StatsHistoryIterator>* /*stats_iterator*/) { 1750 return Status::NotSupported("GetStatsHistory() is not implemented."); 1751 } 1752 1753 #ifndef ROCKSDB_LITE 1754 // Make the secondary instance catch up with the primary by tailing and 1755 // replaying the MANIFEST and WAL of the primary. 1756 // Column families created by the primary after the secondary instance starts 1757 // will be ignored unless the secondary instance closes and restarts with the 1758 // newly created column families. 1759 // Column families that exist before secondary instance starts and dropped by 1760 // the primary afterwards will be marked as dropped. However, as long as the 1761 // secondary instance does not delete the corresponding column family 1762 // handles, the data of the column family is still accessible to the 1763 // secondary. 1764 // TODO: we will support WAL tailing soon. TryCatchUpWithPrimary()1765 virtual Status TryCatchUpWithPrimary() { 1766 return Status::NotSupported("Supported only by secondary instance"); 1767 } 1768 #endif // !ROCKSDB_LITE 1769 }; 1770 1771 // Destroy the contents of the specified database. 1772 // Be very careful using this method. 1773 Status DestroyDB(const std::string& name, const Options& options, 1774 const std::vector<ColumnFamilyDescriptor>& column_families = 1775 std::vector<ColumnFamilyDescriptor>()); 1776 1777 #ifndef ROCKSDB_LITE 1778 // If a DB cannot be opened, you may attempt to call this method to 1779 // resurrect as much of the contents of the database as possible. 1780 // Some data may be lost, so be careful when calling this function 1781 // on a database that contains important information. 1782 // 1783 // With this API, we will warn and skip data associated with column families not 1784 // specified in column_families. 
1785 // 1786 // @param column_families Descriptors for known column families 1787 Status RepairDB(const std::string& dbname, const DBOptions& db_options, 1788 const std::vector<ColumnFamilyDescriptor>& column_families); 1789 1790 // @param unknown_cf_opts Options for column families encountered during the 1791 // repair that were not specified in column_families. 1792 Status RepairDB(const std::string& dbname, const DBOptions& db_options, 1793 const std::vector<ColumnFamilyDescriptor>& column_families, 1794 const ColumnFamilyOptions& unknown_cf_opts); 1795 1796 // @param options These options will be used for the database and for ALL column 1797 // families encountered during the repair 1798 Status RepairDB(const std::string& dbname, const Options& options); 1799 1800 #endif 1801 1802 } // namespace ROCKSDB_NAMESPACE 1803