1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 6 package org.rocksdb; 7 8 import java.util.Collection; 9 import java.util.List; 10 11 public interface DBOptionsInterface<T extends DBOptionsInterface<T>> { 12 /** 13 * Use this if your DB is very small (like under 1GB) and you don't want to 14 * spend lots of memory for memtables. 15 * 16 * @return the instance of the current object. 17 */ optimizeForSmallDb()18 T optimizeForSmallDb(); 19 20 /** 21 * Use the specified object to interact with the environment, 22 * e.g. to read/write files, schedule background work, etc. 23 * Default: {@link Env#getDefault()} 24 * 25 * @param env {@link Env} instance. 26 * @return the instance of the current Options. 27 */ setEnv(final Env env)28 T setEnv(final Env env); 29 30 /** 31 * Returns the set RocksEnv instance. 32 * 33 * @return {@link RocksEnv} instance set in the options. 34 */ getEnv()35 Env getEnv(); 36 37 /** 38 * <p>By default, RocksDB uses only one background thread for flush and 39 * compaction. Calling this function will set it up such that total of 40 * `total_threads` is used.</p> 41 * 42 * <p>You almost definitely want to call this function if your system is 43 * bottlenecked by RocksDB.</p> 44 * 45 * @param totalThreads The total number of threads to be used by RocksDB. 46 * A good value is the number of cores. 47 * 48 * @return the instance of the current Options 49 */ setIncreaseParallelism(int totalThreads)50 T setIncreaseParallelism(int totalThreads); 51 52 /** 53 * If this value is set to true, then the database will be created 54 * if it is missing during {@code RocksDB.open()}. 
55    * Default: false 56    * 57    * @param flag a flag indicating whether to create the database if the 58    *     specified database in {@link RocksDB#open(org.rocksdb.Options, String)} operation 59    *     is missing. 60    * @return the instance of the current Options 61    * @see RocksDB#open(org.rocksdb.Options, String) 62    */ setCreateIfMissing(boolean flag)63   T setCreateIfMissing(boolean flag); 64 65   /** 66    * Return true if the create_if_missing flag is set to true. 67    * If true, the database will be created if it is missing. 68    * 69    * @return true if the createIfMissing option is set to true. 70    * @see #setCreateIfMissing(boolean) 71    */ createIfMissing()72   boolean createIfMissing(); 73 74   /** 75    * <p>If true, missing column families will be automatically created</p> 76    * 77    * <p>Default: false</p> 78    * 79    * @param flag a flag indicating if missing column families shall be 80    *     created automatically. 81    * @return the instance of the 82    *     current object. 83    */ setCreateMissingColumnFamilies(boolean flag)84   T setCreateMissingColumnFamilies(boolean flag); 85 86   /** 87    * Return true if the create_missing_column_families flag is set 88    * to true. If true, column families will be created if missing. 89    * 90    * @return true if the createMissingColumnFamilies option is set to 91    *     true. 92    * @see #setCreateMissingColumnFamilies(boolean) 93    */ createMissingColumnFamilies()94   boolean createMissingColumnFamilies(); 95 96   /** 97    * If true, an error will be thrown during RocksDB.open() if the 98    * database already exists. 99    * Default: false 100    * 101    * @param errorIfExists if true, an exception will be thrown 102    *     during {@code RocksDB.open()} if the database already exists. 103    * @return the reference to the current option. 104    * @see RocksDB#open(org.rocksdb.Options, String) 105    */ setErrorIfExists(boolean errorIfExists)106   T setErrorIfExists(boolean errorIfExists); 107 108   /** 109    * If true, an error will be thrown during RocksDB.open() if the 110    * database already exists. 
111 * 112 * @return if true, an error is raised when the specified database 113 * already exists before open. 114 */ errorIfExists()115 boolean errorIfExists(); 116 117 /** 118 * If true, the implementation will do aggressive checking of the 119 * data it is processing and will stop early if it detects any 120 * errors. This may have unforeseen ramifications: for example, a 121 * corruption of one DB entry may cause a large number of entries to 122 * become unreadable or for the entire DB to become unopenable. 123 * If any of the writes to the database fails (Put, Delete, Merge, Write), 124 * the database will switch to read-only mode and fail all other 125 * Write operations. 126 * Default: true 127 * 128 * @param paranoidChecks a flag to indicate whether paranoid-check 129 * is on. 130 * @return the reference to the current option. 131 */ setParanoidChecks(boolean paranoidChecks)132 T setParanoidChecks(boolean paranoidChecks); 133 134 /** 135 * If true, the implementation will do aggressive checking of the 136 * data it is processing and will stop early if it detects any 137 * errors. This may have unforeseen ramifications: for example, a 138 * corruption of one DB entry may cause a large number of entries to 139 * become unreadable or for the entire DB to become unopenable. 140 * If any of the writes to the database fails (Put, Delete, Merge, Write), 141 * the database will switch to read-only mode and fail all other 142 * Write operations. 143 * 144 * @return a boolean indicating whether paranoid-check is on. 145 */ paranoidChecks()146 boolean paranoidChecks(); 147 148 /** 149 * Use to control write rate of flush and compaction. Flush has higher 150 * priority than compaction. Rate limiting is disabled if nullptr. 151 * Default: nullptr 152 * 153 * @param rateLimiter {@link org.rocksdb.RateLimiter} instance. 154 * @return the instance of the current object. 
155 * 156 * @since 3.10.0 157 */ setRateLimiter(RateLimiter rateLimiter)158 T setRateLimiter(RateLimiter rateLimiter); 159 160 /** 161 * Use to track SST files and control their file deletion rate. 162 * 163 * Features: 164 * - Throttle the deletion rate of the SST files. 165 * - Keep track the total size of all SST files. 166 * - Set a maximum allowed space limit for SST files that when reached 167 * the DB wont do any further flushes or compactions and will set the 168 * background error. 169 * - Can be shared between multiple dbs. 170 * 171 * Limitations: 172 * - Only track and throttle deletes of SST files in 173 * first db_path (db_name if db_paths is empty). 174 * 175 * @param sstFileManager The SST File Manager for the db. 176 * @return the instance of the current object. 177 */ setSstFileManager(SstFileManager sstFileManager)178 T setSstFileManager(SstFileManager sstFileManager); 179 180 /** 181 * <p>Any internal progress/error information generated by 182 * the db will be written to the Logger if it is non-nullptr, 183 * or to a file stored in the same directory as the DB 184 * contents if info_log is nullptr.</p> 185 * 186 * <p>Default: nullptr</p> 187 * 188 * @param logger {@link Logger} instance. 189 * @return the instance of the current object. 190 */ setLogger(Logger logger)191 T setLogger(Logger logger); 192 193 /** 194 * <p>Sets the RocksDB log level. Default level is INFO</p> 195 * 196 * @param infoLogLevel log level to set. 197 * @return the instance of the current object. 198 */ setInfoLogLevel(InfoLogLevel infoLogLevel)199 T setInfoLogLevel(InfoLogLevel infoLogLevel); 200 201 /** 202 * <p>Returns currently set log level.</p> 203 * @return {@link org.rocksdb.InfoLogLevel} instance. 204 */ infoLogLevel()205 InfoLogLevel infoLogLevel(); 206 207 /** 208 * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open 209 * all files on DB::Open(). You can use this option to increase the number 210 * of threads used to open the files. 
211 * 212 * Default: 16 213 * 214 * @param maxFileOpeningThreads the maximum number of threads to use to 215 * open files 216 * 217 * @return the reference to the current options. 218 */ setMaxFileOpeningThreads(int maxFileOpeningThreads)219 T setMaxFileOpeningThreads(int maxFileOpeningThreads); 220 221 /** 222 * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open all 223 * files on DB::Open(). You can use this option to increase the number of 224 * threads used to open the files. 225 * 226 * Default: 16 227 * 228 * @return the maximum number of threads to use to open files 229 */ maxFileOpeningThreads()230 int maxFileOpeningThreads(); 231 232 /** 233 * <p>Sets the statistics object which collects metrics about database operations. 234 * Statistics objects should not be shared between DB instances as 235 * it does not use any locks to prevent concurrent updates.</p> 236 * 237 * @param statistics The statistics to set 238 * 239 * @return the instance of the current object. 240 * 241 * @see RocksDB#open(org.rocksdb.Options, String) 242 */ setStatistics(final Statistics statistics)243 T setStatistics(final Statistics statistics); 244 245 /** 246 * <p>Returns statistics object.</p> 247 * 248 * @return the instance of the statistics object or null if there is no 249 * statistics object. 250 * 251 * @see #setStatistics(Statistics) 252 */ statistics()253 Statistics statistics(); 254 255 /** 256 * <p>If true, then every store to stable storage will issue a fsync.</p> 257 * <p>If false, then every store to stable storage will issue a fdatasync. 258 * This parameter should be set to true while storing data to 259 * filesystem like ext3 that can lose files after a reboot.</p> 260 * <p>Default: false</p> 261 * 262 * @param useFsync a boolean flag to specify whether to use fsync 263 * @return the instance of the current object. 
264 */ setUseFsync(boolean useFsync)265 T setUseFsync(boolean useFsync); 266 267 /** 268 * <p>If true, then every store to stable storage will issue a fsync.</p> 269 * <p>If false, then every store to stable storage will issue a fdatasync. 270 * This parameter should be set to true while storing data to 271 * filesystem like ext3 that can lose files after a reboot.</p> 272 * 273 * @return boolean value indicating if fsync is used. 274 */ useFsync()275 boolean useFsync(); 276 277 /** 278 * A list of paths where SST files can be put into, with its target size. 279 * Newer data is placed into paths specified earlier in the vector while 280 * older data gradually moves to paths specified later in the vector. 281 * 282 * For example, you have a flash device with 10GB allocated for the DB, 283 * as well as a hard drive of 2TB, you should config it to be: 284 * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] 285 * 286 * The system will try to guarantee data under each path is close to but 287 * not larger than the target size. But current and future file sizes used 288 * by determining where to place a file are based on best-effort estimation, 289 * which means there is a chance that the actual size under the directory 290 * is slightly more than target size under some workloads. User should give 291 * some buffer room for those cases. 292 * 293 * If none of the paths has sufficient room to place a file, the file will 294 * be placed to the last path anyway, despite to the target size. 295 * 296 * Placing newer data to earlier paths is also best-efforts. User should 297 * expect user files to be placed in higher levels in some extreme cases. 298 * 299 * If left empty, only one path will be used, which is db_name passed when 300 * opening the DB. 
301 * 302 * Default: empty 303 * 304 * @param dbPaths the paths and target sizes 305 * 306 * @return the reference to the current options 307 */ setDbPaths(final Collection<DbPath> dbPaths)308 T setDbPaths(final Collection<DbPath> dbPaths); 309 310 /** 311 * A list of paths where SST files can be put into, with its target size. 312 * Newer data is placed into paths specified earlier in the vector while 313 * older data gradually moves to paths specified later in the vector. 314 * 315 * For example, you have a flash device with 10GB allocated for the DB, 316 * as well as a hard drive of 2TB, you should config it to be: 317 * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] 318 * 319 * The system will try to guarantee data under each path is close to but 320 * not larger than the target size. But current and future file sizes used 321 * by determining where to place a file are based on best-effort estimation, 322 * which means there is a chance that the actual size under the directory 323 * is slightly more than target size under some workloads. User should give 324 * some buffer room for those cases. 325 * 326 * If none of the paths has sufficient room to place a file, the file will 327 * be placed to the last path anyway, despite to the target size. 328 * 329 * Placing newer data to earlier paths is also best-efforts. User should 330 * expect user files to be placed in higher levels in some extreme cases. 331 * 332 * If left empty, only one path will be used, which is db_name passed when 333 * opening the DB. 334 * 335 * Default: {@link java.util.Collections#emptyList()} 336 * 337 * @return dbPaths the paths and target sizes 338 */ dbPaths()339 List<DbPath> dbPaths(); 340 341 /** 342 * This specifies the info LOG dir. 343 * If it is empty, the log files will be in the same dir as data. 344 * If it is non empty, the log files will be in the specified dir, 345 * and the db data dir's absolute path will be used as the log file 346 * name's prefix. 
347 * 348 * @param dbLogDir the path to the info log directory 349 * @return the instance of the current object. 350 */ setDbLogDir(String dbLogDir)351 T setDbLogDir(String dbLogDir); 352 353 /** 354 * Returns the directory of info log. 355 * 356 * If it is empty, the log files will be in the same dir as data. 357 * If it is non empty, the log files will be in the specified dir, 358 * and the db data dir's absolute path will be used as the log file 359 * name's prefix. 360 * 361 * @return the path to the info log directory 362 */ dbLogDir()363 String dbLogDir(); 364 365 /** 366 * This specifies the absolute dir path for write-ahead logs (WAL). 367 * If it is empty, the log files will be in the same dir as data, 368 * dbname is used as the data dir by default 369 * If it is non empty, the log files will be in kept the specified dir. 370 * When destroying the db, 371 * all log files in wal_dir and the dir itself is deleted 372 * 373 * @param walDir the path to the write-ahead-log directory. 374 * @return the instance of the current object. 375 */ setWalDir(String walDir)376 T setWalDir(String walDir); 377 378 /** 379 * Returns the path to the write-ahead-logs (WAL) directory. 380 * 381 * If it is empty, the log files will be in the same dir as data, 382 * dbname is used as the data dir by default 383 * If it is non empty, the log files will be in kept the specified dir. 384 * When destroying the db, 385 * all log files in wal_dir and the dir itself is deleted 386 * 387 * @return the path to the write-ahead-logs (WAL) directory. 388 */ walDir()389 String walDir(); 390 391 /** 392 * The periodicity when obsolete files get deleted. The default 393 * value is 6 hours. The files that get out of scope by compaction 394 * process will still get automatically delete on every compaction, 395 * regardless of this setting 396 * 397 * @param micros the time interval in micros 398 * @return the instance of the current object. 
399 */ setDeleteObsoleteFilesPeriodMicros(long micros)400 T setDeleteObsoleteFilesPeriodMicros(long micros); 401 402 /** 403 * The periodicity when obsolete files get deleted. The default 404 * value is 6 hours. The files that get out of scope by compaction 405 * process will still get automatically delete on every compaction, 406 * regardless of this setting 407 * 408 * @return the time interval in micros when obsolete files will be deleted. 409 */ deleteObsoleteFilesPeriodMicros()410 long deleteObsoleteFilesPeriodMicros(); 411 412 /** 413 * This value represents the maximum number of threads that will 414 * concurrently perform a compaction job by breaking it into multiple, 415 * smaller ones that are run simultaneously. 416 * Default: 1 (i.e. no subcompactions) 417 * 418 * @param maxSubcompactions The maximum number of threads that will 419 * concurrently perform a compaction job 420 * 421 * @return the instance of the current object. 422 */ setMaxSubcompactions(int maxSubcompactions)423 T setMaxSubcompactions(int maxSubcompactions); 424 425 /** 426 * This value represents the maximum number of threads that will 427 * concurrently perform a compaction job by breaking it into multiple, 428 * smaller ones that are run simultaneously. 429 * Default: 1 (i.e. no subcompactions) 430 * 431 * @return The maximum number of threads that will concurrently perform a 432 * compaction job 433 */ maxSubcompactions()434 int maxSubcompactions(); 435 436 /** 437 * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the 438 * value of max_background_jobs. For backwards compatibility we will set 439 * `max_background_jobs = max_background_compactions + max_background_flushes` 440 * in the case where user sets at least one of `max_background_compactions` or 441 * `max_background_flushes`. 442 * 443 * Specifies the maximum number of concurrent background flush jobs. 
444 * If you're increasing this, also consider increasing number of threads in 445 * HIGH priority thread pool. For more information, see 446 * Default: -1 447 * 448 * @param maxBackgroundFlushes number of max concurrent flush jobs 449 * @return the instance of the current object. 450 * 451 * @see RocksEnv#setBackgroundThreads(int) 452 * @see RocksEnv#setBackgroundThreads(int, Priority) 453 * @see MutableDBOptionsInterface#maxBackgroundCompactions() 454 * 455 * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)} 456 */ 457 @Deprecated setMaxBackgroundFlushes(int maxBackgroundFlushes)458 T setMaxBackgroundFlushes(int maxBackgroundFlushes); 459 460 /** 461 * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the 462 * value of max_background_jobs. For backwards compatibility we will set 463 * `max_background_jobs = max_background_compactions + max_background_flushes` 464 * in the case where user sets at least one of `max_background_compactions` or 465 * `max_background_flushes`. 466 * 467 * Returns the maximum number of concurrent background flush jobs. 468 * If you're increasing this, also consider increasing number of threads in 469 * HIGH priority thread pool. For more information, see 470 * Default: -1 471 * 472 * @return the maximum number of concurrent background flush jobs. 473 * @see RocksEnv#setBackgroundThreads(int) 474 * @see RocksEnv#setBackgroundThreads(int, Priority) 475 */ 476 @Deprecated maxBackgroundFlushes()477 int maxBackgroundFlushes(); 478 479 /** 480 * Specifies the maximum size of a info log file. If the current log file 481 * is larger than `max_log_file_size`, a new info log file will 482 * be created. 483 * If 0, all logs will be written to one log file. 484 * 485 * @param maxLogFileSize the maximum size of a info log file. 486 * @return the instance of the current object. 
487 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms 488 * while overflowing the underlying platform specific value. 489 */ setMaxLogFileSize(long maxLogFileSize)490 T setMaxLogFileSize(long maxLogFileSize); 491 492 /** 493 * Returns the maximum size of a info log file. If the current log file 494 * is larger than this size, a new info log file will be created. 495 * If 0, all logs will be written to one log file. 496 * 497 * @return the maximum size of the info log file. 498 */ maxLogFileSize()499 long maxLogFileSize(); 500 501 /** 502 * Specifies the time interval for the info log file to roll (in seconds). 503 * If specified with non-zero value, log file will be rolled 504 * if it has been active longer than `log_file_time_to_roll`. 505 * Default: 0 (disabled) 506 * 507 * @param logFileTimeToRoll the time interval in seconds. 508 * @return the instance of the current object. 509 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms 510 * while overflowing the underlying platform specific value. 511 */ setLogFileTimeToRoll(long logFileTimeToRoll)512 T setLogFileTimeToRoll(long logFileTimeToRoll); 513 514 /** 515 * Returns the time interval for the info log file to roll (in seconds). 516 * If specified with non-zero value, log file will be rolled 517 * if it has been active longer than `log_file_time_to_roll`. 518 * Default: 0 (disabled) 519 * 520 * @return the time interval in seconds. 521 */ logFileTimeToRoll()522 long logFileTimeToRoll(); 523 524 /** 525 * Specifies the maximum number of info log files to be kept. 526 * Default: 1000 527 * 528 * @param keepLogFileNum the maximum number of info log files to be kept. 529 * @return the instance of the current object. 530 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms 531 * while overflowing the underlying platform specific value. 
532 */ setKeepLogFileNum(long keepLogFileNum)533 T setKeepLogFileNum(long keepLogFileNum); 534 535 /** 536 * Returns the maximum number of info log files to be kept. 537 * Default: 1000 538 * 539 * @return the maximum number of info log files to be kept. 540 */ keepLogFileNum()541 long keepLogFileNum(); 542 543 /** 544 * Recycle log files. 545 * 546 * If non-zero, we will reuse previously written log files for new 547 * logs, overwriting the old data. The value indicates how many 548 * such files we will keep around at any point in time for later 549 * use. 550 * 551 * This is more efficient because the blocks are already 552 * allocated and fdatasync does not need to update the inode after 553 * each write. 554 * 555 * Default: 0 556 * 557 * @param recycleLogFileNum the number of log files to keep for recycling 558 * 559 * @return the reference to the current options 560 */ setRecycleLogFileNum(long recycleLogFileNum)561 T setRecycleLogFileNum(long recycleLogFileNum); 562 563 /** 564 * Recycle log files. 565 * 566 * If non-zero, we will reuse previously written log files for new 567 * logs, overwriting the old data. The value indicates how many 568 * such files we will keep around at any point in time for later 569 * use. 570 * 571 * This is more efficient because the blocks are already 572 * allocated and fdatasync does not need to update the inode after 573 * each write. 574 * 575 * Default: 0 576 * 577 * @return the number of log files kept for recycling 578 */ recycleLogFileNum()579 long recycleLogFileNum(); 580 581 /** 582 * Manifest file is rolled over on reaching this limit. 583 * The older manifest file be deleted. 584 * The default value is 1GB so that the manifest file can grow, but not 585 * reach the limit of storage capacity. 586 * 587 * @param maxManifestFileSize the size limit of a manifest file. 588 * @return the instance of the current object. 
589    */ setMaxManifestFileSize(long maxManifestFileSize)590   T setMaxManifestFileSize(long maxManifestFileSize); 591 592   /** 593    * Manifest file is rolled over on reaching this limit. 594    * The older manifest file will be deleted. 595    * The default value is 1GB so that the manifest file can grow, but not 596    * reach the limit of storage capacity. 597    * 598    * @return the size limit of a manifest file. 599    */ maxManifestFileSize()600   long maxManifestFileSize(); 601 602   /** 603    * Number of shards used for table cache. 604    * 605    * @param tableCacheNumshardbits the number of shards 606    * @return the instance of the current object. 607    */ setTableCacheNumshardbits(int tableCacheNumshardbits)608   T setTableCacheNumshardbits(int tableCacheNumshardbits); 609 610   /** 611    * Number of shards used for table cache. 612    * 613    * @return the number of shards used for table cache. 614    */ tableCacheNumshardbits()615   int tableCacheNumshardbits(); 616 617   /** 618    * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs 619    * will be deleted. 620    * <ol> 621    * <li>If both set to 0, logs will be deleted asap and will not get into 622    * the archive.</li> 623    * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, 624    * WAL files will be checked every 10 min and if total size is greater 625    * than WAL_size_limit_MB, they will be deleted starting with the 626    * earliest until size_limit is met. All empty files will be deleted.</li> 627    * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then 628    * WAL files will be checked every WAL_ttl_seconds / 2 and those that 629    * are older than WAL_ttl_seconds will be deleted.</li> 630    * <li>If both are not 0, WAL files will be checked every 10 min and both 631    * checks will be performed with ttl being first.</li> 632    * </ol> 633    * 634    * @param walTtlSeconds the ttl seconds 635    * @return the instance of the current object. 
636    * @see #setWalSizeLimitMB(long) 637    */ setWalTtlSeconds(long walTtlSeconds)638   T setWalTtlSeconds(long walTtlSeconds); 639 640   /** 641    * WalTtlSeconds() and walSizeLimitMB() affect how archived logs 642    * will be deleted. 643    * <ol> 644    * <li>If both set to 0, logs will be deleted asap and will not get into 645    * the archive.</li> 646    * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, 647    * WAL files will be checked every 10 min and if total size is greater 648    * than WAL_size_limit_MB, they will be deleted starting with the 649    * earliest until size_limit is met. All empty files will be deleted.</li> 650    * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then 651    * WAL files will be checked every WAL_ttl_seconds / 2 and those that 652    * are older than WAL_ttl_seconds will be deleted.</li> 653    * <li>If both are not 0, WAL files will be checked every 10 min and both 654    * checks will be performed with ttl being first.</li> 655    * </ol> 656    * 657    * @return the wal-ttl seconds 658    * @see #walSizeLimitMB() 659    */ walTtlSeconds()660   long walTtlSeconds(); 661 662   /** 663    * WalTtlSeconds() and walSizeLimitMB() affect how archived logs 664    * will be deleted. 665    * <ol> 666    * <li>If both set to 0, logs will be deleted asap and will not get into 667    * the archive.</li> 668    * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, 669    * WAL files will be checked every 10 min and if total size is greater 670    * than WAL_size_limit_MB, they will be deleted starting with the 671    * earliest until size_limit is met. 
All empty files will be deleted.</li> 672    * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then 673    * WAL files will be checked every WAL_ttl_seconds / 2 and those that 674    * are older than WAL_ttl_seconds will be deleted.</li> 675    * <li>If both are not 0, WAL files will be checked every 10 min and both 676    * checks will be performed with ttl being first.</li> 677    * </ol> 678    * 679    * @param sizeLimitMB size limit in mega-bytes. 680    * @return the instance of the current object. 681    * @see #walSizeLimitMB() 682    */ setWalSizeLimitMB(long sizeLimitMB)683   T setWalSizeLimitMB(long sizeLimitMB); 684 685   /** 686    * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs 687    * will be deleted. 688    * <ol> 689    * <li>If both set to 0, logs will be deleted asap and will not get into 690    * the archive.</li> 691    * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, 692    * WAL files will be checked every 10 min and if total size is greater 693    * than WAL_size_limit_MB, they will be deleted starting with the 694    * earliest until size_limit is met. All empty files will be deleted.</li> 695    * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then 696    * WAL files will be checked every WAL_ttl_seconds / 2 and those that 697    * are older than WAL_ttl_seconds will be deleted.</li> 698    * <li>If both are not 0, WAL files will be checked every 10 min and both 699    * checks will be performed with ttl being first.</li> 700    * </ol> 701    * @return size limit in mega-bytes. 702    * @see #setWalSizeLimitMB(long) 703    */ walSizeLimitMB()704   long walSizeLimitMB(); 705 706   /** 707    * Number of bytes to preallocate (via fallocate) the manifest 708    * files. Default is 4mb, which is reasonable to reduce random IO 709    * as well as prevent overallocation for mounts that preallocate 710    * large amounts of data (such as xfs's allocsize option). 711    * 712    * @param size the size in bytes 713    * @return the instance of the current object. 
714 * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms 715 * while overflowing the underlying platform specific value. 716 */ setManifestPreallocationSize(long size)717 T setManifestPreallocationSize(long size); 718 719 /** 720 * Number of bytes to preallocate (via fallocate) the manifest 721 * files. Default is 4mb, which is reasonable to reduce random IO 722 * as well as prevent overallocation for mounts that preallocate 723 * large amounts of data (such as xfs's allocsize option). 724 * 725 * @return size in bytes. 726 */ manifestPreallocationSize()727 long manifestPreallocationSize(); 728 729 /** 730 * Enable the OS to use direct I/O for reading sst tables. 731 * Default: false 732 * 733 * @param useDirectReads if true, then direct read is enabled 734 * @return the instance of the current object. 735 */ setUseDirectReads(boolean useDirectReads)736 T setUseDirectReads(boolean useDirectReads); 737 738 /** 739 * Enable the OS to use direct I/O for reading sst tables. 740 * Default: false 741 * 742 * @return if true, then direct reads are enabled 743 */ useDirectReads()744 boolean useDirectReads(); 745 746 /** 747 * Enable the OS to use direct reads and writes in flush and 748 * compaction 749 * Default: false 750 * 751 * @param useDirectIoForFlushAndCompaction if true, then direct 752 * I/O will be enabled for background flush and compactions 753 * @return the instance of the current object. 
754    */ setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction)755   T setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction); 756 757   /** 758    * Enable the OS to use direct reads and writes in flush and 759    * compaction 760    * 761    * @return if true, then direct I/O is enabled for flush and 762    *     compaction 763    */ useDirectIoForFlushAndCompaction()764   boolean useDirectIoForFlushAndCompaction(); 765 766   /** 767    * Whether fallocate calls are allowed 768    * 769    * @param allowFAllocate false if fallocate() calls are bypassed 770    * 771    * @return the reference to the current options. 772    */ setAllowFAllocate(boolean allowFAllocate)773   T setAllowFAllocate(boolean allowFAllocate); 774 775   /** 776    * Whether fallocate calls are allowed 777    * 778    * @return false if fallocate() calls are bypassed 779    */ allowFAllocate()780   boolean allowFAllocate(); 781 782   /** 783    * Allow the OS to mmap file for reading sst tables. 784    * Default: false 785    * 786    * @param allowMmapReads true if mmap reads are allowed. 787    * @return the instance of the current object. 788    */ setAllowMmapReads(boolean allowMmapReads)789   T setAllowMmapReads(boolean allowMmapReads); 790 791   /** 792    * Allow the OS to mmap file for reading sst tables. 793    * Default: false 794    * 795    * @return true if mmap reads are allowed. 796    */ allowMmapReads()797   boolean allowMmapReads(); 798 799   /** 800    * Allow the OS to mmap file for writing. Default: false 801    * 802    * @param allowMmapWrites true if mmap writes are allowed. 803    * @return the instance of the current object. 804    */ setAllowMmapWrites(boolean allowMmapWrites)805   T setAllowMmapWrites(boolean allowMmapWrites); 806 807   /** 808    * Allow the OS to mmap file for writing. Default: false 809    * 810    * @return true if mmap writes are allowed. 811    */ allowMmapWrites()812   boolean allowMmapWrites(); 813 814   /** 815    * Disable child process inherit open files. 
Default: true 816 * 817 * @param isFdCloseOnExec true if child process inheriting open 818 * files is disabled. 819 * @return the instance of the current object. 820 */ setIsFdCloseOnExec(boolean isFdCloseOnExec)821 T setIsFdCloseOnExec(boolean isFdCloseOnExec); 822 823 /** 824 * Disable child process inherit open files. Default: true 825 * 826 * @return true if child process inheriting open files is disabled. 827 */ isFdCloseOnExec()828 boolean isFdCloseOnExec(); 829 830 /** 831 * If set true, will hint the underlying file system that the file 832 * access pattern is random, when a sst file is opened. 833 * Default: true 834 * 835 * @param adviseRandomOnOpen true if hinting random access is on. 836 * @return the instance of the current object. 837 */ setAdviseRandomOnOpen(boolean adviseRandomOnOpen)838 T setAdviseRandomOnOpen(boolean adviseRandomOnOpen); 839 840 /** 841 * If set true, will hint the underlying file system that the file 842 * access pattern is random, when a sst file is opened. 843 * Default: true 844 * 845 * @return true if hinting random access is on. 846 */ adviseRandomOnOpen()847 boolean adviseRandomOnOpen(); 848 849 /** 850 * Amount of data to build up in memtables across all column 851 * families before writing to disk. 852 * 853 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()}, 854 * which enforces a limit for a single memtable. 855 * 856 * This feature is disabled by default. Specify a non-zero value 857 * to enable it. 858 * 859 * Default: 0 (disabled) 860 * 861 * @param dbWriteBufferSize the size of the write buffer 862 * 863 * @return the reference to the current options. 864 */ setDbWriteBufferSize(long dbWriteBufferSize)865 T setDbWriteBufferSize(long dbWriteBufferSize); 866 867 /** 868 * Use passed {@link WriteBufferManager} to control memory usage across 869 * multiple column families and/or DB instances. 
870 * 871 * Check <a href="https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager"> 872 * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager</a> 873 * for more details on when to use it 874 * 875 * @param writeBufferManager The WriteBufferManager to use 876 * @return the reference of the current options. 877 */ setWriteBufferManager(final WriteBufferManager writeBufferManager)878 T setWriteBufferManager(final WriteBufferManager writeBufferManager); 879 880 /** 881 * Reference to {@link WriteBufferManager} used by it. <br> 882 * 883 * Default: null (Disabled) 884 * 885 * @return a reference to WriteBufferManager 886 */ writeBufferManager()887 WriteBufferManager writeBufferManager(); 888 889 /** 890 * Amount of data to build up in memtables across all column 891 * families before writing to disk. 892 * 893 * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()}, 894 * which enforces a limit for a single memtable. 895 * 896 * This feature is disabled by default. Specify a non-zero value 897 * to enable it. 898 * 899 * Default: 0 (disabled) 900 * 901 * @return the size of the write buffer 902 */ dbWriteBufferSize()903 long dbWriteBufferSize(); 904 905 /** 906 * Specify the file access pattern once a compaction is started. 907 * It will be applied to all input files of a compaction. 908 * 909 * Default: {@link AccessHint#NORMAL} 910 * 911 * @param accessHint The access hint 912 * 913 * @return the reference to the current options. 914 */ setAccessHintOnCompactionStart(final AccessHint accessHint)915 T setAccessHintOnCompactionStart(final AccessHint accessHint); 916 917 /** 918 * Specify the file access pattern once a compaction is started. 919 * It will be applied to all input files of a compaction. 
920 * 921 * Default: {@link AccessHint#NORMAL} 922 * 923 * @return The access hint 924 */ accessHintOnCompactionStart()925 AccessHint accessHintOnCompactionStart(); 926 927 /** 928 * If true, always create a new file descriptor and new table reader 929 * for compaction inputs. Turn this parameter on may introduce extra 930 * memory usage in the table reader, if it allocates extra memory 931 * for indexes. This will allow file descriptor prefetch options 932 * to be set for compaction input files and not to impact file 933 * descriptors for the same file used by user queries. 934 * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()} 935 * for this mode if using block-based table. 936 * 937 * Default: false 938 * 939 * @param newTableReaderForCompactionInputs true if a new file descriptor and 940 * table reader should be created for compaction inputs 941 * 942 * @return the reference to the current options. 943 */ setNewTableReaderForCompactionInputs( boolean newTableReaderForCompactionInputs)944 T setNewTableReaderForCompactionInputs( 945 boolean newTableReaderForCompactionInputs); 946 947 /** 948 * If true, always create a new file descriptor and new table reader 949 * for compaction inputs. Turn this parameter on may introduce extra 950 * memory usage in the table reader, if it allocates extra memory 951 * for indexes. This will allow file descriptor prefetch options 952 * to be set for compaction input files and not to impact file 953 * descriptors for the same file used by user queries. 954 * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()} 955 * for this mode if using block-based table. 
956 * 957 * Default: false 958 * 959 * @return true if a new file descriptor and table reader are created for 960 * compaction inputs 961 */ newTableReaderForCompactionInputs()962 boolean newTableReaderForCompactionInputs(); 963 964 /** 965 * This is a maximum buffer size that is used by WinMmapReadableFile in 966 * unbuffered disk I/O mode. We need to maintain an aligned buffer for 967 * reads. We allow the buffer to grow until the specified value and then 968 * for bigger requests allocate one shot buffers. In unbuffered mode we 969 * always bypass read-ahead buffer at ReadaheadRandomAccessFile 970 * When read-ahead is required we then make use of 971 * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and 972 * always try to read ahead. 973 * With read-ahead we always pre-allocate buffer to the size instead of 974 * growing it up to a limit. 975 * 976 * This option is currently honored only on Windows 977 * 978 * Default: 1 Mb 979 * 980 * Special value: 0 - means do not maintain per instance buffer. Allocate 981 * per request buffer and avoid locking. 982 * 983 * @param randomAccessMaxBufferSize the maximum size of the random access 984 * buffer 985 * 986 * @return the reference to the current options. 987 */ setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize)988 T setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize); 989 990 /** 991 * This is a maximum buffer size that is used by WinMmapReadableFile in 992 * unbuffered disk I/O mode. We need to maintain an aligned buffer for 993 * reads. We allow the buffer to grow until the specified value and then 994 * for bigger requests allocate one shot buffers. In unbuffered mode we 995 * always bypass read-ahead buffer at ReadaheadRandomAccessFile 996 * When read-ahead is required we then make use of 997 * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and 998 * always try to read ahead. 
With read-ahead we always pre-allocate buffer 999 * to the size instead of growing it up to a limit. 1000 * 1001 * This option is currently honored only on Windows 1002 * 1003 * Default: 1 Mb 1004 * 1005 * Special value: 0 - means do not maintain per instance buffer. Allocate 1006 * per request buffer and avoid locking. 1007 * 1008 * @return the maximum size of the random access buffer 1009 */ randomAccessMaxBufferSize()1010 long randomAccessMaxBufferSize(); 1011 1012 /** 1013 * Use adaptive mutex, which spins in the user space before resorting 1014 * to kernel. This could reduce context switch when the mutex is not 1015 * heavily contended. However, if the mutex is hot, we could end up 1016 * wasting spin time. 1017 * Default: false 1018 * 1019 * @param useAdaptiveMutex true if adaptive mutex is used. 1020 * @return the instance of the current object. 1021 */ setUseAdaptiveMutex(boolean useAdaptiveMutex)1022 T setUseAdaptiveMutex(boolean useAdaptiveMutex); 1023 1024 /** 1025 * Use adaptive mutex, which spins in the user space before resorting 1026 * to kernel. This could reduce context switch when the mutex is not 1027 * heavily contended. However, if the mutex is hot, we could end up 1028 * wasting spin time. 1029 * Default: false 1030 * 1031 * @return true if adaptive mutex is used. 1032 */ useAdaptiveMutex()1033 boolean useAdaptiveMutex(); 1034 1035 //TODO(AR) NOW 1036 // /** 1037 // * Sets the {@link EventListener}s whose callback functions 1038 // * will be called when specific RocksDB event happens. 1039 // * 1040 // * @param listeners the listeners who should be notified on various events. 1041 // * 1042 // * @return the instance of the current object. 1043 // */ 1044 // T setListeners(final List<EventListener> listeners); 1045 // 1046 // /** 1047 // * Gets the {@link EventListener}s whose callback functions 1048 // * will be called when specific RocksDB event happens. 1049 // * 1050 // * @return a collection of Event listeners. 
1051 // */ 1052 // Collection<EventListener> listeners(); 1053 1054 /** 1055 * If true, then the status of the threads involved in this DB will 1056 * be tracked and available via GetThreadList() API. 1057 * 1058 * Default: false 1059 * 1060 * @param enableThreadTracking true to enable tracking 1061 * 1062 * @return the reference to the current options. 1063 */ setEnableThreadTracking(boolean enableThreadTracking)1064 T setEnableThreadTracking(boolean enableThreadTracking); 1065 1066 /** 1067 * If true, then the status of the threads involved in this DB will 1068 * be tracked and available via GetThreadList() API. 1069 * 1070 * Default: false 1071 * 1072 * @return true if tracking is enabled 1073 */ enableThreadTracking()1074 boolean enableThreadTracking(); 1075 1076 /** 1077 * By default, a single write thread queue is maintained. The thread gets 1078 * to the head of the queue becomes write batch group leader and responsible 1079 * for writing to WAL and memtable for the batch group. 1080 * 1081 * If {@link #enablePipelinedWrite()} is true, separate write thread queue is 1082 * maintained for WAL write and memtable write. A write thread first enter WAL 1083 * writer queue and then memtable writer queue. Pending thread on the WAL 1084 * writer queue thus only have to wait for previous writers to finish their 1085 * WAL writing but not the memtable writing. Enabling the feature may improve 1086 * write throughput and reduce latency of the prepare phase of two-phase 1087 * commit. 1088 * 1089 * Default: false 1090 * 1091 * @param enablePipelinedWrite true to enabled pipelined writes 1092 * 1093 * @return the reference to the current options. 1094 */ setEnablePipelinedWrite(final boolean enablePipelinedWrite)1095 T setEnablePipelinedWrite(final boolean enablePipelinedWrite); 1096 1097 /** 1098 * Returns true if pipelined writes are enabled. 1099 * See {@link #setEnablePipelinedWrite(boolean)}. 
1100 * 1101 * @return true if pipelined writes are enabled, false otherwise. 1102 */ enablePipelinedWrite()1103 boolean enablePipelinedWrite(); 1104 1105 /** 1106 * Setting {@link #unorderedWrite()} to true trades higher write throughput with 1107 * relaxing the immutability guarantee of snapshots. This violates the 1108 * repeatability one expects from ::Get from a snapshot, as well as 1109 * ::MultiGet and Iterator's consistent-point-in-time view property. 1110 * If the application cannot tolerate the relaxed guarantees, it can implement 1111 * its own mechanisms to work around that and yet benefit from the higher 1112 * throughput. Using TransactionDB with WRITE_PREPARED write policy and 1113 * {@link #twoWriteQueues()} true is one way to achieve immutable snapshots despite 1114 * unordered_write. 1115 * 1116 * By default, i.e., when it is false, rocksdb does not advance the sequence 1117 * number for new snapshots unless all the writes with lower sequence numbers 1118 * are already finished. This provides the immutability that we except from 1119 * snapshots. Moreover, since Iterator and MultiGet internally depend on 1120 * snapshots, the snapshot immutability results into Iterator and MultiGet 1121 * offering consistent-point-in-time view. If set to true, although 1122 * Read-Your-Own-Write property is still provided, the snapshot immutability 1123 * property is relaxed: the writes issued after the snapshot is obtained (with 1124 * larger sequence numbers) will be still not visible to the reads from that 1125 * snapshot, however, there still might be pending writes (with lower sequence 1126 * number) that will change the state visible to the snapshot after they are 1127 * landed to the memtable. 1128 * 1129 * @param unorderedWrite true to enabled unordered write 1130 * 1131 * @return the reference to the current options. 
1132 */ setUnorderedWrite(final boolean unorderedWrite)1133 T setUnorderedWrite(final boolean unorderedWrite); 1134 1135 /** 1136 * Returns true if unordered write are enabled. 1137 * See {@link #setUnorderedWrite(boolean)}. 1138 * 1139 * @return true if unordered write are enabled, false otherwise. 1140 */ unorderedWrite()1141 boolean unorderedWrite(); 1142 1143 /** 1144 * If true, allow multi-writers to update mem tables in parallel. 1145 * Only some memtable factorys support concurrent writes; currently it 1146 * is implemented only for SkipListFactory. Concurrent memtable writes 1147 * are not compatible with inplace_update_support or filter_deletes. 1148 * It is strongly recommended to set 1149 * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use 1150 * this feature. 1151 * Default: true 1152 * 1153 * @param allowConcurrentMemtableWrite true to enable concurrent writes 1154 * for the memtable 1155 * 1156 * @return the reference to the current options. 1157 */ setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite)1158 T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite); 1159 1160 /** 1161 * If true, allow multi-writers to update mem tables in parallel. 1162 * Only some memtable factorys support concurrent writes; currently it 1163 * is implemented only for SkipListFactory. Concurrent memtable writes 1164 * are not compatible with inplace_update_support or filter_deletes. 1165 * It is strongly recommended to set 1166 * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use 1167 * this feature. 1168 * Default: true 1169 * 1170 * @return true if concurrent writes are enabled for the memtable 1171 */ allowConcurrentMemtableWrite()1172 boolean allowConcurrentMemtableWrite(); 1173 1174 /** 1175 * If true, threads synchronizing with the write batch group leader will 1176 * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a 1177 * mutex. 
This can substantially improve throughput for concurrent workloads, 1178 * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled. 1179 * Default: true 1180 * 1181 * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the 1182 * write threads 1183 * 1184 * @return the reference to the current options. 1185 */ setEnableWriteThreadAdaptiveYield( boolean enableWriteThreadAdaptiveYield)1186 T setEnableWriteThreadAdaptiveYield( 1187 boolean enableWriteThreadAdaptiveYield); 1188 1189 /** 1190 * If true, threads synchronizing with the write batch group leader will 1191 * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a 1192 * mutex. This can substantially improve throughput for concurrent workloads, 1193 * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled. 1194 * Default: true 1195 * 1196 * @return true if adaptive yield is enabled 1197 * for the writing threads 1198 */ enableWriteThreadAdaptiveYield()1199 boolean enableWriteThreadAdaptiveYield(); 1200 1201 /** 1202 * The maximum number of microseconds that a write operation will use 1203 * a yielding spin loop to coordinate with other write threads before 1204 * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is 1205 * set properly) increasing this value is likely to increase RocksDB 1206 * throughput at the expense of increased CPU usage. 1207 * Default: 100 1208 * 1209 * @param writeThreadMaxYieldUsec maximum number of microseconds 1210 * 1211 * @return the reference to the current options. 1212 */ setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec)1213 T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec); 1214 1215 /** 1216 * The maximum number of microseconds that a write operation will use 1217 * a yielding spin loop to coordinate with other write threads before 1218 * blocking on a mutex. 
(Assuming {@link #writeThreadSlowYieldUsec()} is 1219 * set properly) increasing this value is likely to increase RocksDB 1220 * throughput at the expense of increased CPU usage. 1221 * Default: 100 1222 * 1223 * @return the maximum number of microseconds 1224 */ writeThreadMaxYieldUsec()1225 long writeThreadMaxYieldUsec(); 1226 1227 /** 1228 * The latency in microseconds after which a std::this_thread::yield 1229 * call (sched_yield on Linux) is considered to be a signal that 1230 * other processes or threads would like to use the current core. 1231 * Increasing this makes writer threads more likely to take CPU 1232 * by spinning, which will show up as an increase in the number of 1233 * involuntary context switches. 1234 * Default: 3 1235 * 1236 * @param writeThreadSlowYieldUsec the latency in microseconds 1237 * 1238 * @return the reference to the current options. 1239 */ setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec)1240 T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec); 1241 1242 /** 1243 * The latency in microseconds after which a std::this_thread::yield 1244 * call (sched_yield on Linux) is considered to be a signal that 1245 * other processes or threads would like to use the current core. 1246 * Increasing this makes writer threads more likely to take CPU 1247 * by spinning, which will show up as an increase in the number of 1248 * involuntary context switches. 1249 * Default: 3 1250 * 1251 * @return writeThreadSlowYieldUsec the latency in microseconds 1252 */ writeThreadSlowYieldUsec()1253 long writeThreadSlowYieldUsec(); 1254 1255 /** 1256 * If true, then DB::Open() will not update the statistics used to optimize 1257 * compaction decision by loading table properties from many files. 1258 * Turning off this feature will improve DBOpen time especially in 1259 * disk environment. 
1260 * 1261 * Default: false 1262 * 1263 * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped 1264 * 1265 * @return the reference to the current options. 1266 */ setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen)1267 T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen); 1268 1269 /** 1270 * If true, then DB::Open() will not update the statistics used to optimize 1271 * compaction decision by loading table properties from many files. 1272 * Turning off this feature will improve DBOpen time especially in 1273 * disk environment. 1274 * 1275 * Default: false 1276 * 1277 * @return true if updating stats will be skipped 1278 */ skipStatsUpdateOnDbOpen()1279 boolean skipStatsUpdateOnDbOpen(); 1280 1281 /** 1282 * Recovery mode to control the consistency while replaying WAL 1283 * 1284 * Default: {@link WALRecoveryMode#PointInTimeRecovery} 1285 * 1286 * @param walRecoveryMode The WAL recover mode 1287 * 1288 * @return the reference to the current options. 1289 */ setWalRecoveryMode(WALRecoveryMode walRecoveryMode)1290 T setWalRecoveryMode(WALRecoveryMode walRecoveryMode); 1291 1292 /** 1293 * Recovery mode to control the consistency while replaying WAL 1294 * 1295 * Default: {@link WALRecoveryMode#PointInTimeRecovery} 1296 * 1297 * @return The WAL recover mode 1298 */ walRecoveryMode()1299 WALRecoveryMode walRecoveryMode(); 1300 1301 /** 1302 * if set to false then recovery will fail when a prepared 1303 * transaction is encountered in the WAL 1304 * 1305 * Default: false 1306 * 1307 * @param allow2pc true if two-phase-commit is enabled 1308 * 1309 * @return the reference to the current options. 
1310 */ setAllow2pc(boolean allow2pc)1311 T setAllow2pc(boolean allow2pc); 1312 1313 /** 1314 * if set to false then recovery will fail when a prepared 1315 * transaction is encountered in the WAL 1316 * 1317 * Default: false 1318 * 1319 * @return true if two-phase-commit is enabled 1320 */ allow2pc()1321 boolean allow2pc(); 1322 1323 /** 1324 * A global cache for table-level rows. 1325 * 1326 * Default: null (disabled) 1327 * 1328 * @param rowCache The global row cache 1329 * 1330 * @return the reference to the current options. 1331 */ setRowCache(final Cache rowCache)1332 T setRowCache(final Cache rowCache); 1333 1334 /** 1335 * A global cache for table-level rows. 1336 * 1337 * Default: null (disabled) 1338 * 1339 * @return The global row cache 1340 */ rowCache()1341 Cache rowCache(); 1342 1343 /** 1344 * A filter object supplied to be invoked while processing write-ahead-logs 1345 * (WALs) during recovery. The filter provides a way to inspect log 1346 * records, ignoring a particular record or skipping replay. 1347 * The filter is invoked at startup and is invoked from a single-thread 1348 * currently. 1349 * 1350 * @param walFilter the filter for processing WALs during recovery. 1351 * 1352 * @return the reference to the current options. 1353 */ setWalFilter(final AbstractWalFilter walFilter)1354 T setWalFilter(final AbstractWalFilter walFilter); 1355 1356 /** 1357 * Get's the filter for processing WALs during recovery. 1358 * See {@link #setWalFilter(AbstractWalFilter)}. 1359 * 1360 * @return the filter used for processing WALs during recovery. 1361 */ walFilter()1362 WalFilter walFilter(); 1363 1364 /** 1365 * If true, then DB::Open / CreateColumnFamily / DropColumnFamily 1366 * / SetOptions will fail if options file is not detected or properly 1367 * persisted. 
1368 * 1369 * DEFAULT: false 1370 * 1371 * @param failIfOptionsFileError true if we should fail if there is an error 1372 * in the options file 1373 * 1374 * @return the reference to the current options. 1375 */ setFailIfOptionsFileError(boolean failIfOptionsFileError)1376 T setFailIfOptionsFileError(boolean failIfOptionsFileError); 1377 1378 /** 1379 * If true, then DB::Open / CreateColumnFamily / DropColumnFamily 1380 * / SetOptions will fail if options file is not detected or properly 1381 * persisted. 1382 * 1383 * DEFAULT: false 1384 * 1385 * @return true if we should fail if there is an error in the options file 1386 */ failIfOptionsFileError()1387 boolean failIfOptionsFileError(); 1388 1389 /** 1390 * If true, then print malloc stats together with rocksdb.stats 1391 * when printing to LOG. 1392 * 1393 * DEFAULT: false 1394 * 1395 * @param dumpMallocStats true if malloc stats should be printed to LOG 1396 * 1397 * @return the reference to the current options. 1398 */ setDumpMallocStats(boolean dumpMallocStats)1399 T setDumpMallocStats(boolean dumpMallocStats); 1400 1401 /** 1402 * If true, then print malloc stats together with rocksdb.stats 1403 * when printing to LOG. 1404 * 1405 * DEFAULT: false 1406 * 1407 * @return true if malloc stats should be printed to LOG 1408 */ dumpMallocStats()1409 boolean dumpMallocStats(); 1410 1411 /** 1412 * By default RocksDB replay WAL logs and flush them on DB open, which may 1413 * create very small SST files. If this option is enabled, RocksDB will try 1414 * to avoid (but not guarantee not to) flush during recovery. Also, existing 1415 * WAL logs will be kept, so that if crash happened before flush, we still 1416 * have logs to recover from. 1417 * 1418 * DEFAULT: false 1419 * 1420 * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee 1421 * not to) flush during recovery 1422 * 1423 * @return the reference to the current options. 
1424 */ setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery)1425 T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery); 1426 1427 /** 1428 * By default RocksDB replay WAL logs and flush them on DB open, which may 1429 * create very small SST files. If this option is enabled, RocksDB will try 1430 * to avoid (but not guarantee not to) flush during recovery. Also, existing 1431 * WAL logs will be kept, so that if crash happened before flush, we still 1432 * have logs to recover from. 1433 * 1434 * DEFAULT: false 1435 * 1436 * @return true to try to avoid (but not guarantee not to) flush during 1437 * recovery 1438 */ avoidFlushDuringRecovery()1439 boolean avoidFlushDuringRecovery(); 1440 1441 /** 1442 * Set this option to true during creation of database if you want 1443 * to be able to ingest behind (call IngestExternalFile() skipping keys 1444 * that already exist, rather than overwriting matching keys). 1445 * Setting this option to true will affect 2 things: 1446 * 1) Disable some internal optimizations around SST file compression 1447 * 2) Reserve bottom-most level for ingested files only. 1448 * 3) Note that num_levels should be >= 3 if this option is turned on. 1449 * 1450 * DEFAULT: false 1451 * 1452 * @param allowIngestBehind true to allow ingest behind, false to disallow. 1453 * 1454 * @return the reference to the current options. 1455 */ setAllowIngestBehind(final boolean allowIngestBehind)1456 T setAllowIngestBehind(final boolean allowIngestBehind); 1457 1458 /** 1459 * Returns true if ingest behind is allowed. 1460 * See {@link #setAllowIngestBehind(boolean)}. 1461 * 1462 * @return true if ingest behind is allowed, false otherwise. 1463 */ allowIngestBehind()1464 boolean allowIngestBehind(); 1465 1466 /** 1467 * Needed to support differential snapshots. 1468 * If set to true then DB will only process deletes with sequence number 1469 * less than what was set by SetPreserveDeletesSequenceNumber(uint64_t ts). 
1470 * Clients are responsible to periodically call this method to advance 1471 * the cutoff time. If this method is never called and preserve_deletes 1472 * is set to true NO deletes will ever be processed. 1473 * At the moment this only keeps normal deletes, SingleDeletes will 1474 * not be preserved. 1475 * 1476 * DEFAULT: false 1477 * 1478 * @param preserveDeletes true to preserve deletes. 1479 * 1480 * @return the reference to the current options. 1481 */ setPreserveDeletes(final boolean preserveDeletes)1482 T setPreserveDeletes(final boolean preserveDeletes); 1483 1484 /** 1485 * Returns true if deletes are preserved. 1486 * See {@link #setPreserveDeletes(boolean)}. 1487 * 1488 * @return true if deletes are preserved, false otherwise. 1489 */ preserveDeletes()1490 boolean preserveDeletes(); 1491 1492 /** 1493 * If enabled it uses two queues for writes, one for the ones with 1494 * disable_memtable and one for the ones that also write to memtable. This 1495 * allows the memtable writes not to lag behind other writes. It can be used 1496 * to optimize MySQL 2PC in which only the commits, which are serial, write to 1497 * memtable. 1498 * 1499 * DEFAULT: false 1500 * 1501 * @param twoWriteQueues true to enable two write queues, false otherwise. 1502 * 1503 * @return the reference to the current options. 1504 */ setTwoWriteQueues(final boolean twoWriteQueues)1505 T setTwoWriteQueues(final boolean twoWriteQueues); 1506 1507 /** 1508 * Returns true if two write queues are enabled. 1509 * 1510 * @return true if two write queues are enabled, false otherwise. 1511 */ twoWriteQueues()1512 boolean twoWriteQueues(); 1513 1514 /** 1515 * If true WAL is not flushed automatically after each write. Instead it 1516 * relies on manual invocation of FlushWAL to write the WAL buffer to its 1517 * file. 1518 * 1519 * DEFAULT: false 1520 * 1521 * @param manualWalFlush true to set disable automatic WAL flushing, 1522 * false otherwise. 
1523 * 1524 * @return the reference to the current options. 1525 */ setManualWalFlush(final boolean manualWalFlush)1526 T setManualWalFlush(final boolean manualWalFlush); 1527 1528 /** 1529 * Returns true if automatic WAL flushing is disabled. 1530 * See {@link #setManualWalFlush(boolean)}. 1531 * 1532 * @return true if automatic WAL flushing is disabled, false otherwise. 1533 */ manualWalFlush()1534 boolean manualWalFlush(); 1535 1536 /** 1537 * If true, RocksDB supports flushing multiple column families and committing 1538 * their results atomically to MANIFEST. Note that it is not 1539 * necessary to set atomic_flush to true if WAL is always enabled since WAL 1540 * allows the database to be restored to the last persistent state in WAL. 1541 * This option is useful when there are column families with writes NOT 1542 * protected by WAL. 1543 * For manual flush, application has to specify which column families to 1544 * flush atomically in {@link RocksDB#flush(FlushOptions, List)}. 1545 * For auto-triggered flush, RocksDB atomically flushes ALL column families. 1546 * 1547 * Currently, any WAL-enabled writes after atomic flush may be replayed 1548 * independently if the process crashes later and tries to recover. 1549 * 1550 * @param atomicFlush true to enable atomic flush of multiple column families. 1551 * 1552 * @return the reference to the current options. 1553 */ setAtomicFlush(final boolean atomicFlush)1554 T setAtomicFlush(final boolean atomicFlush); 1555 1556 /** 1557 * Determine if atomic flush of multiple column families is enabled. 1558 * 1559 * See {@link #setAtomicFlush(boolean)}. 1560 * 1561 * @return true if atomic flush is enabled. 1562 */ atomicFlush()1563 boolean atomicFlush(); 1564 } 1565