// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.List;

/**
 * Advanced Column Family Options which are not
 * mutable (i.e. not present in
 * {@link AdvancedMutableColumnFamilyOptionsInterface}).
 *
 * Taken from include/rocksdb/advanced_options.h
 */
public interface AdvancedColumnFamilyOptionsInterface<
    T extends AdvancedColumnFamilyOptionsInterface<T>> {

  /**
   * The minimum number of write buffers that will be merged together
   * before writing to storage. If set to 1, then
   * all write buffers are flushed to L0 as individual files, which increases
   * read amplification because a get request has to check all of these
   * files. Also, an in-memory merge may result in writing less
   * data to storage if there are duplicate records in each of these
   * individual write buffers. Default: 1
   *
   * @param minWriteBufferNumberToMerge the minimum number of write buffers
   *     that will be merged together.
   * @return the reference to the current options.
   */
  T setMinWriteBufferNumberToMerge(
      int minWriteBufferNumberToMerge);

  /**
   * The minimum number of write buffers that will be merged together
   * before writing to storage. If set to 1, then
   * all write buffers are flushed to L0 as individual files, which increases
   * read amplification because a get request has to check all of these
   * files. Also, an in-memory merge may result in writing less
   * data to storage if there are duplicate records in each of these
   * individual write buffers. Default: 1
   *
   * @return the minimum number of write buffers that will be merged together.
   */
  int minWriteBufferNumberToMerge();
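  // Illustrative sketch (not part of the original interface): configuring
  // write-buffer merging via the concrete ColumnFamilyOptions class, which
  // implements this interface. The chosen values are assumptions, not
  // recommendations.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setMaxWriteBufferNumber(4) // keep up to 4 memtables
  //            .setMinWriteBufferNumberToMerge(2)) { // merge 2 before flush
  //     // pass cfOpts to a ColumnFamilyDescriptor when opening the DB
  //   }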
  /**
   * The total maximum number of write buffers to maintain in memory including
   * copies of buffers that have already been flushed. Unlike
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()},
   * this parameter does not affect flushing.
   * This controls the minimum amount of write history that will be available
   * in memory for conflict checking when Transactions are used.
   *
   * When using an OptimisticTransactionDB:
   * If this value is too low, some transactions may fail at commit time due
   * to not being able to determine whether there were any write conflicts.
   *
   * When using a TransactionDB:
   * If Transaction::SetSnapshot is used, TransactionDB will read either
   * in-memory write buffers or SST files to do write-conflict checking.
   * Increasing this value can reduce the number of reads to SST files
   * done for conflict detection.
   *
   * Setting this value to 0 will cause write buffers to be freed immediately
   * after they are flushed.
   * If this value is set to -1,
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
   * will be used.
   *
   * Default:
   * If using a TransactionDB/OptimisticTransactionDB, the default value will
   * be set to the value of
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
   * if it is not explicitly set by the user. Otherwise, the default is 0.
   *
   * @param maxWriteBufferNumberToMaintain The maximum number of write
   *     buffers to maintain
   *
   * @return the reference to the current options.
   */
  T setMaxWriteBufferNumberToMaintain(
      int maxWriteBufferNumberToMaintain);

  /**
   * The total maximum number of write buffers to maintain in memory including
   * copies of buffers that have already been flushed.
   *
   * @return The maximum number of write buffers to maintain
   */
  int maxWriteBufferNumberToMaintain();

  /**
   * Allows thread-safe inplace updates.
   * If inplace_callback function is not set,
   * Put(key, new_value) will update the existing_value in place iff
   *   * key exists in the current memtable
   *   * sizeof(new_value) ≤ sizeof(existing_value)
   *   * existing_value for that key is a put i.e. kTypeValue
   * If inplace_callback function is set, check the doc for inplace_callback.
   * Default: false.
   *
   * @param inplaceUpdateSupport true if thread-safe inplace updates
   *     are allowed.
   * @return the reference to the current options.
   */
  T setInplaceUpdateSupport(
      boolean inplaceUpdateSupport);

  /**
   * Allows thread-safe inplace updates.
   * If inplace_callback function is not set,
   * Put(key, new_value) will update the existing_value in place iff
   *   * key exists in the current memtable
   *   * sizeof(new_value) ≤ sizeof(existing_value)
   *   * existing_value for that key is a put i.e. kTypeValue
   * If inplace_callback function is set, check the doc for inplace_callback.
   * Default: false.
   *
   * @return true if thread-safe inplace updates are allowed.
   */
  boolean inplaceUpdateSupport();

  /**
   * Control locality of bloom filter probes to improve cache miss rate.
   * This option only applies to memtable prefix bloom and plaintable
   * prefix bloom. It essentially limits the max number of cache lines each
   * bloom filter check can touch.
   * This optimization is turned off when set to 0. The number should never
   * be greater than the number of probes. This option can boost performance
   * for in-memory workloads, but should be used with care since it can cause
   * a higher false positive rate.
   * Default: 0
   *
   * @param bloomLocality the level of locality of bloom-filter probes.
   * @return the reference to the current options.
   */
  T setBloomLocality(int bloomLocality);

  /**
   * Control locality of bloom filter probes to improve cache miss rate.
   * This option only applies to memtable prefix bloom and plaintable
   * prefix bloom. It essentially limits the max number of cache lines each
   * bloom filter check can touch.
   * This optimization is turned off when set to 0. The number should never
   * be greater than the number of probes. This option can boost performance
   * for in-memory workloads, but should be used with care since it can cause
   * a higher false positive rate.
   * Default: 0
   *
   * @return the level of locality of bloom-filter probes.
   * @see #setBloomLocality(int)
   */
  int bloomLocality();
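  // Illustrative sketch (not part of the original interface): keeping extra
  // flushed write buffers in memory for transaction conflict checking. The
  // value 4 is an assumption for demonstration only.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setMaxWriteBufferNumberToMaintain(4)) {
  //     // with a TransactionDB, SetSnapshot-based conflict checks can then
  //     // often be answered from these in-memory buffers instead of SST
  //     // files
  //   }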
  /**
   * <p>Different levels can have different compression
   * policies. There are cases where most lower levels
   * would like to use quick compression algorithms while
   * the higher levels (which have more data) use
   * compression algorithms that have better compression
   * but could be slower. This array, if non-empty, should
   * have an entry for each level of the database;
   * these override the value specified in the previous
   * field 'compression'.</p>
   *
   * <strong>NOTICE</strong>
   * <p>If {@code level_compaction_dynamic_level_bytes=true},
   * {@code compression_per_level[0]} still determines {@code L0},
   * but other elements of the array are based on the base level
   * (the level {@code L0} files are merged to), and may not
   * match the level users see from the info log for metadata.
   * </p>
   * <p>If {@code L0} files are merged to {@code level - n},
   * then, for {@code i>0}, {@code compression_per_level[i]}
   * determines the compression type for level {@code n+i-1}.</p>
   *
   * <strong>Example</strong>
   * <p>For example, if we have 5 levels, and we determine to
   * merge {@code L0} data to {@code L4} (which means {@code L1..L3}
   * will be empty), then the new files written to {@code L4} use
   * compression type {@code compression_per_level[1]}.</p>
   *
   * <p>If {@code L0} is now merged to {@code L2}, data going to
   * {@code L2} will be compressed according to
   * {@code compression_per_level[1]}, {@code L3} using
   * {@code compression_per_level[2]} and {@code L4} using
   * {@code compression_per_level[3]}. The compression for each
   * level can change as data grows.</p>
   *
   * <p><strong>Default:</strong> empty</p>
   *
   * @param compressionLevels list of
   *     {@link org.rocksdb.CompressionType} instances.
   *
   * @return the reference to the current options.
   */
  T setCompressionPerLevel(
      List<CompressionType> compressionLevels);

  /**
   * <p>Return the currently set per-level list of
   * {@link org.rocksdb.CompressionType} instances.</p>
   *
   * <p>See: {@link #setCompressionPerLevel(java.util.List)}</p>
   *
   * @return list of {@link org.rocksdb.CompressionType}
   *     instances.
   */
  List<CompressionType> compressionPerLevel();

  /**
   * Set the number of levels for this database.
   * If level-styled compaction is used, then this number determines
   * the total number of levels.
   *
   * @param numLevels the number of levels.
   * @return the reference to the current options.
   */
  T setNumLevels(int numLevels);

  /**
   * If level-styled compaction is used, then this number determines
   * the total number of levels.
   *
   * @return the number of levels.
   */
  int numLevels();
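  // Illustrative sketch (not part of the original interface): one
  // CompressionType entry per level, with quicker compression for lower
  // levels. The chosen types are assumptions for demonstration only;
  // java.util.Arrays would need to be imported.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setNumLevels(3)
  //            .setCompressionPerLevel(Arrays.asList(
  //                CompressionType.NO_COMPRESSION, // L0: fastest
  //                CompressionType.LZ4_COMPRESSION, // L1: quick
  //                CompressionType.ZSTD_COMPRESSION))) { // L2: best ratio
  //     // ...
  //   }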
  /**
   * <p>If {@code true}, RocksDB will pick the target size of each level
   * dynamically. We will pick a base level b >= 1. L0 will be
   * directly merged into level b, instead of always into level 1.
   * Levels 1 to b-1 need to be empty. We try to pick b and its target
   * size so that</p>
   *
   * <ol>
   * <li>the target size is in the range of
   *   (max_bytes_for_level_base / max_bytes_for_level_multiplier,
   *   max_bytes_for_level_base]</li>
   * <li>the target size of the last level (level num_levels-1) equals the
   *   actual size of the level.</li>
   * </ol>
   *
   * <p>At the same time max_bytes_for_level_multiplier and
   * max_bytes_for_level_multiplier_additional are still satisfied.</p>
   *
   * <p>With this option on, starting from an empty DB, we make the last
   * level the base level, which means merging L0 data into the last level,
   * until it exceeds max_bytes_for_level_base. Then we make the second last
   * level the base level, and start merging L0 data into the second last
   * level, with its target size being {@code 1/max_bytes_for_level_multiplier}
   * of the last level's actual size. As the data accumulates further, we
   * move the base level to the third last level, and so on.</p>
   *
   * <p><b>Example</b></p>
   *
   * <p>For example, assume {@code max_bytes_for_level_multiplier=10},
   * {@code num_levels=6}, and {@code max_bytes_for_level_base=10MB}.</p>
   *
   * <p>Target sizes of levels 1 to 5 start as:</p>
   * {@code [- - - - 10MB]}
   * <p>with the base level being level 5. Target sizes of levels 1 to 4 are
   * not applicable because they will not be used.
   * When the size of level 5 grows to more than 10MB, say 11MB, we move the
   * base level to level 4 and the targets now look like:</p>
   * {@code [- - - 1.1MB 11MB]}
   * <p>As data accumulates, the size targets are tuned based on the actual
   * data in level 5. When level 5 has 50MB of data, the targets are:</p>
   * {@code [- - - 5MB 50MB]}
   * <p>Until level 5's actual size exceeds 100MB, say 101MB. Now if we kept
   * level 4 as the base level, its target size would need to be 10.1MB,
   * which doesn't satisfy the target size range. So now we make level 3
   * the base level and the target sizes of the levels look like:</p>
   * {@code [- - 1.01MB 10.1MB 101MB]}
   * <p>In the same way, as level 5 grows further, all levels' targets grow,
   * like</p>
   * {@code [- - 5MB 50MB 500MB]}
   * <p>Until level 5 exceeds 1000MB and becomes 1001MB, whereupon we make
   * level 2 the base level and the levels' target sizes become:</p>
   * {@code [- 1.001MB 10.01MB 100.1MB 1001MB]}
   * <p>and so on...</p>
   *
   * <p>By doing this, we give {@code max_bytes_for_level_multiplier} priority
   * over {@code max_bytes_for_level_base}, for a more predictable LSM tree
   * shape. It is useful for limiting worst-case space amplification.</p>
   *
   * <p>{@code max_bytes_for_level_multiplier_additional} is ignored with
   * this flag on.</p>
   *
   * <p>Turning this feature on or off for an existing DB can cause an
   * unexpected LSM tree structure, so it is not recommended.</p>
   *
   * <p><strong>Caution</strong>: this option is experimental</p>
   *
   * <p>Default: false</p>
   *
   * @param enableLevelCompactionDynamicLevelBytes boolean value indicating
   *     if {@code LevelCompactionDynamicLevelBytes} shall be enabled.
   * @return the reference to the current options.
   */
  @Experimental("Turning this feature on or off for an existing DB can cause "
      + "unexpected LSM tree structure so it's not recommended")
  T setLevelCompactionDynamicLevelBytes(
      boolean enableLevelCompactionDynamicLevelBytes);
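  // Illustrative sketch (not part of the original interface): enabling
  // dynamic level sizing alongside the base size and multiplier it interacts
  // with. The numbers mirror the example above and are assumptions.
  //
  //   try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
  //            .setNumLevels(6)
  //            .setMaxBytesForLevelBase(10 * 1024 * 1024) // 10MB
  //            .setMaxBytesForLevelMultiplier(10)
  //            .setLevelCompactionDynamicLevelBytes(true)) {
  //     // ...
  //   }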
  /**
   * <p>Return whether {@code LevelCompactionDynamicLevelBytes} is enabled.
   * </p>
   *
   * <p>For further information see
   * {@link #setLevelCompactionDynamicLevelBytes(boolean)}</p>
   *
   * @return boolean value indicating if
   *     {@code levelCompactionDynamicLevelBytes} is enabled.
   */
  @Experimental("Caution: this option is experimental")
  boolean levelCompactionDynamicLevelBytes();

  /**
   * Maximum size of each compaction (not guaranteed).
   *
   * @param maxCompactionBytes the compaction size limit
   * @return the reference to the current options.
   */
  T setMaxCompactionBytes(
      long maxCompactionBytes);

  /**
   * Control the maximum size of each compaction (not guaranteed).
   *
   * @return the compaction size threshold
   */
  long maxCompactionBytes();

  /**
   * Set the compaction style for the DB.
   *
   * Default: LEVEL.
   *
   * @param compactionStyle Compaction style.
   * @return the reference to the current options.
   */
  ColumnFamilyOptionsInterface setCompactionStyle(
      CompactionStyle compactionStyle);

  /**
   * Compaction style for the DB.
   *
   * @return Compaction style.
   */
  CompactionStyle compactionStyle();

  /**
   * If {@link #compactionStyle()} == {@link CompactionStyle#LEVEL},
   * this determines, for each level, which files are prioritized to be
   * picked for compaction.
   *
   * Default: {@link CompactionPriority#ByCompensatedSize}
   *
   * @param compactionPriority The compaction priority
   *
   * @return the reference to the current options.
   */
  T setCompactionPriority(
      CompactionPriority compactionPriority);

  /**
   * Get the compaction priority used when level compaction
   * is used for all levels.
   *
   * @return The compaction priority
   */
  CompactionPriority compactionPriority();

  /**
   * Set the options needed to support Universal Style compactions.
   *
   * @param compactionOptionsUniversal The Universal Style compaction options
   *
   * @return the reference to the current options.
   */
  T setCompactionOptionsUniversal(
      CompactionOptionsUniversal compactionOptionsUniversal);

  /**
   * The options needed to support Universal Style compactions.
   *
   * @return The Universal Style compaction options
   */
  CompactionOptionsUniversal compactionOptionsUniversal();
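  // Illustrative sketch (not part of the original interface): switching a
  // column family to Universal Style compaction and tuning its options. The
  // specific values are assumptions for demonstration only.
  //
  //   try (final CompactionOptionsUniversal univOpts =
  //            new CompactionOptionsUniversal()
  //                .setSizeRatio(1)
  //                .setMaxSizeAmplificationPercent(200);
  //        final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
  //     cfOpts.setCompactionStyle(CompactionStyle.UNIVERSAL);
  //     cfOpts.setCompactionOptionsUniversal(univOpts);
  //   }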
  /**
   * The options for FIFO compaction style.
   *
   * @param compactionOptionsFIFO The FIFO compaction options
   *
   * @return the reference to the current options.
   */
  T setCompactionOptionsFIFO(
      CompactionOptionsFIFO compactionOptionsFIFO);

  /**
   * The options for FIFO compaction style.
   *
   * @return The FIFO compaction options
   */
  CompactionOptionsFIFO compactionOptionsFIFO();

  /**
   * <p>This flag specifies that the implementation should optimize the filters
   * mainly for cases where keys are found rather than also optimizing for keys
   * missed. This would be used in cases where the application knows that
   * there are very few misses, or the performance in the case of misses is
   * not important.</p>
   *
   * <p>For now, this flag allows us to not store filters for the last level,
   * i.e. the largest level which contains data of the LSM store. For keys
   * which are hits, the filters in this level are not useful because we will
   * search for the data anyway.</p>
   *
   * <p><strong>NOTE</strong>: the filters in other levels are still useful
   * even for key hits, because they tell us whether to look in that level or
   * go to the higher level.</p>
   *
   * <p>Default: false</p>
   *
   * @param optimizeFiltersForHits boolean value indicating if this flag is set.
   * @return the reference to the current options.
   */
  T setOptimizeFiltersForHits(
      boolean optimizeFiltersForHits);

  /**
   * <p>Returns the current state of the {@code optimize_filters_for_hits}
   * setting.</p>
   *
   * @return boolean value indicating if the flag
   *     {@code optimize_filters_for_hits} was set.
   */
  boolean optimizeFiltersForHits();

  /**
   * In debug mode, RocksDB runs consistency checks on the LSM every time the
   * LSM changes (Flush, Compaction, AddFile). These checks are disabled in
   * release mode; use this option to enable them in release mode as well.
   *
   * Default: false
   *
   * @param forceConsistencyChecks true to force consistency checks
   *
   * @return the reference to the current options.
   */
  T setForceConsistencyChecks(
      boolean forceConsistencyChecks);

  /**
   * In debug mode, RocksDB runs consistency checks on the LSM every time the
   * LSM changes (Flush, Compaction, AddFile). These checks are disabled in
   * release mode.
   *
   * @return true if consistency checks are enforced
   */
  boolean forceConsistencyChecks();
}