// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.List;

/**
 * Advanced Column Family Options which are not
 * mutable (i.e. which are not present in
 * {@link AdvancedMutableColumnFamilyOptionsInterface}).
 *
 * Taken from include/rocksdb/advanced_options.h
 */
public interface AdvancedColumnFamilyOptionsInterface<
    T extends AdvancedColumnFamilyOptionsInterface<T>> {
  /**
   * The minimum number of write buffers that will be merged together
   * before writing to storage.  If set to 1, then
   * all write buffers are flushed to L0 as individual files and this increases
   * read amplification because a get request has to check all of these
   * files. Also, an in-memory merge may result in writing less
   * data to storage if there are duplicate records in each of these
   * individual write buffers.  Default: 1
   *
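   * <p>A minimal usage sketch, assuming the option is configured through a
   * {@code ColumnFamilyOptions} instance (the variable name {@code cfOpts}
   * is illustrative only):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   // merge at least two write buffers before they are flushed to L0
   *   cfOpts.setMinWriteBufferNumberToMerge(2);
   * }
   * }</pre>
   *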
   * @param minWriteBufferNumberToMerge the minimum number of write buffers
   *     that will be merged together.
   * @return the reference to the current options.
   */
  T setMinWriteBufferNumberToMerge(
      int minWriteBufferNumberToMerge);

  /**
   * The minimum number of write buffers that will be merged together
   * before writing to storage.  If set to 1, then
   * all write buffers are flushed to L0 as individual files and this increases
   * read amplification because a get request has to check all of these
   * files. Also, an in-memory merge may result in writing less
   * data to storage if there are duplicate records in each of these
   * individual write buffers.  Default: 1
   *
   * @return the minimum number of write buffers that will be merged together.
   */
  int minWriteBufferNumberToMerge();

  /**
   * The total maximum number of write buffers to maintain in memory including
   * copies of buffers that have already been flushed.  Unlike
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()},
   * this parameter does not affect flushing.
   * This controls the minimum amount of write history that will be available
   * in memory for conflict checking when Transactions are used.
   *
   * When using an OptimisticTransactionDB:
   * If this value is too low, some transactions may fail at commit time due
   * to not being able to determine whether there were any write conflicts.
   *
   * When using a TransactionDB:
   * If Transaction::SetSnapshot is used, TransactionDB will read either
   * in-memory write buffers or SST files to do write-conflict checking.
   * Increasing this value can reduce the number of reads to SST files
   * done for conflict detection.
   *
   * Setting this value to 0 will cause write buffers to be freed immediately
   * after they are flushed.
   * If this value is set to -1,
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
   * will be used.
   *
   * Default:
   * If using a TransactionDB/OptimisticTransactionDB, the default value will
   * be set to the value of
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
   * if it is not explicitly set by the user. Otherwise, the default is 0.
   *
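   * <p>A hedged sketch of how this might be tuned alongside
   * {@code maxWriteBufferNumber} for transactional workloads (the values and
   * the {@code cfOpts} name are illustrative, not recommendations):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   cfOpts.setMaxWriteBufferNumber(4);
   *   // keep all 4 buffers around (flushed or not) for conflict checking
   *   cfOpts.setMaxWriteBufferNumberToMaintain(4);
   * }
   * }</pre>
   *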
   * @param maxWriteBufferNumberToMaintain The maximum number of write
   *     buffers to maintain
   *
   * @return the reference to the current options.
   */
  T setMaxWriteBufferNumberToMaintain(
      int maxWriteBufferNumberToMaintain);

  /**
   * The total maximum number of write buffers to maintain in memory including
   * copies of buffers that have already been flushed.
   *
   * @return the maximum number of write buffers to maintain
   */
  int maxWriteBufferNumberToMaintain();

  /**
   * Allows thread-safe inplace updates.
   * If inplace_callback function is not set,
   *   Put(key, new_value) will update the existing_value in place iff
   *   * key exists in the current memtable
   *   * sizeof(new_value) &le; sizeof(existing_value)
   *   * existing_value for that key is a put i.e. kTypeValue
   * If inplace_callback function is set, check doc for inplace_callback.
   * Default: false.
   *
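   * <p>A minimal sketch of enabling the option (the surrounding
   * {@code ColumnFamilyOptions} usage is illustrative):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   // allow in-place overwrites of values that do not grow in size
   *   cfOpts.setInplaceUpdateSupport(true);
   * }
   * }</pre>
   *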
   * @param inplaceUpdateSupport true if thread-safe inplace updates
   *     are allowed.
   * @return the reference to the current options.
   */
  T setInplaceUpdateSupport(
      boolean inplaceUpdateSupport);

  /**
   * Allows thread-safe inplace updates.
   * If inplace_callback function is not set,
   *   Put(key, new_value) will update the existing_value in place iff
   *   * key exists in the current memtable
   *   * sizeof(new_value) &le; sizeof(existing_value)
   *   * existing_value for that key is a put i.e. kTypeValue
   * If inplace_callback function is set, check doc for inplace_callback.
   * Default: false.
   *
   * @return true if thread-safe inplace updates are allowed.
   */
  boolean inplaceUpdateSupport();

  /**
   * Control locality of bloom filter probes to improve cache miss rate.
   * This option only applies to memtable prefix bloom and plaintable
   * prefix bloom. It essentially limits the max number of cache lines each
   * bloom filter check can touch.
   * This optimization is turned off when set to 0. The number should never
   * be greater than the number of probes. This option can boost performance
   * for in-memory workloads but should be used with care since it can cause
   * higher false positive rates.
   * Default: 0
   *
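   * <p>An illustrative sketch (the value 1 is only an example, not a
   * recommendation):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   // limit each memtable/plaintable prefix bloom probe to one cache line
   *   cfOpts.setBloomLocality(1);
   * }
   * }</pre>
   *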
   * @param bloomLocality the level of locality of bloom-filter probes.
   * @return the reference to the current options.
   */
  T setBloomLocality(int bloomLocality);

  /**
   * Control locality of bloom filter probes to improve cache miss rate.
   * This option only applies to memtable prefix bloom and plaintable
   * prefix bloom. It essentially limits the max number of cache lines each
   * bloom filter check can touch.
   * This optimization is turned off when set to 0. The number should never
   * be greater than the number of probes. This option can boost performance
   * for in-memory workloads but should be used with care since it can cause
   * higher false positive rates.
   * Default: 0
   *
   * @return the level of locality of bloom-filter probes.
   * @see #setBloomLocality(int)
   */
  int bloomLocality();

  /**
   * <p>Different levels can have different compression
   * policies. There are cases where most lower levels
   * would like to use quick compression algorithms while
   * the higher levels (which have more data) use
   * compression algorithms that have better compression
   * but could be slower. This array, if non-empty, should
   * have an entry for each level of the database;
   * these override the value specified in the previous
   * field 'compression'.</p>
   *
   * <strong>NOTICE</strong>
   * <p>If {@code level_compaction_dynamic_level_bytes=true},
   * {@code compression_per_level[0]} still determines {@code L0},
   * but other elements of the array are based on the base level
   * (the level {@code L0} files are merged to), and may not
   * match the level users see from the info log for metadata.
   * </p>
   * <p>If {@code L0} files are merged to {@code level - n},
   * then, for {@code i > 0}, {@code compression_per_level[i]}
   * determines the compression type for level {@code n+i-1}.</p>
   *
   * <strong>Example</strong>
   * <p>For example, if we have 5 levels, and we determine to
   * merge {@code L0} data to {@code L4} (which means {@code L1..L3}
   * will be empty), then new files going to {@code L4} use
   * compression type {@code compression_per_level[1]}.</p>
   *
   * <p>If now {@code L0} is merged to {@code L2}, data going to
   * {@code L2} will be compressed according to
   * {@code compression_per_level[1]}, {@code L3} using
   * {@code compression_per_level[2]} and {@code L4} using
   * {@code compression_per_level[3]}. The compression type for each
   * level can change as data grows.</p>
   *
   * <p><strong>Default:</strong> empty</p>
   *
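   * <p>A hedged sketch of a per-level configuration; the chosen compression
   * types and the four-level layout are illustrative only:</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   cfOpts.setNumLevels(4);
   *   // no compression for the lower levels, Snappy for the higher ones
   *   cfOpts.setCompressionPerLevel(java.util.Arrays.asList(
   *       CompressionType.NO_COMPRESSION,
   *       CompressionType.NO_COMPRESSION,
   *       CompressionType.SNAPPY_COMPRESSION,
   *       CompressionType.SNAPPY_COMPRESSION));
   * }
   * }</pre>
   *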
   * @param compressionLevels list of
   *     {@link org.rocksdb.CompressionType} instances.
   *
   * @return the reference to the current options.
   */
  T setCompressionPerLevel(
      List<CompressionType> compressionLevels);

  /**
   * <p>Return the currently set {@link org.rocksdb.CompressionType}
   * per level.</p>
   *
   * <p>See: {@link #setCompressionPerLevel(java.util.List)}</p>
   *
   * @return list of {@link org.rocksdb.CompressionType}
   *     instances.
   */
  List<CompressionType> compressionPerLevel();

  /**
   * Set the number of levels for this database.
   * If level-styled compaction is used, then this number determines
   * the total number of levels.
   *
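   * <p>A minimal sketch (the value 4 is illustrative):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   // use a 4-level LSM tree
   *   cfOpts.setNumLevels(4);
   * }
   * }</pre>
   *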
   * @param numLevels the number of levels.
   * @return the reference to the current options.
   */
  T setNumLevels(int numLevels);

  /**
   * If level-styled compaction is used, then this number determines
   * the total number of levels.
   *
   * @return the number of levels.
   */
  int numLevels();

  /**
   * <p>If {@code true}, RocksDB will pick the target size of each level
   * dynamically. We will pick a base level b &gt;= 1. L0 will be
   * directly merged into level b, instead of always into level 1.
   * Levels 1 to b-1 need to be empty. We try to pick b and its target
   * size so that</p>
   *
   * <ol>
   * <li>target size is in the range of
   *   (max_bytes_for_level_base / max_bytes_for_level_multiplier,
   *    max_bytes_for_level_base]</li>
   * <li>target size of the last level (level num_levels-1) equals the extra
   *    size of the level.</li>
   * </ol>
   *
   * <p>At the same time max_bytes_for_level_multiplier and
   * max_bytes_for_level_multiplier_additional are still satisfied.</p>
   *
   * <p>With this option on, from an empty DB, we make the last level the base
   * level, which means merging L0 data into the last level, until it exceeds
   * max_bytes_for_level_base. Then we make the second last level the base
   * level and start to merge L0 data into it, with its target size being
   * {@code 1/max_bytes_for_level_multiplier} of the last level's extra size.
   * As data accumulates further, the base level moves to the third last
   * level, and so on.</p>
   *
   * <p><b>Example</b></p>
   *
   * <p>For example, assume {@code max_bytes_for_level_multiplier=10},
   * {@code num_levels=6}, and {@code max_bytes_for_level_base=10MB}.</p>
   *
   * <p>Target sizes of levels 1 to 5 start with:</p>
   * {@code [- - - - 10MB]}
   * <p>with the base level being level 5. Target sizes of levels 1 to 4 are
   * not applicable because they will not be used.
   * Once the size of level 5 grows to more than 10MB, say 11MB, we move the
   * base level to level 4 and the targets now look like:</p>
   * {@code [- - - 1.1MB 11MB]}
   * <p>While data are accumulated, size targets are tuned based on the actual
   * data of level 5. When level 5 has 50MB of data, the targets are:</p>
   * {@code [- - - 5MB 50MB]}
   * <p>Until level 5's actual size is more than 100MB, say 101MB. Now if we
   * keep level 4 as the base level, its target size needs to be 10.1MB,
   * which doesn't satisfy the target size range. So now we make level 3
   * the base level and the target sizes of the levels look like:</p>
   * {@code [- - 1.01MB 10.1MB 101MB]}
   * <p>In the same way, while level 5 further grows, all levels' targets grow,
   * like</p>
   * {@code [- - 5MB 50MB 500MB]}
   * <p>Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
   * base level and make the levels' target sizes like this:</p>
   * {@code [- 1.001MB 10.01MB 100.1MB 1001MB]}
   * <p>and so on...</p>
   *
   * <p>By doing this, we give {@code max_bytes_for_level_multiplier} priority
   * over {@code max_bytes_for_level_base}, for a more predictable LSM tree
   * shape. It is useful to limit worst case space amplification.</p>
   *
   * <p>{@code max_bytes_for_level_multiplier_additional} is ignored with
   * this flag on.</p>
   *
   * <p>Turning this feature on or off for an existing DB can cause unexpected
   * LSM tree structure so it's not recommended.</p>
   *
   * <p><strong>Caution</strong>: this option is experimental</p>
   *
   * <p>Default: false</p>
   *
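   * <p>A hedged sketch of enabling the feature together with the sizing
   * options it interacts with (the values are illustrative only):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   cfOpts.setMaxBytesForLevelBase(10 * 1024 * 1024); // 10MB
   *   cfOpts.setMaxBytesForLevelMultiplier(10);
   *   // let RocksDB pick per-level target sizes dynamically
   *   cfOpts.setLevelCompactionDynamicLevelBytes(true);
   * }
   * }</pre>
   *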
   * @param enableLevelCompactionDynamicLevelBytes boolean value indicating
   *     if {@code LevelCompactionDynamicLevelBytes} shall be enabled.
   * @return the reference to the current options.
   */
  @Experimental("Turning this feature on or off for an existing DB can cause "
      + "unexpected LSM tree structure so it's not recommended")
  T setLevelCompactionDynamicLevelBytes(
      boolean enableLevelCompactionDynamicLevelBytes);

  /**
   * <p>Return whether {@code LevelCompactionDynamicLevelBytes} is enabled.
   * </p>
   *
   * <p>For further information see
   * {@link #setLevelCompactionDynamicLevelBytes(boolean)}</p>
   *
   * @return boolean value indicating if
   *    {@code levelCompactionDynamicLevelBytes} is enabled.
   */
  @Experimental("Caution: this option is experimental")
  boolean levelCompactionDynamicLevelBytes();

  /**
   * Maximum size of each compaction (not guaranteed)
   *
   * @param maxCompactionBytes the compaction size limit
   * @return the reference to the current options.
   */
  T setMaxCompactionBytes(
      long maxCompactionBytes);

  /**
   * Control maximum size of each compaction (not guaranteed)
   *
   * @return compaction size threshold
   */
  long maxCompactionBytes();

  /**
   * Set compaction style for DB.
   *
   * Default: LEVEL.
   *
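   * <p>A minimal sketch of selecting a non-default style (shown here with
   * {@link CompactionStyle#UNIVERSAL}; the choice is illustrative):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   cfOpts.setCompactionStyle(CompactionStyle.UNIVERSAL);
   * }
   * }</pre>
   *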
   * @param compactionStyle Compaction style.
   * @return the reference to the current options.
   */
  ColumnFamilyOptionsInterface setCompactionStyle(
      CompactionStyle compactionStyle);

  /**
   * Compaction style for DB.
   *
   * @return Compaction style.
   */
  CompactionStyle compactionStyle();

  /**
   * If {@link #compactionStyle()} == {@link CompactionStyle#LEVEL}, this
   * determines, for each level, which files are prioritized to be picked
   * for compaction.
   *
   * Default: {@link CompactionPriority#ByCompensatedSize}
   *
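   * <p>A minimal sketch (the chosen priority is illustrative only):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   cfOpts.setCompactionPriority(CompactionPriority.MinOverlappingRatio);
   * }
   * }</pre>
   *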
   * @param compactionPriority The compaction priority
   *
   * @return the reference to the current options.
   */
  T setCompactionPriority(
      CompactionPriority compactionPriority);

  /**
   * Get the compaction priority if level compaction
   * is used for all levels.
   *
   * @return The compaction priority
   */
  CompactionPriority compactionPriority();

  /**
   * Set the options needed to support Universal Style compactions
   *
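   * <p>A hedged sketch; the {@code CompactionOptionsUniversal} settings shown
   * are illustrative, not recommendations:</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions();
   *      final CompactionOptionsUniversal universalOpts =
   *          new CompactionOptionsUniversal()) {
   *   universalOpts.setSizeRatio(10);
   *   cfOpts.setCompactionStyle(CompactionStyle.UNIVERSAL);
   *   cfOpts.setCompactionOptionsUniversal(universalOpts);
   * }
   * }</pre>
   *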
   * @param compactionOptionsUniversal The Universal Style compaction options
   *
   * @return the reference to the current options.
   */
  T setCompactionOptionsUniversal(
      CompactionOptionsUniversal compactionOptionsUniversal);

  /**
   * The options needed to support Universal Style compactions
   *
   * @return The Universal Style compaction options
   */
  CompactionOptionsUniversal compactionOptionsUniversal();

  /**
   * The options for FIFO compaction style
   *
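   * <p>A hedged sketch; the table-files size limit is illustrative only:</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions();
   *      final CompactionOptionsFIFO fifoOpts = new CompactionOptionsFIFO()) {
   *   fifoOpts.setMaxTableFilesSize(1024 * 1024 * 1024); // 1GB
   *   cfOpts.setCompactionStyle(CompactionStyle.FIFO);
   *   cfOpts.setCompactionOptionsFIFO(fifoOpts);
   * }
   * }</pre>
   *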
   * @param compactionOptionsFIFO The FIFO compaction options
   *
   * @return the reference to the current options.
   */
  T setCompactionOptionsFIFO(
      CompactionOptionsFIFO compactionOptionsFIFO);

  /**
   * The options for FIFO compaction style
   *
   * @return The FIFO compaction options
   */
  CompactionOptionsFIFO compactionOptionsFIFO();

  /**
   * <p>This flag specifies that the implementation should optimize the filters
   * mainly for cases where keys are found rather than also optimize for keys
   * missed. This would be used in cases where the application knows that
   * there are very few misses or the performance in the case of misses is not
   * important.</p>
   *
   * <p>For now, this flag allows us to not store filters for the last level,
   * i.e. the largest level which contains data of the LSM store. For keys
   * which are hits, the filters in this level are not useful because we will
   * search for the data anyway.</p>
   *
   * <p><strong>NOTE</strong>: the filters in other levels are still useful
   * even for key hits because they tell us whether to look in that level or
   * go to the higher level.</p>
   *
   * <p>Default: false</p>
   *
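   * <p>A minimal sketch (whether this helps depends on the workload; the
   * setting shown is illustrative):</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   // skip bottom-level filters when most lookups are expected to hit
   *   cfOpts.setOptimizeFiltersForHits(true);
   * }
   * }</pre>
   *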
   * @param optimizeFiltersForHits boolean value indicating if this flag is set.
   * @return the reference to the current options.
   */
  T setOptimizeFiltersForHits(
      boolean optimizeFiltersForHits);

  /**
   * <p>Returns the current state of the {@code optimize_filters_for_hits}
   * setting.</p>
   *
   * @return boolean value indicating if the flag
   *     {@code optimize_filters_for_hits} was set.
   */
  boolean optimizeFiltersForHits();

  /**
   * In debug mode, RocksDB runs consistency checks on the LSM every time the
   * LSM changes (Flush, Compaction, AddFile). These checks are disabled in
   * release mode; use this option to enable them in release mode as well.
   *
   * Default: false
   *
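   * <p>A minimal sketch of enabling the checks in a release build:</p>
   * <pre>{@code
   * try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
   *   cfOpts.setForceConsistencyChecks(true);
   * }
   * }</pre>
   *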
   * @param forceConsistencyChecks true to force consistency checks
   *
   * @return the reference to the current options.
   */
  T setForceConsistencyChecks(
      boolean forceConsistencyChecks);

  /**
   * In debug mode, RocksDB runs consistency checks on the LSM every time the
   * LSM changes (Flush, Compaction, AddFile). These checks are disabled in
   * release mode.
   *
   * @return true if consistency checks are enforced
   */
  boolean forceConsistencyChecks();
}