1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include "rocksdb/options.h"
11
12 #include <cinttypes>
13 #include <limits>
14
15 #include "monitoring/statistics.h"
16 #include "options/db_options.h"
17 #include "options/options_helper.h"
18 #include "rocksdb/cache.h"
19 #include "rocksdb/compaction_filter.h"
20 #include "rocksdb/comparator.h"
21 #include "rocksdb/env.h"
22 #include "rocksdb/memtablerep.h"
23 #include "rocksdb/merge_operator.h"
24 #include "rocksdb/slice.h"
25 #include "rocksdb/slice_transform.h"
26 #include "rocksdb/sst_file_manager.h"
27 #include "rocksdb/sst_partitioner.h"
28 #include "rocksdb/table.h"
29 #include "rocksdb/table_properties.h"
30 #include "rocksdb/wal_filter.h"
31 #include "table/block_based/block_based_table_factory.h"
32 #include "util/compression.h"
33
34 namespace ROCKSDB_NAMESPACE {
35
AdvancedColumnFamilyOptions()36 AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() {
37 assert(memtable_factory.get() != nullptr);
38 }
39
AdvancedColumnFamilyOptions(const Options & options)40 AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
41 : max_write_buffer_number(options.max_write_buffer_number),
42 min_write_buffer_number_to_merge(
43 options.min_write_buffer_number_to_merge),
44 max_write_buffer_number_to_maintain(
45 options.max_write_buffer_number_to_maintain),
46 max_write_buffer_size_to_maintain(
47 options.max_write_buffer_size_to_maintain),
48 inplace_update_support(options.inplace_update_support),
49 inplace_update_num_locks(options.inplace_update_num_locks),
50 inplace_callback(options.inplace_callback),
51 memtable_prefix_bloom_size_ratio(
52 options.memtable_prefix_bloom_size_ratio),
53 memtable_whole_key_filtering(options.memtable_whole_key_filtering),
54 memtable_huge_page_size(options.memtable_huge_page_size),
55 memtable_insert_with_hint_prefix_extractor(
56 options.memtable_insert_with_hint_prefix_extractor),
57 bloom_locality(options.bloom_locality),
58 arena_block_size(options.arena_block_size),
59 compression_per_level(options.compression_per_level),
60 num_levels(options.num_levels),
61 level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
62 level0_stop_writes_trigger(options.level0_stop_writes_trigger),
63 target_file_size_base(options.target_file_size_base),
64 target_file_size_multiplier(options.target_file_size_multiplier),
65 level_compaction_dynamic_level_bytes(
66 options.level_compaction_dynamic_level_bytes),
67 max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
68 max_bytes_for_level_multiplier_additional(
69 options.max_bytes_for_level_multiplier_additional),
70 max_compaction_bytes(options.max_compaction_bytes),
71 soft_pending_compaction_bytes_limit(
72 options.soft_pending_compaction_bytes_limit),
73 hard_pending_compaction_bytes_limit(
74 options.hard_pending_compaction_bytes_limit),
75 compaction_style(options.compaction_style),
76 compaction_pri(options.compaction_pri),
77 compaction_options_universal(options.compaction_options_universal),
78 compaction_options_fifo(options.compaction_options_fifo),
79 max_sequential_skip_in_iterations(
80 options.max_sequential_skip_in_iterations),
81 memtable_factory(options.memtable_factory),
82 table_properties_collector_factories(
83 options.table_properties_collector_factories),
84 max_successive_merges(options.max_successive_merges),
85 optimize_filters_for_hits(options.optimize_filters_for_hits),
86 paranoid_file_checks(options.paranoid_file_checks),
87 force_consistency_checks(options.force_consistency_checks),
88 report_bg_io_stats(options.report_bg_io_stats),
89 ttl(options.ttl),
90 periodic_compaction_seconds(options.periodic_compaction_seconds),
91 sample_for_compression(options.sample_for_compression),
92 enable_blob_files(options.enable_blob_files),
93 min_blob_size(options.min_blob_size),
94 blob_file_size(options.blob_file_size),
95 blob_compression_type(options.blob_compression_type),
96 enable_blob_garbage_collection(options.enable_blob_garbage_collection),
97 blob_garbage_collection_age_cutoff(
98 options.blob_garbage_collection_age_cutoff) {
99 assert(memtable_factory.get() != nullptr);
100 if (max_bytes_for_level_multiplier_additional.size() <
101 static_cast<unsigned int>(num_levels)) {
102 max_bytes_for_level_multiplier_additional.resize(num_levels, 1);
103 }
104 }
105
ColumnFamilyOptions()106 ColumnFamilyOptions::ColumnFamilyOptions()
107 : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
108 table_factory(
109 std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {}
110
ColumnFamilyOptions(const Options & options)111 ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
112 : ColumnFamilyOptions(*static_cast<const ColumnFamilyOptions*>(&options)) {}
113
DBOptions()114 DBOptions::DBOptions() {}
DBOptions(const Options & options)115 DBOptions::DBOptions(const Options& options)
116 : DBOptions(*static_cast<const DBOptions*>(&options)) {}
117
Dump(Logger * log) const118 void DBOptions::Dump(Logger* log) const {
119 ImmutableDBOptions(*this).Dump(log);
120 MutableDBOptions(*this).Dump(log);
121 } // DBOptions::Dump
122
Dump(Logger * log) const123 void ColumnFamilyOptions::Dump(Logger* log) const {
124 ROCKS_LOG_HEADER(log, " Options.comparator: %s",
125 comparator->Name());
126 ROCKS_LOG_HEADER(log, " Options.merge_operator: %s",
127 merge_operator ? merge_operator->Name() : "None");
128 ROCKS_LOG_HEADER(log, " Options.compaction_filter: %s",
129 compaction_filter ? compaction_filter->Name() : "None");
130 ROCKS_LOG_HEADER(
131 log, " Options.compaction_filter_factory: %s",
132 compaction_filter_factory ? compaction_filter_factory->Name() : "None");
133 ROCKS_LOG_HEADER(
134 log, " Options.sst_partitioner_factory: %s",
135 sst_partitioner_factory ? sst_partitioner_factory->Name() : "None");
136 ROCKS_LOG_HEADER(log, " Options.memtable_factory: %s",
137 memtable_factory->Name());
138 ROCKS_LOG_HEADER(log, " Options.table_factory: %s",
139 table_factory->Name());
140 ROCKS_LOG_HEADER(log, " table_factory options: %s",
141 table_factory->GetPrintableOptions().c_str());
142 ROCKS_LOG_HEADER(log, " Options.write_buffer_size: %" ROCKSDB_PRIszt,
143 write_buffer_size);
144 ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number: %d",
145 max_write_buffer_number);
146 if (!compression_per_level.empty()) {
147 for (unsigned int i = 0; i < compression_per_level.size(); i++) {
148 ROCKS_LOG_HEADER(
149 log, " Options.compression[%d]: %s", i,
150 CompressionTypeToString(compression_per_level[i]).c_str());
151 }
152 } else {
153 ROCKS_LOG_HEADER(log, " Options.compression: %s",
154 CompressionTypeToString(compression).c_str());
155 }
156 ROCKS_LOG_HEADER(
157 log, " Options.bottommost_compression: %s",
158 bottommost_compression == kDisableCompressionOption
159 ? "Disabled"
160 : CompressionTypeToString(bottommost_compression).c_str());
161 ROCKS_LOG_HEADER(
162 log, " Options.prefix_extractor: %s",
163 prefix_extractor == nullptr ? "nullptr" : prefix_extractor->Name());
164 ROCKS_LOG_HEADER(log,
165 " Options.memtable_insert_with_hint_prefix_extractor: %s",
166 memtable_insert_with_hint_prefix_extractor == nullptr
167 ? "nullptr"
168 : memtable_insert_with_hint_prefix_extractor->Name());
169 ROCKS_LOG_HEADER(log, " Options.num_levels: %d", num_levels);
170 ROCKS_LOG_HEADER(log, " Options.min_write_buffer_number_to_merge: %d",
171 min_write_buffer_number_to_merge);
172 ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number_to_maintain: %d",
173 max_write_buffer_number_to_maintain);
174 ROCKS_LOG_HEADER(log,
175 " Options.max_write_buffer_size_to_maintain: %" PRIu64,
176 max_write_buffer_size_to_maintain);
177 ROCKS_LOG_HEADER(
178 log, " Options.bottommost_compression_opts.window_bits: %d",
179 bottommost_compression_opts.window_bits);
180 ROCKS_LOG_HEADER(
181 log, " Options.bottommost_compression_opts.level: %d",
182 bottommost_compression_opts.level);
183 ROCKS_LOG_HEADER(
184 log, " Options.bottommost_compression_opts.strategy: %d",
185 bottommost_compression_opts.strategy);
186 ROCKS_LOG_HEADER(
187 log,
188 " Options.bottommost_compression_opts.max_dict_bytes: "
189 "%" PRIu32,
190 bottommost_compression_opts.max_dict_bytes);
191 ROCKS_LOG_HEADER(
192 log,
193 " Options.bottommost_compression_opts.zstd_max_train_bytes: "
194 "%" PRIu32,
195 bottommost_compression_opts.zstd_max_train_bytes);
196 ROCKS_LOG_HEADER(
197 log,
198 " Options.bottommost_compression_opts.parallel_threads: "
199 "%" PRIu32,
200 bottommost_compression_opts.parallel_threads);
201 ROCKS_LOG_HEADER(
202 log, " Options.bottommost_compression_opts.enabled: %s",
203 bottommost_compression_opts.enabled ? "true" : "false");
204 ROCKS_LOG_HEADER(
205 log,
206 " Options.bottommost_compression_opts.max_dict_buffer_bytes: "
207 "%" PRIu64,
208 bottommost_compression_opts.max_dict_buffer_bytes);
209 ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d",
210 compression_opts.window_bits);
211 ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d",
212 compression_opts.level);
213 ROCKS_LOG_HEADER(log, " Options.compression_opts.strategy: %d",
214 compression_opts.strategy);
215 ROCKS_LOG_HEADER(
216 log,
217 " Options.compression_opts.max_dict_bytes: %" PRIu32,
218 compression_opts.max_dict_bytes);
219 ROCKS_LOG_HEADER(log,
220 " Options.compression_opts.zstd_max_train_bytes: "
221 "%" PRIu32,
222 compression_opts.zstd_max_train_bytes);
223 ROCKS_LOG_HEADER(log,
224 " Options.compression_opts.parallel_threads: "
225 "%" PRIu32,
226 compression_opts.parallel_threads);
227 ROCKS_LOG_HEADER(log,
228 " Options.compression_opts.enabled: %s",
229 compression_opts.enabled ? "true" : "false");
230 ROCKS_LOG_HEADER(log,
231 " Options.compression_opts.max_dict_buffer_bytes: "
232 "%" PRIu64,
233 compression_opts.max_dict_buffer_bytes);
234 ROCKS_LOG_HEADER(log, " Options.level0_file_num_compaction_trigger: %d",
235 level0_file_num_compaction_trigger);
236 ROCKS_LOG_HEADER(log, " Options.level0_slowdown_writes_trigger: %d",
237 level0_slowdown_writes_trigger);
238 ROCKS_LOG_HEADER(log, " Options.level0_stop_writes_trigger: %d",
239 level0_stop_writes_trigger);
240 ROCKS_LOG_HEADER(
241 log, " Options.target_file_size_base: %" PRIu64,
242 target_file_size_base);
243 ROCKS_LOG_HEADER(log, " Options.target_file_size_multiplier: %d",
244 target_file_size_multiplier);
245 ROCKS_LOG_HEADER(
246 log, " Options.max_bytes_for_level_base: %" PRIu64,
247 max_bytes_for_level_base);
248 ROCKS_LOG_HEADER(log, "Options.level_compaction_dynamic_level_bytes: %d",
249 level_compaction_dynamic_level_bytes);
250 ROCKS_LOG_HEADER(log, " Options.max_bytes_for_level_multiplier: %f",
251 max_bytes_for_level_multiplier);
252 for (size_t i = 0; i < max_bytes_for_level_multiplier_additional.size();
253 i++) {
254 ROCKS_LOG_HEADER(
255 log, "Options.max_bytes_for_level_multiplier_addtl[%" ROCKSDB_PRIszt
256 "]: %d",
257 i, max_bytes_for_level_multiplier_additional[i]);
258 }
259 ROCKS_LOG_HEADER(
260 log, " Options.max_sequential_skip_in_iterations: %" PRIu64,
261 max_sequential_skip_in_iterations);
262 ROCKS_LOG_HEADER(
263 log, " Options.max_compaction_bytes: %" PRIu64,
264 max_compaction_bytes);
265 ROCKS_LOG_HEADER(
266 log,
267 " Options.arena_block_size: %" ROCKSDB_PRIszt,
268 arena_block_size);
269 ROCKS_LOG_HEADER(log,
270 " Options.soft_pending_compaction_bytes_limit: %" PRIu64,
271 soft_pending_compaction_bytes_limit);
272 ROCKS_LOG_HEADER(log,
273 " Options.hard_pending_compaction_bytes_limit: %" PRIu64,
274 hard_pending_compaction_bytes_limit);
275 ROCKS_LOG_HEADER(log, " Options.rate_limit_delay_max_milliseconds: %u",
276 rate_limit_delay_max_milliseconds);
277 ROCKS_LOG_HEADER(log, " Options.disable_auto_compactions: %d",
278 disable_auto_compactions);
279
280 const auto& it_compaction_style =
281 compaction_style_to_string.find(compaction_style);
282 std::string str_compaction_style;
283 if (it_compaction_style == compaction_style_to_string.end()) {
284 assert(false);
285 str_compaction_style = "unknown_" + std::to_string(compaction_style);
286 } else {
287 str_compaction_style = it_compaction_style->second;
288 }
289 ROCKS_LOG_HEADER(log,
290 " Options.compaction_style: %s",
291 str_compaction_style.c_str());
292
293 const auto& it_compaction_pri =
294 compaction_pri_to_string.find(compaction_pri);
295 std::string str_compaction_pri;
296 if (it_compaction_pri == compaction_pri_to_string.end()) {
297 assert(false);
298 str_compaction_pri = "unknown_" + std::to_string(compaction_pri);
299 } else {
300 str_compaction_pri = it_compaction_pri->second;
301 }
302 ROCKS_LOG_HEADER(log,
303 " Options.compaction_pri: %s",
304 str_compaction_pri.c_str());
305 ROCKS_LOG_HEADER(log,
306 "Options.compaction_options_universal.size_ratio: %u",
307 compaction_options_universal.size_ratio);
308 ROCKS_LOG_HEADER(log,
309 "Options.compaction_options_universal.min_merge_width: %u",
310 compaction_options_universal.min_merge_width);
311 ROCKS_LOG_HEADER(log,
312 "Options.compaction_options_universal.max_merge_width: %u",
313 compaction_options_universal.max_merge_width);
314 ROCKS_LOG_HEADER(
315 log,
316 "Options.compaction_options_universal."
317 "max_size_amplification_percent: %u",
318 compaction_options_universal.max_size_amplification_percent);
319 ROCKS_LOG_HEADER(
320 log,
321 "Options.compaction_options_universal.compression_size_percent: %d",
322 compaction_options_universal.compression_size_percent);
323 const auto& it_compaction_stop_style = compaction_stop_style_to_string.find(
324 compaction_options_universal.stop_style);
325 std::string str_compaction_stop_style;
326 if (it_compaction_stop_style == compaction_stop_style_to_string.end()) {
327 assert(false);
328 str_compaction_stop_style =
329 "unknown_" + std::to_string(compaction_options_universal.stop_style);
330 } else {
331 str_compaction_stop_style = it_compaction_stop_style->second;
332 }
333 ROCKS_LOG_HEADER(log,
334 "Options.compaction_options_universal.stop_style: %s",
335 str_compaction_stop_style.c_str());
336 ROCKS_LOG_HEADER(
337 log, "Options.compaction_options_fifo.max_table_files_size: %" PRIu64,
338 compaction_options_fifo.max_table_files_size);
339 ROCKS_LOG_HEADER(log,
340 "Options.compaction_options_fifo.allow_compaction: %d",
341 compaction_options_fifo.allow_compaction);
342 std::ostringstream collector_info;
343 for (const auto& collector_factory : table_properties_collector_factories) {
344 collector_info << collector_factory->ToString() << ';';
345 }
346 ROCKS_LOG_HEADER(
347 log, " Options.table_properties_collectors: %s",
348 collector_info.str().c_str());
349 ROCKS_LOG_HEADER(log,
350 " Options.inplace_update_support: %d",
351 inplace_update_support);
352 ROCKS_LOG_HEADER(
353 log,
354 " Options.inplace_update_num_locks: %" ROCKSDB_PRIszt,
355 inplace_update_num_locks);
356 // TODO: easier config for bloom (maybe based on avg key/value size)
357 ROCKS_LOG_HEADER(
358 log, " Options.memtable_prefix_bloom_size_ratio: %f",
359 memtable_prefix_bloom_size_ratio);
360 ROCKS_LOG_HEADER(log,
361 " Options.memtable_whole_key_filtering: %d",
362 memtable_whole_key_filtering);
363
364 ROCKS_LOG_HEADER(log, " Options.memtable_huge_page_size: %" ROCKSDB_PRIszt,
365 memtable_huge_page_size);
366 ROCKS_LOG_HEADER(log,
367 " Options.bloom_locality: %d",
368 bloom_locality);
369
370 ROCKS_LOG_HEADER(
371 log,
372 " Options.max_successive_merges: %" ROCKSDB_PRIszt,
373 max_successive_merges);
374 ROCKS_LOG_HEADER(log,
375 " Options.optimize_filters_for_hits: %d",
376 optimize_filters_for_hits);
377 ROCKS_LOG_HEADER(log, " Options.paranoid_file_checks: %d",
378 paranoid_file_checks);
379 ROCKS_LOG_HEADER(log, " Options.force_consistency_checks: %d",
380 force_consistency_checks);
381 ROCKS_LOG_HEADER(log, " Options.report_bg_io_stats: %d",
382 report_bg_io_stats);
383 ROCKS_LOG_HEADER(log, " Options.ttl: %" PRIu64,
384 ttl);
385 ROCKS_LOG_HEADER(log,
386 " Options.periodic_compaction_seconds: %" PRIu64,
387 periodic_compaction_seconds);
388 ROCKS_LOG_HEADER(log, " Options.enable_blob_files: %s",
389 enable_blob_files ? "true" : "false");
390 ROCKS_LOG_HEADER(log,
391 " Options.min_blob_size: %" PRIu64,
392 min_blob_size);
393 ROCKS_LOG_HEADER(log,
394 " Options.blob_file_size: %" PRIu64,
395 blob_file_size);
396 ROCKS_LOG_HEADER(log, " Options.blob_compression_type: %s",
397 CompressionTypeToString(blob_compression_type).c_str());
398 ROCKS_LOG_HEADER(log, " Options.enable_blob_garbage_collection: %s",
399 enable_blob_garbage_collection ? "true" : "false");
400 ROCKS_LOG_HEADER(log, " Options.blob_garbage_collection_age_cutoff: %f",
401 blob_garbage_collection_age_cutoff);
402 } // ColumnFamilyOptions::Dump
403
Dump(Logger * log) const404 void Options::Dump(Logger* log) const {
405 DBOptions::Dump(log);
406 ColumnFamilyOptions::Dump(log);
407 } // Options::Dump
408
DumpCFOptions(Logger * log) const409 void Options::DumpCFOptions(Logger* log) const {
410 ColumnFamilyOptions::Dump(log);
411 } // Options::DumpCFOptions
412
413 //
414 // The goal of this method is to create a configuration that
415 // allows an application to write all files into L0 and
416 // then do a single compaction to output all files into L1.
417 Options*
PrepareForBulkLoad()418 Options::PrepareForBulkLoad()
419 {
420 // never slowdown ingest.
421 level0_file_num_compaction_trigger = (1<<30);
422 level0_slowdown_writes_trigger = (1<<30);
423 level0_stop_writes_trigger = (1<<30);
424 soft_pending_compaction_bytes_limit = 0;
425 hard_pending_compaction_bytes_limit = 0;
426
427 // no auto compactions please. The application should issue a
428 // manual compaction after all data is loaded into L0.
429 disable_auto_compactions = true;
430 // A manual compaction run should pick all files in L0 in
431 // a single compaction run.
432 max_compaction_bytes = (static_cast<uint64_t>(1) << 60);
433
434 // It is better to have only 2 levels, otherwise a manual
435 // compaction would compact at every possible level, thereby
436 // increasing the total time needed for compactions.
437 num_levels = 2;
438
439 // Need to allow more write buffers to allow more parallism
440 // of flushes.
441 max_write_buffer_number = 6;
442 min_write_buffer_number_to_merge = 1;
443
444 // When compaction is disabled, more parallel flush threads can
445 // help with write throughput.
446 max_background_flushes = 4;
447
448 // Prevent a memtable flush to automatically promote files
449 // to L1. This is helpful so that all files that are
450 // input to the manual compaction are all at L0.
451 max_background_compactions = 2;
452
453 // The compaction would create large files in L1.
454 target_file_size_base = 256 * 1024 * 1024;
455 return this;
456 }
457
OptimizeForSmallDb()458 Options* Options::OptimizeForSmallDb() {
459 // 16MB block cache
460 std::shared_ptr<Cache> cache = NewLRUCache(16 << 20);
461
462 ColumnFamilyOptions::OptimizeForSmallDb(&cache);
463 DBOptions::OptimizeForSmallDb(&cache);
464 return this;
465 }
466
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)467 Options* Options::OldDefaults(int rocksdb_major_version,
468 int rocksdb_minor_version) {
469 ColumnFamilyOptions::OldDefaults(rocksdb_major_version,
470 rocksdb_minor_version);
471 DBOptions::OldDefaults(rocksdb_major_version, rocksdb_minor_version);
472 return this;
473 }
474
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)475 DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
476 int rocksdb_minor_version) {
477 if (rocksdb_major_version < 4 ||
478 (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
479 max_file_opening_threads = 1;
480 table_cache_numshardbits = 4;
481 }
482 if (rocksdb_major_version < 5 ||
483 (rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
484 delayed_write_rate = 2 * 1024U * 1024U;
485 } else if (rocksdb_major_version < 5 ||
486 (rocksdb_major_version == 5 && rocksdb_minor_version < 6)) {
487 delayed_write_rate = 16 * 1024U * 1024U;
488 }
489 max_open_files = 5000;
490 wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
491 return this;
492 }
493
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)494 ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
495 int rocksdb_major_version, int rocksdb_minor_version) {
496 if (rocksdb_major_version < 5 ||
497 (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) {
498 compaction_pri = CompactionPri::kByCompensatedSize;
499 }
500 if (rocksdb_major_version < 4 ||
501 (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
502 write_buffer_size = 4 << 20;
503 target_file_size_base = 2 * 1048576;
504 max_bytes_for_level_base = 10 * 1048576;
505 soft_pending_compaction_bytes_limit = 0;
506 hard_pending_compaction_bytes_limit = 0;
507 }
508 if (rocksdb_major_version < 5) {
509 level0_stop_writes_trigger = 24;
510 } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) {
511 level0_stop_writes_trigger = 30;
512 }
513
514 return this;
515 }
516
517 // Optimization functions
OptimizeForSmallDb(std::shared_ptr<Cache> * cache)518 DBOptions* DBOptions::OptimizeForSmallDb(std::shared_ptr<Cache>* cache) {
519 max_file_opening_threads = 1;
520 max_open_files = 5000;
521
522 // Cost memtable to block cache too.
523 std::shared_ptr<ROCKSDB_NAMESPACE::WriteBufferManager> wbm =
524 std::make_shared<ROCKSDB_NAMESPACE::WriteBufferManager>(
525 0, (cache != nullptr) ? *cache : std::shared_ptr<Cache>());
526 write_buffer_manager = wbm;
527
528 return this;
529 }
530
OptimizeForSmallDb(std::shared_ptr<Cache> * cache)531 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb(
532 std::shared_ptr<Cache>* cache) {
533 write_buffer_size = 2 << 20;
534 target_file_size_base = 2 * 1048576;
535 max_bytes_for_level_base = 10 * 1048576;
536 soft_pending_compaction_bytes_limit = 256 * 1048576;
537 hard_pending_compaction_bytes_limit = 1073741824ul;
538
539 BlockBasedTableOptions table_options;
540 table_options.block_cache =
541 (cache != nullptr) ? *cache : std::shared_ptr<Cache>();
542 table_options.cache_index_and_filter_blocks = true;
543 // Two level iterator to avoid LRU cache imbalance
544 table_options.index_type =
545 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
546 table_factory.reset(new BlockBasedTableFactory(table_options));
547
548 return this;
549 }
550
551 #ifndef ROCKSDB_LITE
OptimizeForPointLookup(uint64_t block_cache_size_mb)552 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForPointLookup(
553 uint64_t block_cache_size_mb) {
554 BlockBasedTableOptions block_based_options;
555 block_based_options.data_block_index_type =
556 BlockBasedTableOptions::kDataBlockBinaryAndHash;
557 block_based_options.data_block_hash_table_util_ratio = 0.75;
558 block_based_options.filter_policy.reset(NewBloomFilterPolicy(10));
559 block_based_options.block_cache =
560 NewLRUCache(static_cast<size_t>(block_cache_size_mb * 1024 * 1024));
561 table_factory.reset(new BlockBasedTableFactory(block_based_options));
562 memtable_prefix_bloom_size_ratio = 0.02;
563 memtable_whole_key_filtering = true;
564 return this;
565 }
566
OptimizeLevelStyleCompaction(uint64_t memtable_memory_budget)567 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeLevelStyleCompaction(
568 uint64_t memtable_memory_budget) {
569 write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
570 // merge two memtables when flushing to L0
571 min_write_buffer_number_to_merge = 2;
572 // this means we'll use 50% extra memory in the worst case, but will reduce
573 // write stalls.
574 max_write_buffer_number = 6;
575 // start flushing L0->L1 as soon as possible. each file on level0 is
576 // (memtable_memory_budget / 2). This will flush level 0 when it's bigger than
577 // memtable_memory_budget.
578 level0_file_num_compaction_trigger = 2;
579 // doesn't really matter much, but we don't want to create too many files
580 target_file_size_base = memtable_memory_budget / 8;
581 // make Level1 size equal to Level0 size, so that L0->L1 compactions are fast
582 max_bytes_for_level_base = memtable_memory_budget;
583
584 // level style compaction
585 compaction_style = kCompactionStyleLevel;
586
587 // only compress levels >= 2
588 compression_per_level.resize(num_levels);
589 for (int i = 0; i < num_levels; ++i) {
590 if (i < 2) {
591 compression_per_level[i] = kNoCompression;
592 } else {
593 compression_per_level[i] =
594 LZ4_Supported()
595 ? kLZ4Compression
596 : (Snappy_Supported() ? kSnappyCompression : kNoCompression);
597 }
598 }
599 return this;
600 }
601
OptimizeUniversalStyleCompaction(uint64_t memtable_memory_budget)602 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeUniversalStyleCompaction(
603 uint64_t memtable_memory_budget) {
604 write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
605 // merge two memtables when flushing to L0
606 min_write_buffer_number_to_merge = 2;
607 // this means we'll use 50% extra memory in the worst case, but will reduce
608 // write stalls.
609 max_write_buffer_number = 6;
610 // universal style compaction
611 compaction_style = kCompactionStyleUniversal;
612 compaction_options_universal.compression_size_percent = 80;
613 return this;
614 }
615
IncreaseParallelism(int total_threads)616 DBOptions* DBOptions::IncreaseParallelism(int total_threads) {
617 max_background_jobs = total_threads;
618 env->SetBackgroundThreads(total_threads, Env::LOW);
619 env->SetBackgroundThreads(1, Env::HIGH);
620 return this;
621 }
622
623 #endif // !ROCKSDB_LITE
624
ReadOptions()625 ReadOptions::ReadOptions()
626 : snapshot(nullptr),
627 iterate_lower_bound(nullptr),
628 iterate_upper_bound(nullptr),
629 readahead_size(0),
630 max_skippable_internal_keys(0),
631 read_tier(kReadAllTier),
632 verify_checksums(true),
633 fill_cache(true),
634 tailing(false),
635 managed(false),
636 total_order_seek(false),
637 auto_prefix_mode(false),
638 prefix_same_as_start(false),
639 pin_data(false),
640 background_purge_on_iterator_cleanup(false),
641 ignore_range_deletions(false),
642 iter_start_seqnum(0),
643 timestamp(nullptr),
644 iter_start_ts(nullptr),
645 deadline(std::chrono::microseconds::zero()),
646 io_timeout(std::chrono::microseconds::zero()),
647 value_size_soft_limit(std::numeric_limits<uint64_t>::max()) {}
648
ReadOptions(bool cksum,bool cache)649 ReadOptions::ReadOptions(bool cksum, bool cache)
650 : snapshot(nullptr),
651 iterate_lower_bound(nullptr),
652 iterate_upper_bound(nullptr),
653 readahead_size(0),
654 max_skippable_internal_keys(0),
655 read_tier(kReadAllTier),
656 verify_checksums(cksum),
657 fill_cache(cache),
658 tailing(false),
659 managed(false),
660 total_order_seek(false),
661 auto_prefix_mode(false),
662 prefix_same_as_start(false),
663 pin_data(false),
664 background_purge_on_iterator_cleanup(false),
665 ignore_range_deletions(false),
666 iter_start_seqnum(0),
667 timestamp(nullptr),
668 iter_start_ts(nullptr),
669 deadline(std::chrono::microseconds::zero()),
670 io_timeout(std::chrono::microseconds::zero()),
671 value_size_soft_limit(std::numeric_limits<uint64_t>::max()) {}
672
673 } // namespace ROCKSDB_NAMESPACE
674