1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include "rocksdb/options.h"
11 
12 #include <cinttypes>
13 #include <limits>
14 
15 #include "monitoring/statistics.h"
16 #include "options/db_options.h"
17 #include "options/options_helper.h"
18 #include "rocksdb/cache.h"
19 #include "rocksdb/compaction_filter.h"
20 #include "rocksdb/comparator.h"
21 #include "rocksdb/env.h"
22 #include "rocksdb/memtablerep.h"
23 #include "rocksdb/merge_operator.h"
24 #include "rocksdb/slice.h"
25 #include "rocksdb/slice_transform.h"
26 #include "rocksdb/sst_file_manager.h"
27 #include "rocksdb/table.h"
28 #include "rocksdb/table_properties.h"
29 #include "rocksdb/wal_filter.h"
30 #include "table/block_based/block_based_table_factory.h"
31 #include "util/compression.h"
32 
33 namespace ROCKSDB_NAMESPACE {
34 
AdvancedColumnFamilyOptions()35 AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() {
36   assert(memtable_factory.get() != nullptr);
37 }
38 
AdvancedColumnFamilyOptions(const Options & options)39 AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
40     : max_write_buffer_number(options.max_write_buffer_number),
41       min_write_buffer_number_to_merge(
42           options.min_write_buffer_number_to_merge),
43       max_write_buffer_number_to_maintain(
44           options.max_write_buffer_number_to_maintain),
45       max_write_buffer_size_to_maintain(
46           options.max_write_buffer_size_to_maintain),
47       inplace_update_support(options.inplace_update_support),
48       inplace_update_num_locks(options.inplace_update_num_locks),
49       inplace_callback(options.inplace_callback),
50       memtable_prefix_bloom_size_ratio(
51           options.memtable_prefix_bloom_size_ratio),
52       memtable_whole_key_filtering(options.memtable_whole_key_filtering),
53       memtable_huge_page_size(options.memtable_huge_page_size),
54       memtable_insert_with_hint_prefix_extractor(
55           options.memtable_insert_with_hint_prefix_extractor),
56       bloom_locality(options.bloom_locality),
57       arena_block_size(options.arena_block_size),
58       compression_per_level(options.compression_per_level),
59       num_levels(options.num_levels),
60       level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
61       level0_stop_writes_trigger(options.level0_stop_writes_trigger),
62       target_file_size_base(options.target_file_size_base),
63       target_file_size_multiplier(options.target_file_size_multiplier),
64       level_compaction_dynamic_level_bytes(
65           options.level_compaction_dynamic_level_bytes),
66       max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
67       max_bytes_for_level_multiplier_additional(
68           options.max_bytes_for_level_multiplier_additional),
69       max_compaction_bytes(options.max_compaction_bytes),
70       soft_pending_compaction_bytes_limit(
71           options.soft_pending_compaction_bytes_limit),
72       hard_pending_compaction_bytes_limit(
73           options.hard_pending_compaction_bytes_limit),
74       compaction_style(options.compaction_style),
75       compaction_pri(options.compaction_pri),
76       compaction_options_universal(options.compaction_options_universal),
77       compaction_options_fifo(options.compaction_options_fifo),
78       max_sequential_skip_in_iterations(
79           options.max_sequential_skip_in_iterations),
80       memtable_factory(options.memtable_factory),
81       table_properties_collector_factories(
82           options.table_properties_collector_factories),
83       max_successive_merges(options.max_successive_merges),
84       optimize_filters_for_hits(options.optimize_filters_for_hits),
85       paranoid_file_checks(options.paranoid_file_checks),
86       force_consistency_checks(options.force_consistency_checks),
87       report_bg_io_stats(options.report_bg_io_stats),
88       ttl(options.ttl),
89       periodic_compaction_seconds(options.periodic_compaction_seconds),
90       sample_for_compression(options.sample_for_compression) {
91   assert(memtable_factory.get() != nullptr);
92   if (max_bytes_for_level_multiplier_additional.size() <
93       static_cast<unsigned int>(num_levels)) {
94     max_bytes_for_level_multiplier_additional.resize(num_levels, 1);
95   }
96 }
97 
ColumnFamilyOptions()98 ColumnFamilyOptions::ColumnFamilyOptions()
99     : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
100       table_factory(
101           std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {}
102 
ColumnFamilyOptions(const Options & options)103 ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
104     : ColumnFamilyOptions(*static_cast<const ColumnFamilyOptions*>(&options)) {}
105 
DBOptions()106 DBOptions::DBOptions() {}
DBOptions(const Options & options)107 DBOptions::DBOptions(const Options& options)
108     : DBOptions(*static_cast<const DBOptions*>(&options)) {}
109 
Dump(Logger * log) const110 void DBOptions::Dump(Logger* log) const {
111     ImmutableDBOptions(*this).Dump(log);
112     MutableDBOptions(*this).Dump(log);
113 }  // DBOptions::Dump
114 
Dump(Logger * log) const115 void ColumnFamilyOptions::Dump(Logger* log) const {
116   ROCKS_LOG_HEADER(log, "              Options.comparator: %s",
117                    comparator->Name());
118   ROCKS_LOG_HEADER(log, "          Options.merge_operator: %s",
119                    merge_operator ? merge_operator->Name() : "None");
120   ROCKS_LOG_HEADER(log, "       Options.compaction_filter: %s",
121                    compaction_filter ? compaction_filter->Name() : "None");
122   ROCKS_LOG_HEADER(
123       log, "       Options.compaction_filter_factory: %s",
124       compaction_filter_factory ? compaction_filter_factory->Name() : "None");
125   ROCKS_LOG_HEADER(log, "        Options.memtable_factory: %s",
126                    memtable_factory->Name());
127   ROCKS_LOG_HEADER(log, "           Options.table_factory: %s",
128                    table_factory->Name());
129   ROCKS_LOG_HEADER(log, "           table_factory options: %s",
130                    table_factory->GetPrintableTableOptions().c_str());
131   ROCKS_LOG_HEADER(log, "       Options.write_buffer_size: %" ROCKSDB_PRIszt,
132                    write_buffer_size);
133   ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number: %d",
134                    max_write_buffer_number);
135   if (!compression_per_level.empty()) {
136     for (unsigned int i = 0; i < compression_per_level.size(); i++) {
137       ROCKS_LOG_HEADER(
138           log, "       Options.compression[%d]: %s", i,
139           CompressionTypeToString(compression_per_level[i]).c_str());
140     }
141     } else {
142       ROCKS_LOG_HEADER(log, "         Options.compression: %s",
143                        CompressionTypeToString(compression).c_str());
144     }
145     ROCKS_LOG_HEADER(
146         log, "                 Options.bottommost_compression: %s",
147         bottommost_compression == kDisableCompressionOption
148             ? "Disabled"
149             : CompressionTypeToString(bottommost_compression).c_str());
150     ROCKS_LOG_HEADER(
151         log, "      Options.prefix_extractor: %s",
152         prefix_extractor == nullptr ? "nullptr" : prefix_extractor->Name());
153     ROCKS_LOG_HEADER(log,
154                      "  Options.memtable_insert_with_hint_prefix_extractor: %s",
155                      memtable_insert_with_hint_prefix_extractor == nullptr
156                          ? "nullptr"
157                          : memtable_insert_with_hint_prefix_extractor->Name());
158     ROCKS_LOG_HEADER(log, "            Options.num_levels: %d", num_levels);
159     ROCKS_LOG_HEADER(log, "       Options.min_write_buffer_number_to_merge: %d",
160                      min_write_buffer_number_to_merge);
161     ROCKS_LOG_HEADER(log, "    Options.max_write_buffer_number_to_maintain: %d",
162                      max_write_buffer_number_to_maintain);
163     ROCKS_LOG_HEADER(log,
164                      "    Options.max_write_buffer_size_to_maintain: %" PRIu64,
165                      max_write_buffer_size_to_maintain);
166     ROCKS_LOG_HEADER(
167         log, "           Options.bottommost_compression_opts.window_bits: %d",
168         bottommost_compression_opts.window_bits);
169     ROCKS_LOG_HEADER(
170         log, "                 Options.bottommost_compression_opts.level: %d",
171         bottommost_compression_opts.level);
172     ROCKS_LOG_HEADER(
173         log, "              Options.bottommost_compression_opts.strategy: %d",
174         bottommost_compression_opts.strategy);
175     ROCKS_LOG_HEADER(
176         log,
177         "        Options.bottommost_compression_opts.max_dict_bytes: "
178         "%" PRIu32,
179         bottommost_compression_opts.max_dict_bytes);
180     ROCKS_LOG_HEADER(
181         log,
182         "        Options.bottommost_compression_opts.zstd_max_train_bytes: "
183         "%" PRIu32,
184         bottommost_compression_opts.zstd_max_train_bytes);
185     ROCKS_LOG_HEADER(
186         log, "                 Options.bottommost_compression_opts.enabled: %s",
187         bottommost_compression_opts.enabled ? "true" : "false");
188     ROCKS_LOG_HEADER(log, "           Options.compression_opts.window_bits: %d",
189                      compression_opts.window_bits);
190     ROCKS_LOG_HEADER(log, "                 Options.compression_opts.level: %d",
191                      compression_opts.level);
192     ROCKS_LOG_HEADER(log, "              Options.compression_opts.strategy: %d",
193                      compression_opts.strategy);
194     ROCKS_LOG_HEADER(
195         log,
196         "        Options.compression_opts.max_dict_bytes: %" PRIu32,
197         compression_opts.max_dict_bytes);
198     ROCKS_LOG_HEADER(log,
199                      "        Options.compression_opts.zstd_max_train_bytes: "
200                      "%" PRIu32,
201                      compression_opts.zstd_max_train_bytes);
202     ROCKS_LOG_HEADER(log,
203                      "                 Options.compression_opts.enabled: %s",
204                      compression_opts.enabled ? "true" : "false");
205     ROCKS_LOG_HEADER(log, "     Options.level0_file_num_compaction_trigger: %d",
206                      level0_file_num_compaction_trigger);
207     ROCKS_LOG_HEADER(log, "         Options.level0_slowdown_writes_trigger: %d",
208                      level0_slowdown_writes_trigger);
209     ROCKS_LOG_HEADER(log, "             Options.level0_stop_writes_trigger: %d",
210                      level0_stop_writes_trigger);
211     ROCKS_LOG_HEADER(
212         log, "                  Options.target_file_size_base: %" PRIu64,
213         target_file_size_base);
214     ROCKS_LOG_HEADER(log, "            Options.target_file_size_multiplier: %d",
215                      target_file_size_multiplier);
216     ROCKS_LOG_HEADER(
217         log, "               Options.max_bytes_for_level_base: %" PRIu64,
218         max_bytes_for_level_base);
219     ROCKS_LOG_HEADER(log, "Options.level_compaction_dynamic_level_bytes: %d",
220                      level_compaction_dynamic_level_bytes);
221     ROCKS_LOG_HEADER(log, "         Options.max_bytes_for_level_multiplier: %f",
222                      max_bytes_for_level_multiplier);
223     for (size_t i = 0; i < max_bytes_for_level_multiplier_additional.size();
224          i++) {
225       ROCKS_LOG_HEADER(
226           log, "Options.max_bytes_for_level_multiplier_addtl[%" ROCKSDB_PRIszt
227                "]: %d",
228           i, max_bytes_for_level_multiplier_additional[i]);
229     }
230     ROCKS_LOG_HEADER(
231         log, "      Options.max_sequential_skip_in_iterations: %" PRIu64,
232         max_sequential_skip_in_iterations);
233     ROCKS_LOG_HEADER(
234         log, "                   Options.max_compaction_bytes: %" PRIu64,
235         max_compaction_bytes);
236     ROCKS_LOG_HEADER(
237         log,
238         "                       Options.arena_block_size: %" ROCKSDB_PRIszt,
239         arena_block_size);
240     ROCKS_LOG_HEADER(log,
241                      "  Options.soft_pending_compaction_bytes_limit: %" PRIu64,
242                      soft_pending_compaction_bytes_limit);
243     ROCKS_LOG_HEADER(log,
244                      "  Options.hard_pending_compaction_bytes_limit: %" PRIu64,
245                      hard_pending_compaction_bytes_limit);
246     ROCKS_LOG_HEADER(log, "      Options.rate_limit_delay_max_milliseconds: %u",
247                      rate_limit_delay_max_milliseconds);
248     ROCKS_LOG_HEADER(log, "               Options.disable_auto_compactions: %d",
249                      disable_auto_compactions);
250 
251     const auto& it_compaction_style =
252         compaction_style_to_string.find(compaction_style);
253     std::string str_compaction_style;
254     if (it_compaction_style == compaction_style_to_string.end()) {
255       assert(false);
256       str_compaction_style = "unknown_" + std::to_string(compaction_style);
257     } else {
258       str_compaction_style = it_compaction_style->second;
259     }
260     ROCKS_LOG_HEADER(log,
261                      "                       Options.compaction_style: %s",
262                      str_compaction_style.c_str());
263 
264     const auto& it_compaction_pri =
265         compaction_pri_to_string.find(compaction_pri);
266     std::string str_compaction_pri;
267     if (it_compaction_pri == compaction_pri_to_string.end()) {
268       assert(false);
269       str_compaction_pri = "unknown_" + std::to_string(compaction_pri);
270     } else {
271       str_compaction_pri = it_compaction_pri->second;
272     }
273     ROCKS_LOG_HEADER(log,
274                      "                         Options.compaction_pri: %s",
275                      str_compaction_pri.c_str());
276     ROCKS_LOG_HEADER(log,
277                      "Options.compaction_options_universal.size_ratio: %u",
278                      compaction_options_universal.size_ratio);
279     ROCKS_LOG_HEADER(log,
280                      "Options.compaction_options_universal.min_merge_width: %u",
281                      compaction_options_universal.min_merge_width);
282     ROCKS_LOG_HEADER(log,
283                      "Options.compaction_options_universal.max_merge_width: %u",
284                      compaction_options_universal.max_merge_width);
285     ROCKS_LOG_HEADER(
286         log,
287         "Options.compaction_options_universal."
288         "max_size_amplification_percent: %u",
289         compaction_options_universal.max_size_amplification_percent);
290     ROCKS_LOG_HEADER(
291         log,
292         "Options.compaction_options_universal.compression_size_percent: %d",
293         compaction_options_universal.compression_size_percent);
294     const auto& it_compaction_stop_style = compaction_stop_style_to_string.find(
295         compaction_options_universal.stop_style);
296     std::string str_compaction_stop_style;
297     if (it_compaction_stop_style == compaction_stop_style_to_string.end()) {
298       assert(false);
299       str_compaction_stop_style =
300           "unknown_" + std::to_string(compaction_options_universal.stop_style);
301     } else {
302       str_compaction_stop_style = it_compaction_stop_style->second;
303     }
304     ROCKS_LOG_HEADER(log,
305                      "Options.compaction_options_universal.stop_style: %s",
306                      str_compaction_stop_style.c_str());
307     ROCKS_LOG_HEADER(
308         log, "Options.compaction_options_fifo.max_table_files_size: %" PRIu64,
309         compaction_options_fifo.max_table_files_size);
310     ROCKS_LOG_HEADER(log,
311                      "Options.compaction_options_fifo.allow_compaction: %d",
312                      compaction_options_fifo.allow_compaction);
313     std::string collector_names;
314     for (const auto& collector_factory : table_properties_collector_factories) {
315       collector_names.append(collector_factory->Name());
316       collector_names.append("; ");
317     }
318     ROCKS_LOG_HEADER(
319         log, "                  Options.table_properties_collectors: %s",
320         collector_names.c_str());
321     ROCKS_LOG_HEADER(log,
322                      "                  Options.inplace_update_support: %d",
323                      inplace_update_support);
324     ROCKS_LOG_HEADER(
325         log,
326         "                Options.inplace_update_num_locks: %" ROCKSDB_PRIszt,
327         inplace_update_num_locks);
328     // TODO: easier config for bloom (maybe based on avg key/value size)
329     ROCKS_LOG_HEADER(
330         log, "              Options.memtable_prefix_bloom_size_ratio: %f",
331         memtable_prefix_bloom_size_ratio);
332     ROCKS_LOG_HEADER(log,
333                      "              Options.memtable_whole_key_filtering: %d",
334                      memtable_whole_key_filtering);
335 
336     ROCKS_LOG_HEADER(log, "  Options.memtable_huge_page_size: %" ROCKSDB_PRIszt,
337                      memtable_huge_page_size);
338     ROCKS_LOG_HEADER(log,
339                      "                          Options.bloom_locality: %d",
340                      bloom_locality);
341 
342     ROCKS_LOG_HEADER(
343         log,
344         "                   Options.max_successive_merges: %" ROCKSDB_PRIszt,
345         max_successive_merges);
346     ROCKS_LOG_HEADER(log,
347                      "               Options.optimize_filters_for_hits: %d",
348                      optimize_filters_for_hits);
349     ROCKS_LOG_HEADER(log, "               Options.paranoid_file_checks: %d",
350                      paranoid_file_checks);
351     ROCKS_LOG_HEADER(log, "               Options.force_consistency_checks: %d",
352                      force_consistency_checks);
353     ROCKS_LOG_HEADER(log, "               Options.report_bg_io_stats: %d",
354                      report_bg_io_stats);
355     ROCKS_LOG_HEADER(log, "                              Options.ttl: %" PRIu64,
356                      ttl);
357     ROCKS_LOG_HEADER(log,
358                      "         Options.periodic_compaction_seconds: %" PRIu64,
359                      periodic_compaction_seconds);
360 }  // ColumnFamilyOptions::Dump
361 
Dump(Logger * log) const362 void Options::Dump(Logger* log) const {
363   DBOptions::Dump(log);
364   ColumnFamilyOptions::Dump(log);
365 }   // Options::Dump
366 
DumpCFOptions(Logger * log) const367 void Options::DumpCFOptions(Logger* log) const {
368   ColumnFamilyOptions::Dump(log);
369 }  // Options::DumpCFOptions
370 
371 //
372 // The goal of this method is to create a configuration that
373 // allows an application to write all files into L0 and
374 // then do a single compaction to output all files into L1.
375 Options*
PrepareForBulkLoad()376 Options::PrepareForBulkLoad()
377 {
378   // never slowdown ingest.
379   level0_file_num_compaction_trigger = (1<<30);
380   level0_slowdown_writes_trigger = (1<<30);
381   level0_stop_writes_trigger = (1<<30);
382   soft_pending_compaction_bytes_limit = 0;
383   hard_pending_compaction_bytes_limit = 0;
384 
385   // no auto compactions please. The application should issue a
386   // manual compaction after all data is loaded into L0.
387   disable_auto_compactions = true;
388   // A manual compaction run should pick all files in L0 in
389   // a single compaction run.
390   max_compaction_bytes = (static_cast<uint64_t>(1) << 60);
391 
392   // It is better to have only 2 levels, otherwise a manual
393   // compaction would compact at every possible level, thereby
394   // increasing the total time needed for compactions.
395   num_levels = 2;
396 
397   // Need to allow more write buffers to allow more parallism
398   // of flushes.
399   max_write_buffer_number = 6;
400   min_write_buffer_number_to_merge = 1;
401 
402   // When compaction is disabled, more parallel flush threads can
403   // help with write throughput.
404   max_background_flushes = 4;
405 
406   // Prevent a memtable flush to automatically promote files
407   // to L1. This is helpful so that all files that are
408   // input to the manual compaction are all at L0.
409   max_background_compactions = 2;
410 
411   // The compaction would create large files in L1.
412   target_file_size_base = 256 * 1024 * 1024;
413   return this;
414 }
415 
OptimizeForSmallDb()416 Options* Options::OptimizeForSmallDb() {
417   // 16MB block cache
418   std::shared_ptr<Cache> cache = NewLRUCache(16 << 20);
419 
420   ColumnFamilyOptions::OptimizeForSmallDb(&cache);
421   DBOptions::OptimizeForSmallDb(&cache);
422   return this;
423 }
424 
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)425 Options* Options::OldDefaults(int rocksdb_major_version,
426                               int rocksdb_minor_version) {
427   ColumnFamilyOptions::OldDefaults(rocksdb_major_version,
428                                    rocksdb_minor_version);
429   DBOptions::OldDefaults(rocksdb_major_version, rocksdb_minor_version);
430   return this;
431 }
432 
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)433 DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
434                                   int rocksdb_minor_version) {
435   if (rocksdb_major_version < 4 ||
436       (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
437     max_file_opening_threads = 1;
438     table_cache_numshardbits = 4;
439   }
440   if (rocksdb_major_version < 5 ||
441       (rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
442     delayed_write_rate = 2 * 1024U * 1024U;
443   } else if (rocksdb_major_version < 5 ||
444              (rocksdb_major_version == 5 && rocksdb_minor_version < 6)) {
445     delayed_write_rate = 16 * 1024U * 1024U;
446   }
447   max_open_files = 5000;
448   wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
449   return this;
450 }
451 
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)452 ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
453     int rocksdb_major_version, int rocksdb_minor_version) {
454   if (rocksdb_major_version < 5 ||
455       (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) {
456     compaction_pri = CompactionPri::kByCompensatedSize;
457   }
458   if (rocksdb_major_version < 4 ||
459       (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
460     write_buffer_size = 4 << 20;
461     target_file_size_base = 2 * 1048576;
462     max_bytes_for_level_base = 10 * 1048576;
463     soft_pending_compaction_bytes_limit = 0;
464     hard_pending_compaction_bytes_limit = 0;
465   }
466   if (rocksdb_major_version < 5) {
467     level0_stop_writes_trigger = 24;
468   } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) {
469     level0_stop_writes_trigger = 30;
470   }
471 
472   return this;
473 }
474 
475 // Optimization functions
OptimizeForSmallDb(std::shared_ptr<Cache> * cache)476 DBOptions* DBOptions::OptimizeForSmallDb(std::shared_ptr<Cache>* cache) {
477   max_file_opening_threads = 1;
478   max_open_files = 5000;
479 
480   // Cost memtable to block cache too.
481   std::shared_ptr<ROCKSDB_NAMESPACE::WriteBufferManager> wbm =
482       std::make_shared<ROCKSDB_NAMESPACE::WriteBufferManager>(
483           0, (cache != nullptr) ? *cache : std::shared_ptr<Cache>());
484   write_buffer_manager = wbm;
485 
486   return this;
487 }
488 
OptimizeForSmallDb(std::shared_ptr<Cache> * cache)489 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb(
490     std::shared_ptr<Cache>* cache) {
491   write_buffer_size = 2 << 20;
492   target_file_size_base = 2 * 1048576;
493   max_bytes_for_level_base = 10 * 1048576;
494   soft_pending_compaction_bytes_limit = 256 * 1048576;
495   hard_pending_compaction_bytes_limit = 1073741824ul;
496 
497   BlockBasedTableOptions table_options;
498   table_options.block_cache =
499       (cache != nullptr) ? *cache : std::shared_ptr<Cache>();
500   table_options.cache_index_and_filter_blocks = true;
501   // Two level iterator to avoid LRU cache imbalance
502   table_options.index_type =
503       BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
504   table_factory.reset(new BlockBasedTableFactory(table_options));
505 
506   return this;
507 }
508 
509 #ifndef ROCKSDB_LITE
OptimizeForPointLookup(uint64_t block_cache_size_mb)510 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForPointLookup(
511     uint64_t block_cache_size_mb) {
512   BlockBasedTableOptions block_based_options;
513   block_based_options.data_block_index_type =
514       BlockBasedTableOptions::kDataBlockBinaryAndHash;
515   block_based_options.data_block_hash_table_util_ratio = 0.75;
516   block_based_options.filter_policy.reset(NewBloomFilterPolicy(10));
517   block_based_options.block_cache =
518       NewLRUCache(static_cast<size_t>(block_cache_size_mb * 1024 * 1024));
519   table_factory.reset(new BlockBasedTableFactory(block_based_options));
520   memtable_prefix_bloom_size_ratio = 0.02;
521   memtable_whole_key_filtering = true;
522   return this;
523 }
524 
OptimizeLevelStyleCompaction(uint64_t memtable_memory_budget)525 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeLevelStyleCompaction(
526     uint64_t memtable_memory_budget) {
527   write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
528   // merge two memtables when flushing to L0
529   min_write_buffer_number_to_merge = 2;
530   // this means we'll use 50% extra memory in the worst case, but will reduce
531   // write stalls.
532   max_write_buffer_number = 6;
533   // start flushing L0->L1 as soon as possible. each file on level0 is
534   // (memtable_memory_budget / 2). This will flush level 0 when it's bigger than
535   // memtable_memory_budget.
536   level0_file_num_compaction_trigger = 2;
537   // doesn't really matter much, but we don't want to create too many files
538   target_file_size_base = memtable_memory_budget / 8;
539   // make Level1 size equal to Level0 size, so that L0->L1 compactions are fast
540   max_bytes_for_level_base = memtable_memory_budget;
541 
542   // level style compaction
543   compaction_style = kCompactionStyleLevel;
544 
545   // only compress levels >= 2
546   compression_per_level.resize(num_levels);
547   for (int i = 0; i < num_levels; ++i) {
548     if (i < 2) {
549       compression_per_level[i] = kNoCompression;
550     } else {
551       compression_per_level[i] =
552           LZ4_Supported()
553               ? kLZ4Compression
554               : (Snappy_Supported() ? kSnappyCompression : kNoCompression);
555     }
556   }
557   return this;
558 }
559 
OptimizeUniversalStyleCompaction(uint64_t memtable_memory_budget)560 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeUniversalStyleCompaction(
561     uint64_t memtable_memory_budget) {
562   write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
563   // merge two memtables when flushing to L0
564   min_write_buffer_number_to_merge = 2;
565   // this means we'll use 50% extra memory in the worst case, but will reduce
566   // write stalls.
567   max_write_buffer_number = 6;
568   // universal style compaction
569   compaction_style = kCompactionStyleUniversal;
570   compaction_options_universal.compression_size_percent = 80;
571   return this;
572 }
573 
IncreaseParallelism(int total_threads)574 DBOptions* DBOptions::IncreaseParallelism(int total_threads) {
575   max_background_jobs = total_threads;
576   env->SetBackgroundThreads(total_threads, Env::LOW);
577   env->SetBackgroundThreads(1, Env::HIGH);
578   return this;
579 }
580 
581 #endif  // !ROCKSDB_LITE
582 
ReadOptions()583 ReadOptions::ReadOptions()
584     : snapshot(nullptr),
585       iterate_lower_bound(nullptr),
586       iterate_upper_bound(nullptr),
587       readahead_size(0),
588       max_skippable_internal_keys(0),
589       read_tier(kReadAllTier),
590       verify_checksums(true),
591       fill_cache(true),
592       tailing(false),
593       managed(false),
594       total_order_seek(false),
595       auto_prefix_mode(false),
596       prefix_same_as_start(false),
597       pin_data(false),
598       background_purge_on_iterator_cleanup(false),
599       ignore_range_deletions(false),
600       iter_start_seqnum(0),
601       timestamp(nullptr) {}
602 
ReadOptions(bool cksum,bool cache)603 ReadOptions::ReadOptions(bool cksum, bool cache)
604     : snapshot(nullptr),
605       iterate_lower_bound(nullptr),
606       iterate_upper_bound(nullptr),
607       readahead_size(0),
608       max_skippable_internal_keys(0),
609       read_tier(kReadAllTier),
610       verify_checksums(cksum),
611       fill_cache(cache),
612       tailing(false),
613       managed(false),
614       total_order_seek(false),
615       auto_prefix_mode(false),
616       prefix_same_as_start(false),
617       pin_data(false),
618       background_purge_on_iterator_cleanup(false),
619       ignore_range_deletions(false),
620       iter_start_seqnum(0),
621       timestamp(nullptr) {}
622 
623 }  // namespace ROCKSDB_NAMESPACE
624