1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include "rocksdb/options.h"
11 
12 #include <cinttypes>
13 #include <limits>
14 
15 #include "monitoring/statistics.h"
16 #include "options/db_options.h"
17 #include "options/options_helper.h"
18 #include "rocksdb/cache.h"
19 #include "rocksdb/compaction_filter.h"
20 #include "rocksdb/comparator.h"
21 #include "rocksdb/env.h"
22 #include "rocksdb/memtablerep.h"
23 #include "rocksdb/merge_operator.h"
24 #include "rocksdb/slice.h"
25 #include "rocksdb/slice_transform.h"
26 #include "rocksdb/sst_file_manager.h"
27 #include "rocksdb/sst_partitioner.h"
28 #include "rocksdb/table.h"
29 #include "rocksdb/table_properties.h"
30 #include "rocksdb/wal_filter.h"
31 #include "table/block_based/block_based_table_factory.h"
32 #include "util/compression.h"
33 
34 namespace ROCKSDB_NAMESPACE {
35 
AdvancedColumnFamilyOptions()36 AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() {
37   assert(memtable_factory.get() != nullptr);
38 }
39 
AdvancedColumnFamilyOptions(const Options & options)40 AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
41     : max_write_buffer_number(options.max_write_buffer_number),
42       min_write_buffer_number_to_merge(
43           options.min_write_buffer_number_to_merge),
44       max_write_buffer_number_to_maintain(
45           options.max_write_buffer_number_to_maintain),
46       max_write_buffer_size_to_maintain(
47           options.max_write_buffer_size_to_maintain),
48       inplace_update_support(options.inplace_update_support),
49       inplace_update_num_locks(options.inplace_update_num_locks),
50       inplace_callback(options.inplace_callback),
51       memtable_prefix_bloom_size_ratio(
52           options.memtable_prefix_bloom_size_ratio),
53       memtable_whole_key_filtering(options.memtable_whole_key_filtering),
54       memtable_huge_page_size(options.memtable_huge_page_size),
55       memtable_insert_with_hint_prefix_extractor(
56           options.memtable_insert_with_hint_prefix_extractor),
57       bloom_locality(options.bloom_locality),
58       arena_block_size(options.arena_block_size),
59       compression_per_level(options.compression_per_level),
60       num_levels(options.num_levels),
61       level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
62       level0_stop_writes_trigger(options.level0_stop_writes_trigger),
63       target_file_size_base(options.target_file_size_base),
64       target_file_size_multiplier(options.target_file_size_multiplier),
65       level_compaction_dynamic_level_bytes(
66           options.level_compaction_dynamic_level_bytes),
67       max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
68       max_bytes_for_level_multiplier_additional(
69           options.max_bytes_for_level_multiplier_additional),
70       max_compaction_bytes(options.max_compaction_bytes),
71       soft_pending_compaction_bytes_limit(
72           options.soft_pending_compaction_bytes_limit),
73       hard_pending_compaction_bytes_limit(
74           options.hard_pending_compaction_bytes_limit),
75       compaction_style(options.compaction_style),
76       compaction_pri(options.compaction_pri),
77       compaction_options_universal(options.compaction_options_universal),
78       compaction_options_fifo(options.compaction_options_fifo),
79       max_sequential_skip_in_iterations(
80           options.max_sequential_skip_in_iterations),
81       memtable_factory(options.memtable_factory),
82       table_properties_collector_factories(
83           options.table_properties_collector_factories),
84       max_successive_merges(options.max_successive_merges),
85       optimize_filters_for_hits(options.optimize_filters_for_hits),
86       paranoid_file_checks(options.paranoid_file_checks),
87       force_consistency_checks(options.force_consistency_checks),
88       report_bg_io_stats(options.report_bg_io_stats),
89       ttl(options.ttl),
90       periodic_compaction_seconds(options.periodic_compaction_seconds),
91       sample_for_compression(options.sample_for_compression),
92       enable_blob_files(options.enable_blob_files),
93       min_blob_size(options.min_blob_size),
94       blob_file_size(options.blob_file_size),
95       blob_compression_type(options.blob_compression_type),
96       enable_blob_garbage_collection(options.enable_blob_garbage_collection),
97       blob_garbage_collection_age_cutoff(
98           options.blob_garbage_collection_age_cutoff) {
99   assert(memtable_factory.get() != nullptr);
100   if (max_bytes_for_level_multiplier_additional.size() <
101       static_cast<unsigned int>(num_levels)) {
102     max_bytes_for_level_multiplier_additional.resize(num_levels, 1);
103   }
104 }
105 
ColumnFamilyOptions()106 ColumnFamilyOptions::ColumnFamilyOptions()
107     : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
108       table_factory(
109           std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {}
110 
ColumnFamilyOptions(const Options & options)111 ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
112     : ColumnFamilyOptions(*static_cast<const ColumnFamilyOptions*>(&options)) {}
113 
DBOptions()114 DBOptions::DBOptions() {}
DBOptions(const Options & options)115 DBOptions::DBOptions(const Options& options)
116     : DBOptions(*static_cast<const DBOptions*>(&options)) {}
117 
Dump(Logger * log) const118 void DBOptions::Dump(Logger* log) const {
119     ImmutableDBOptions(*this).Dump(log);
120     MutableDBOptions(*this).Dump(log);
121 }  // DBOptions::Dump
122 
Dump(Logger * log) const123 void ColumnFamilyOptions::Dump(Logger* log) const {
124   ROCKS_LOG_HEADER(log, "              Options.comparator: %s",
125                    comparator->Name());
126   ROCKS_LOG_HEADER(log, "          Options.merge_operator: %s",
127                    merge_operator ? merge_operator->Name() : "None");
128   ROCKS_LOG_HEADER(log, "       Options.compaction_filter: %s",
129                    compaction_filter ? compaction_filter->Name() : "None");
130   ROCKS_LOG_HEADER(
131       log, "       Options.compaction_filter_factory: %s",
132       compaction_filter_factory ? compaction_filter_factory->Name() : "None");
133   ROCKS_LOG_HEADER(
134       log, " Options.sst_partitioner_factory: %s",
135       sst_partitioner_factory ? sst_partitioner_factory->Name() : "None");
136   ROCKS_LOG_HEADER(log, "        Options.memtable_factory: %s",
137                    memtable_factory->Name());
138   ROCKS_LOG_HEADER(log, "           Options.table_factory: %s",
139                    table_factory->Name());
140   ROCKS_LOG_HEADER(log, "           table_factory options: %s",
141                    table_factory->GetPrintableOptions().c_str());
142   ROCKS_LOG_HEADER(log, "       Options.write_buffer_size: %" ROCKSDB_PRIszt,
143                    write_buffer_size);
144   ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number: %d",
145                    max_write_buffer_number);
146   if (!compression_per_level.empty()) {
147     for (unsigned int i = 0; i < compression_per_level.size(); i++) {
148       ROCKS_LOG_HEADER(
149           log, "       Options.compression[%d]: %s", i,
150           CompressionTypeToString(compression_per_level[i]).c_str());
151     }
152     } else {
153       ROCKS_LOG_HEADER(log, "         Options.compression: %s",
154                        CompressionTypeToString(compression).c_str());
155     }
156     ROCKS_LOG_HEADER(
157         log, "                 Options.bottommost_compression: %s",
158         bottommost_compression == kDisableCompressionOption
159             ? "Disabled"
160             : CompressionTypeToString(bottommost_compression).c_str());
161     ROCKS_LOG_HEADER(
162         log, "      Options.prefix_extractor: %s",
163         prefix_extractor == nullptr ? "nullptr" : prefix_extractor->Name());
164     ROCKS_LOG_HEADER(log,
165                      "  Options.memtable_insert_with_hint_prefix_extractor: %s",
166                      memtable_insert_with_hint_prefix_extractor == nullptr
167                          ? "nullptr"
168                          : memtable_insert_with_hint_prefix_extractor->Name());
169     ROCKS_LOG_HEADER(log, "            Options.num_levels: %d", num_levels);
170     ROCKS_LOG_HEADER(log, "       Options.min_write_buffer_number_to_merge: %d",
171                      min_write_buffer_number_to_merge);
172     ROCKS_LOG_HEADER(log, "    Options.max_write_buffer_number_to_maintain: %d",
173                      max_write_buffer_number_to_maintain);
174     ROCKS_LOG_HEADER(log,
175                      "    Options.max_write_buffer_size_to_maintain: %" PRIu64,
176                      max_write_buffer_size_to_maintain);
177     ROCKS_LOG_HEADER(
178         log, "           Options.bottommost_compression_opts.window_bits: %d",
179         bottommost_compression_opts.window_bits);
180     ROCKS_LOG_HEADER(
181         log, "                 Options.bottommost_compression_opts.level: %d",
182         bottommost_compression_opts.level);
183     ROCKS_LOG_HEADER(
184         log, "              Options.bottommost_compression_opts.strategy: %d",
185         bottommost_compression_opts.strategy);
186     ROCKS_LOG_HEADER(
187         log,
188         "        Options.bottommost_compression_opts.max_dict_bytes: "
189         "%" PRIu32,
190         bottommost_compression_opts.max_dict_bytes);
191     ROCKS_LOG_HEADER(
192         log,
193         "        Options.bottommost_compression_opts.zstd_max_train_bytes: "
194         "%" PRIu32,
195         bottommost_compression_opts.zstd_max_train_bytes);
196     ROCKS_LOG_HEADER(
197         log,
198         "        Options.bottommost_compression_opts.parallel_threads: "
199         "%" PRIu32,
200         bottommost_compression_opts.parallel_threads);
201     ROCKS_LOG_HEADER(
202         log, "                 Options.bottommost_compression_opts.enabled: %s",
203         bottommost_compression_opts.enabled ? "true" : "false");
204     ROCKS_LOG_HEADER(
205         log,
206         "        Options.bottommost_compression_opts.max_dict_buffer_bytes: "
207         "%" PRIu64,
208         bottommost_compression_opts.max_dict_buffer_bytes);
209     ROCKS_LOG_HEADER(log, "           Options.compression_opts.window_bits: %d",
210                      compression_opts.window_bits);
211     ROCKS_LOG_HEADER(log, "                 Options.compression_opts.level: %d",
212                      compression_opts.level);
213     ROCKS_LOG_HEADER(log, "              Options.compression_opts.strategy: %d",
214                      compression_opts.strategy);
215     ROCKS_LOG_HEADER(
216         log,
217         "        Options.compression_opts.max_dict_bytes: %" PRIu32,
218         compression_opts.max_dict_bytes);
219     ROCKS_LOG_HEADER(log,
220                      "        Options.compression_opts.zstd_max_train_bytes: "
221                      "%" PRIu32,
222                      compression_opts.zstd_max_train_bytes);
223     ROCKS_LOG_HEADER(log,
224                      "        Options.compression_opts.parallel_threads: "
225                      "%" PRIu32,
226                      compression_opts.parallel_threads);
227     ROCKS_LOG_HEADER(log,
228                      "                 Options.compression_opts.enabled: %s",
229                      compression_opts.enabled ? "true" : "false");
230     ROCKS_LOG_HEADER(log,
231                      "        Options.compression_opts.max_dict_buffer_bytes: "
232                      "%" PRIu64,
233                      compression_opts.max_dict_buffer_bytes);
234     ROCKS_LOG_HEADER(log, "     Options.level0_file_num_compaction_trigger: %d",
235                      level0_file_num_compaction_trigger);
236     ROCKS_LOG_HEADER(log, "         Options.level0_slowdown_writes_trigger: %d",
237                      level0_slowdown_writes_trigger);
238     ROCKS_LOG_HEADER(log, "             Options.level0_stop_writes_trigger: %d",
239                      level0_stop_writes_trigger);
240     ROCKS_LOG_HEADER(
241         log, "                  Options.target_file_size_base: %" PRIu64,
242         target_file_size_base);
243     ROCKS_LOG_HEADER(log, "            Options.target_file_size_multiplier: %d",
244                      target_file_size_multiplier);
245     ROCKS_LOG_HEADER(
246         log, "               Options.max_bytes_for_level_base: %" PRIu64,
247         max_bytes_for_level_base);
248     ROCKS_LOG_HEADER(log, "Options.level_compaction_dynamic_level_bytes: %d",
249                      level_compaction_dynamic_level_bytes);
250     ROCKS_LOG_HEADER(log, "         Options.max_bytes_for_level_multiplier: %f",
251                      max_bytes_for_level_multiplier);
252     for (size_t i = 0; i < max_bytes_for_level_multiplier_additional.size();
253          i++) {
254       ROCKS_LOG_HEADER(
255           log, "Options.max_bytes_for_level_multiplier_addtl[%" ROCKSDB_PRIszt
256                "]: %d",
257           i, max_bytes_for_level_multiplier_additional[i]);
258     }
259     ROCKS_LOG_HEADER(
260         log, "      Options.max_sequential_skip_in_iterations: %" PRIu64,
261         max_sequential_skip_in_iterations);
262     ROCKS_LOG_HEADER(
263         log, "                   Options.max_compaction_bytes: %" PRIu64,
264         max_compaction_bytes);
265     ROCKS_LOG_HEADER(
266         log,
267         "                       Options.arena_block_size: %" ROCKSDB_PRIszt,
268         arena_block_size);
269     ROCKS_LOG_HEADER(log,
270                      "  Options.soft_pending_compaction_bytes_limit: %" PRIu64,
271                      soft_pending_compaction_bytes_limit);
272     ROCKS_LOG_HEADER(log,
273                      "  Options.hard_pending_compaction_bytes_limit: %" PRIu64,
274                      hard_pending_compaction_bytes_limit);
275     ROCKS_LOG_HEADER(log, "      Options.rate_limit_delay_max_milliseconds: %u",
276                      rate_limit_delay_max_milliseconds);
277     ROCKS_LOG_HEADER(log, "               Options.disable_auto_compactions: %d",
278                      disable_auto_compactions);
279 
280     const auto& it_compaction_style =
281         compaction_style_to_string.find(compaction_style);
282     std::string str_compaction_style;
283     if (it_compaction_style == compaction_style_to_string.end()) {
284       assert(false);
285       str_compaction_style = "unknown_" + std::to_string(compaction_style);
286     } else {
287       str_compaction_style = it_compaction_style->second;
288     }
289     ROCKS_LOG_HEADER(log,
290                      "                       Options.compaction_style: %s",
291                      str_compaction_style.c_str());
292 
293     const auto& it_compaction_pri =
294         compaction_pri_to_string.find(compaction_pri);
295     std::string str_compaction_pri;
296     if (it_compaction_pri == compaction_pri_to_string.end()) {
297       assert(false);
298       str_compaction_pri = "unknown_" + std::to_string(compaction_pri);
299     } else {
300       str_compaction_pri = it_compaction_pri->second;
301     }
302     ROCKS_LOG_HEADER(log,
303                      "                         Options.compaction_pri: %s",
304                      str_compaction_pri.c_str());
305     ROCKS_LOG_HEADER(log,
306                      "Options.compaction_options_universal.size_ratio: %u",
307                      compaction_options_universal.size_ratio);
308     ROCKS_LOG_HEADER(log,
309                      "Options.compaction_options_universal.min_merge_width: %u",
310                      compaction_options_universal.min_merge_width);
311     ROCKS_LOG_HEADER(log,
312                      "Options.compaction_options_universal.max_merge_width: %u",
313                      compaction_options_universal.max_merge_width);
314     ROCKS_LOG_HEADER(
315         log,
316         "Options.compaction_options_universal."
317         "max_size_amplification_percent: %u",
318         compaction_options_universal.max_size_amplification_percent);
319     ROCKS_LOG_HEADER(
320         log,
321         "Options.compaction_options_universal.compression_size_percent: %d",
322         compaction_options_universal.compression_size_percent);
323     const auto& it_compaction_stop_style = compaction_stop_style_to_string.find(
324         compaction_options_universal.stop_style);
325     std::string str_compaction_stop_style;
326     if (it_compaction_stop_style == compaction_stop_style_to_string.end()) {
327       assert(false);
328       str_compaction_stop_style =
329           "unknown_" + std::to_string(compaction_options_universal.stop_style);
330     } else {
331       str_compaction_stop_style = it_compaction_stop_style->second;
332     }
333     ROCKS_LOG_HEADER(log,
334                      "Options.compaction_options_universal.stop_style: %s",
335                      str_compaction_stop_style.c_str());
336     ROCKS_LOG_HEADER(
337         log, "Options.compaction_options_fifo.max_table_files_size: %" PRIu64,
338         compaction_options_fifo.max_table_files_size);
339     ROCKS_LOG_HEADER(log,
340                      "Options.compaction_options_fifo.allow_compaction: %d",
341                      compaction_options_fifo.allow_compaction);
342     std::ostringstream collector_info;
343     for (const auto& collector_factory : table_properties_collector_factories) {
344       collector_info << collector_factory->ToString() << ';';
345     }
346     ROCKS_LOG_HEADER(
347         log, "                  Options.table_properties_collectors: %s",
348         collector_info.str().c_str());
349     ROCKS_LOG_HEADER(log,
350                      "                  Options.inplace_update_support: %d",
351                      inplace_update_support);
352     ROCKS_LOG_HEADER(
353         log,
354         "                Options.inplace_update_num_locks: %" ROCKSDB_PRIszt,
355         inplace_update_num_locks);
356     // TODO: easier config for bloom (maybe based on avg key/value size)
357     ROCKS_LOG_HEADER(
358         log, "              Options.memtable_prefix_bloom_size_ratio: %f",
359         memtable_prefix_bloom_size_ratio);
360     ROCKS_LOG_HEADER(log,
361                      "              Options.memtable_whole_key_filtering: %d",
362                      memtable_whole_key_filtering);
363 
364     ROCKS_LOG_HEADER(log, "  Options.memtable_huge_page_size: %" ROCKSDB_PRIszt,
365                      memtable_huge_page_size);
366     ROCKS_LOG_HEADER(log,
367                      "                          Options.bloom_locality: %d",
368                      bloom_locality);
369 
370     ROCKS_LOG_HEADER(
371         log,
372         "                   Options.max_successive_merges: %" ROCKSDB_PRIszt,
373         max_successive_merges);
374     ROCKS_LOG_HEADER(log,
375                      "               Options.optimize_filters_for_hits: %d",
376                      optimize_filters_for_hits);
377     ROCKS_LOG_HEADER(log, "               Options.paranoid_file_checks: %d",
378                      paranoid_file_checks);
379     ROCKS_LOG_HEADER(log, "               Options.force_consistency_checks: %d",
380                      force_consistency_checks);
381     ROCKS_LOG_HEADER(log, "               Options.report_bg_io_stats: %d",
382                      report_bg_io_stats);
383     ROCKS_LOG_HEADER(log, "                              Options.ttl: %" PRIu64,
384                      ttl);
385     ROCKS_LOG_HEADER(log,
386                      "         Options.periodic_compaction_seconds: %" PRIu64,
387                      periodic_compaction_seconds);
388     ROCKS_LOG_HEADER(log, "                   Options.enable_blob_files: %s",
389                      enable_blob_files ? "true" : "false");
390     ROCKS_LOG_HEADER(log,
391                      "                       Options.min_blob_size: %" PRIu64,
392                      min_blob_size);
393     ROCKS_LOG_HEADER(log,
394                      "                      Options.blob_file_size: %" PRIu64,
395                      blob_file_size);
396     ROCKS_LOG_HEADER(log, "               Options.blob_compression_type: %s",
397                      CompressionTypeToString(blob_compression_type).c_str());
398     ROCKS_LOG_HEADER(log, "      Options.enable_blob_garbage_collection: %s",
399                      enable_blob_garbage_collection ? "true" : "false");
400     ROCKS_LOG_HEADER(log, "  Options.blob_garbage_collection_age_cutoff: %f",
401                      blob_garbage_collection_age_cutoff);
402 }  // ColumnFamilyOptions::Dump
403 
Dump(Logger * log) const404 void Options::Dump(Logger* log) const {
405   DBOptions::Dump(log);
406   ColumnFamilyOptions::Dump(log);
407 }   // Options::Dump
408 
DumpCFOptions(Logger * log) const409 void Options::DumpCFOptions(Logger* log) const {
410   ColumnFamilyOptions::Dump(log);
411 }  // Options::DumpCFOptions
412 
413 //
414 // The goal of this method is to create a configuration that
415 // allows an application to write all files into L0 and
416 // then do a single compaction to output all files into L1.
417 Options*
PrepareForBulkLoad()418 Options::PrepareForBulkLoad()
419 {
420   // never slowdown ingest.
421   level0_file_num_compaction_trigger = (1<<30);
422   level0_slowdown_writes_trigger = (1<<30);
423   level0_stop_writes_trigger = (1<<30);
424   soft_pending_compaction_bytes_limit = 0;
425   hard_pending_compaction_bytes_limit = 0;
426 
427   // no auto compactions please. The application should issue a
428   // manual compaction after all data is loaded into L0.
429   disable_auto_compactions = true;
430   // A manual compaction run should pick all files in L0 in
431   // a single compaction run.
432   max_compaction_bytes = (static_cast<uint64_t>(1) << 60);
433 
434   // It is better to have only 2 levels, otherwise a manual
435   // compaction would compact at every possible level, thereby
436   // increasing the total time needed for compactions.
437   num_levels = 2;
438 
439   // Need to allow more write buffers to allow more parallism
440   // of flushes.
441   max_write_buffer_number = 6;
442   min_write_buffer_number_to_merge = 1;
443 
444   // When compaction is disabled, more parallel flush threads can
445   // help with write throughput.
446   max_background_flushes = 4;
447 
448   // Prevent a memtable flush to automatically promote files
449   // to L1. This is helpful so that all files that are
450   // input to the manual compaction are all at L0.
451   max_background_compactions = 2;
452 
453   // The compaction would create large files in L1.
454   target_file_size_base = 256 * 1024 * 1024;
455   return this;
456 }
457 
OptimizeForSmallDb()458 Options* Options::OptimizeForSmallDb() {
459   // 16MB block cache
460   std::shared_ptr<Cache> cache = NewLRUCache(16 << 20);
461 
462   ColumnFamilyOptions::OptimizeForSmallDb(&cache);
463   DBOptions::OptimizeForSmallDb(&cache);
464   return this;
465 }
466 
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)467 Options* Options::OldDefaults(int rocksdb_major_version,
468                               int rocksdb_minor_version) {
469   ColumnFamilyOptions::OldDefaults(rocksdb_major_version,
470                                    rocksdb_minor_version);
471   DBOptions::OldDefaults(rocksdb_major_version, rocksdb_minor_version);
472   return this;
473 }
474 
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)475 DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
476                                   int rocksdb_minor_version) {
477   if (rocksdb_major_version < 4 ||
478       (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
479     max_file_opening_threads = 1;
480     table_cache_numshardbits = 4;
481   }
482   if (rocksdb_major_version < 5 ||
483       (rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
484     delayed_write_rate = 2 * 1024U * 1024U;
485   } else if (rocksdb_major_version < 5 ||
486              (rocksdb_major_version == 5 && rocksdb_minor_version < 6)) {
487     delayed_write_rate = 16 * 1024U * 1024U;
488   }
489   max_open_files = 5000;
490   wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
491   return this;
492 }
493 
OldDefaults(int rocksdb_major_version,int rocksdb_minor_version)494 ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
495     int rocksdb_major_version, int rocksdb_minor_version) {
496   if (rocksdb_major_version < 5 ||
497       (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) {
498     compaction_pri = CompactionPri::kByCompensatedSize;
499   }
500   if (rocksdb_major_version < 4 ||
501       (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
502     write_buffer_size = 4 << 20;
503     target_file_size_base = 2 * 1048576;
504     max_bytes_for_level_base = 10 * 1048576;
505     soft_pending_compaction_bytes_limit = 0;
506     hard_pending_compaction_bytes_limit = 0;
507   }
508   if (rocksdb_major_version < 5) {
509     level0_stop_writes_trigger = 24;
510   } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) {
511     level0_stop_writes_trigger = 30;
512   }
513 
514   return this;
515 }
516 
517 // Optimization functions
OptimizeForSmallDb(std::shared_ptr<Cache> * cache)518 DBOptions* DBOptions::OptimizeForSmallDb(std::shared_ptr<Cache>* cache) {
519   max_file_opening_threads = 1;
520   max_open_files = 5000;
521 
522   // Cost memtable to block cache too.
523   std::shared_ptr<ROCKSDB_NAMESPACE::WriteBufferManager> wbm =
524       std::make_shared<ROCKSDB_NAMESPACE::WriteBufferManager>(
525           0, (cache != nullptr) ? *cache : std::shared_ptr<Cache>());
526   write_buffer_manager = wbm;
527 
528   return this;
529 }
530 
OptimizeForSmallDb(std::shared_ptr<Cache> * cache)531 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb(
532     std::shared_ptr<Cache>* cache) {
533   write_buffer_size = 2 << 20;
534   target_file_size_base = 2 * 1048576;
535   max_bytes_for_level_base = 10 * 1048576;
536   soft_pending_compaction_bytes_limit = 256 * 1048576;
537   hard_pending_compaction_bytes_limit = 1073741824ul;
538 
539   BlockBasedTableOptions table_options;
540   table_options.block_cache =
541       (cache != nullptr) ? *cache : std::shared_ptr<Cache>();
542   table_options.cache_index_and_filter_blocks = true;
543   // Two level iterator to avoid LRU cache imbalance
544   table_options.index_type =
545       BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
546   table_factory.reset(new BlockBasedTableFactory(table_options));
547 
548   return this;
549 }
550 
551 #ifndef ROCKSDB_LITE
OptimizeForPointLookup(uint64_t block_cache_size_mb)552 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForPointLookup(
553     uint64_t block_cache_size_mb) {
554   BlockBasedTableOptions block_based_options;
555   block_based_options.data_block_index_type =
556       BlockBasedTableOptions::kDataBlockBinaryAndHash;
557   block_based_options.data_block_hash_table_util_ratio = 0.75;
558   block_based_options.filter_policy.reset(NewBloomFilterPolicy(10));
559   block_based_options.block_cache =
560       NewLRUCache(static_cast<size_t>(block_cache_size_mb * 1024 * 1024));
561   table_factory.reset(new BlockBasedTableFactory(block_based_options));
562   memtable_prefix_bloom_size_ratio = 0.02;
563   memtable_whole_key_filtering = true;
564   return this;
565 }
566 
OptimizeLevelStyleCompaction(uint64_t memtable_memory_budget)567 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeLevelStyleCompaction(
568     uint64_t memtable_memory_budget) {
569   write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
570   // merge two memtables when flushing to L0
571   min_write_buffer_number_to_merge = 2;
572   // this means we'll use 50% extra memory in the worst case, but will reduce
573   // write stalls.
574   max_write_buffer_number = 6;
575   // start flushing L0->L1 as soon as possible. each file on level0 is
576   // (memtable_memory_budget / 2). This will flush level 0 when it's bigger than
577   // memtable_memory_budget.
578   level0_file_num_compaction_trigger = 2;
579   // doesn't really matter much, but we don't want to create too many files
580   target_file_size_base = memtable_memory_budget / 8;
581   // make Level1 size equal to Level0 size, so that L0->L1 compactions are fast
582   max_bytes_for_level_base = memtable_memory_budget;
583 
584   // level style compaction
585   compaction_style = kCompactionStyleLevel;
586 
587   // only compress levels >= 2
588   compression_per_level.resize(num_levels);
589   for (int i = 0; i < num_levels; ++i) {
590     if (i < 2) {
591       compression_per_level[i] = kNoCompression;
592     } else {
593       compression_per_level[i] =
594           LZ4_Supported()
595               ? kLZ4Compression
596               : (Snappy_Supported() ? kSnappyCompression : kNoCompression);
597     }
598   }
599   return this;
600 }
601 
OptimizeUniversalStyleCompaction(uint64_t memtable_memory_budget)602 ColumnFamilyOptions* ColumnFamilyOptions::OptimizeUniversalStyleCompaction(
603     uint64_t memtable_memory_budget) {
604   write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
605   // merge two memtables when flushing to L0
606   min_write_buffer_number_to_merge = 2;
607   // this means we'll use 50% extra memory in the worst case, but will reduce
608   // write stalls.
609   max_write_buffer_number = 6;
610   // universal style compaction
611   compaction_style = kCompactionStyleUniversal;
612   compaction_options_universal.compression_size_percent = 80;
613   return this;
614 }
615 
IncreaseParallelism(int total_threads)616 DBOptions* DBOptions::IncreaseParallelism(int total_threads) {
617   max_background_jobs = total_threads;
618   env->SetBackgroundThreads(total_threads, Env::LOW);
619   env->SetBackgroundThreads(1, Env::HIGH);
620   return this;
621 }
622 
623 #endif  // !ROCKSDB_LITE
624 
ReadOptions()625 ReadOptions::ReadOptions()
626     : snapshot(nullptr),
627       iterate_lower_bound(nullptr),
628       iterate_upper_bound(nullptr),
629       readahead_size(0),
630       max_skippable_internal_keys(0),
631       read_tier(kReadAllTier),
632       verify_checksums(true),
633       fill_cache(true),
634       tailing(false),
635       managed(false),
636       total_order_seek(false),
637       auto_prefix_mode(false),
638       prefix_same_as_start(false),
639       pin_data(false),
640       background_purge_on_iterator_cleanup(false),
641       ignore_range_deletions(false),
642       iter_start_seqnum(0),
643       timestamp(nullptr),
644       iter_start_ts(nullptr),
645       deadline(std::chrono::microseconds::zero()),
646       io_timeout(std::chrono::microseconds::zero()),
647       value_size_soft_limit(std::numeric_limits<uint64_t>::max()) {}
648 
ReadOptions(bool cksum,bool cache)649 ReadOptions::ReadOptions(bool cksum, bool cache)
650     : snapshot(nullptr),
651       iterate_lower_bound(nullptr),
652       iterate_upper_bound(nullptr),
653       readahead_size(0),
654       max_skippable_internal_keys(0),
655       read_tier(kReadAllTier),
656       verify_checksums(cksum),
657       fill_cache(cache),
658       tailing(false),
659       managed(false),
660       total_order_seek(false),
661       auto_prefix_mode(false),
662       prefix_same_as_start(false),
663       pin_data(false),
664       background_purge_on_iterator_cleanup(false),
665       ignore_range_deletions(false),
666       iter_start_seqnum(0),
667       timestamp(nullptr),
668       iter_start_ts(nullptr),
669       deadline(std::chrono::microseconds::zero()),
670       io_timeout(std::chrono::microseconds::zero()),
671       value_size_soft_limit(std::numeric_limits<uint64_t>::max()) {}
672 
673 }  // namespace ROCKSDB_NAMESPACE
674