1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 //
10 // The test uses an array to compare against values written to the database.
11 // Keys written to the array are in 1:1 correspondence to the actual values in
12 // the database according to the formula in the function GenerateValue.
13 
14 // Space is reserved in the array from 0 to FLAGS_max_key and values are
15 // randomly written/deleted/read from those positions. During verification we
16 // compare all the positions in the array. To shorten/elongate the running
17 // time, you could change the settings: FLAGS_max_key, FLAGS_ops_per_thread,
18 // (sometimes also FLAGS_threads).
19 //
20 // NOTE that if FLAGS_test_batches_snapshots is set, the test will have
21 // different behavior. See comment of the flag for details.
22 
23 #ifdef GFLAGS
24 #include "db_stress_tool/db_stress_common.h"
25 #include "db_stress_tool/db_stress_driver.h"
26 #ifndef NDEBUG
27 #include "utilities/fault_injection_fs.h"
28 #endif
29 
30 namespace ROCKSDB_NAMESPACE {
31 namespace {
32 static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
33 static std::shared_ptr<ROCKSDB_NAMESPACE::DbStressEnvWrapper> env_wrapper_guard;
34 static std::shared_ptr<CompositeEnvWrapper> fault_env_guard;
35 }  // namespace
36 
GetCompositeEnv(std::shared_ptr<FileSystem> fs)37 static Env* GetCompositeEnv(std::shared_ptr<FileSystem> fs) {
38   static std::shared_ptr<Env> composite_env = NewCompositeEnv(fs);
39   return composite_env.get();
40 }
41 
// Global key-generation context. db_stress_tool() fills in its `window` and
// `weights` members from the key-length related flags before the stress test
// is started.
KeyGenContext key_gen_ctx;
43 
db_stress_tool(int argc,char ** argv)44 int db_stress_tool(int argc, char** argv) {
45   SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) +
46                   " [OPTIONS]...");
47   ParseCommandLineFlags(&argc, &argv, true);
48 
49   SanitizeDoubleParam(&FLAGS_bloom_bits);
50   SanitizeDoubleParam(&FLAGS_memtable_prefix_bloom_size_ratio);
51   SanitizeDoubleParam(&FLAGS_max_bytes_for_level_multiplier);
52 
53 #ifndef NDEBUG
54   if (FLAGS_mock_direct_io) {
55     SetupSyncPointsToMockDirectIO();
56   }
57 #endif
58   if (FLAGS_statistics) {
59     dbstats = ROCKSDB_NAMESPACE::CreateDBStatistics();
60     if (FLAGS_test_secondary) {
61       dbstats_secondaries = ROCKSDB_NAMESPACE::CreateDBStatistics();
62     }
63   }
64   compression_type_e = StringToCompressionType(FLAGS_compression_type.c_str());
65   bottommost_compression_type_e =
66       StringToCompressionType(FLAGS_bottommost_compression_type.c_str());
67   checksum_type_e = StringToChecksumType(FLAGS_checksum_type.c_str());
68 
69   Env* raw_env;
70 
71   int env_opts =
72       !FLAGS_hdfs.empty() + !FLAGS_env_uri.empty() + !FLAGS_fs_uri.empty();
73   if (env_opts > 1) {
74     fprintf(stderr,
75             "Error: --hdfs, --env_uri and --fs_uri are mutually exclusive\n");
76     exit(1);
77   }
78 
79   if (!FLAGS_hdfs.empty()) {
80     raw_env = new ROCKSDB_NAMESPACE::HdfsEnv(FLAGS_hdfs);
81   } else if (!FLAGS_env_uri.empty()) {
82     Status s = Env::LoadEnv(FLAGS_env_uri, &raw_env, &env_guard);
83     if (raw_env == nullptr) {
84       fprintf(stderr, "No Env registered for URI: %s\n", FLAGS_env_uri.c_str());
85       exit(1);
86     }
87   } else if (!FLAGS_fs_uri.empty()) {
88     std::shared_ptr<FileSystem> fs;
89     Status s = FileSystem::Load(FLAGS_fs_uri, &fs);
90     if (!s.ok()) {
91       fprintf(stderr, "Error: %s\n", s.ToString().c_str());
92       exit(1);
93     }
94     raw_env = GetCompositeEnv(fs);
95   } else {
96     raw_env = Env::Default();
97   }
98 
99 #ifndef NDEBUG
100   if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection ||
101       FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in) {
102     FaultInjectionTestFS* fs =
103         new FaultInjectionTestFS(raw_env->GetFileSystem());
104     fault_fs_guard.reset(fs);
105     if (FLAGS_write_fault_one_in) {
106       fault_fs_guard->SetFilesystemDirectWritable(false);
107     } else {
108       fault_fs_guard->SetFilesystemDirectWritable(true);
109     }
110     fault_env_guard =
111         std::make_shared<CompositeEnvWrapper>(raw_env, fault_fs_guard);
112     raw_env = fault_env_guard.get();
113   }
114   if (FLAGS_write_fault_one_in) {
115     SyncPoint::GetInstance()->SetCallBack(
116         "BuildTable:BeforeFinishBuildTable",
117         [&](void*) { fault_fs_guard->EnableWriteErrorInjection(); });
118     SyncPoint::GetInstance()->EnableProcessing();
119   }
120 #endif
121 
122   env_wrapper_guard = std::make_shared<DbStressEnvWrapper>(raw_env);
123   db_stress_env = env_wrapper_guard.get();
124 
125 #ifndef NDEBUG
126   if (FLAGS_write_fault_one_in) {
127     // In the write injection case, we need to use the FS interface and returns
128     // the IOStatus with different error and flags. Therefore,
129     // DbStressEnvWrapper cannot be used which will swallow the FS
130     // implementations. We should directly use the raw_env which is the
131     // CompositeEnvWrapper of env and fault_fs.
132     db_stress_env = raw_env;
133   }
134 #endif
135 
136   FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str());
137 
138   // The number of background threads should be at least as much the
139   // max number of concurrent compactions.
140   db_stress_env->SetBackgroundThreads(FLAGS_max_background_compactions,
141                                       ROCKSDB_NAMESPACE::Env::Priority::LOW);
142   db_stress_env->SetBackgroundThreads(FLAGS_num_bottom_pri_threads,
143                                       ROCKSDB_NAMESPACE::Env::Priority::BOTTOM);
144   if (FLAGS_prefixpercent > 0 && FLAGS_prefix_size < 0) {
145     fprintf(stderr,
146             "Error: prefixpercent is non-zero while prefix_size is "
147             "not positive!\n");
148     exit(1);
149   }
150   if (FLAGS_test_batches_snapshots && FLAGS_prefix_size <= 0) {
151     fprintf(stderr,
152             "Error: please specify prefix_size for "
153             "test_batches_snapshots test!\n");
154     exit(1);
155   }
156   if (FLAGS_memtable_prefix_bloom_size_ratio > 0.0 && FLAGS_prefix_size < 0 &&
157       !FLAGS_memtable_whole_key_filtering) {
158     fprintf(stderr,
159             "Error: please specify positive prefix_size or enable whole key "
160             "filtering in order to use memtable_prefix_bloom_size_ratio\n");
161     exit(1);
162   }
163   if ((FLAGS_readpercent + FLAGS_prefixpercent + FLAGS_writepercent +
164        FLAGS_delpercent + FLAGS_delrangepercent + FLAGS_iterpercent) != 100) {
165     fprintf(stderr,
166             "Error: "
167             "Read(%d)+Prefix(%d)+Write(%d)+Delete(%d)+DeleteRange(%d)"
168             "+Iterate(%d) percents != "
169             "100!\n",
170             FLAGS_readpercent, FLAGS_prefixpercent, FLAGS_writepercent,
171             FLAGS_delpercent, FLAGS_delrangepercent, FLAGS_iterpercent);
172     exit(1);
173   }
174   if (FLAGS_disable_wal == 1 && FLAGS_reopen > 0) {
175     fprintf(stderr, "Error: Db cannot reopen safely with disable_wal set!\n");
176     exit(1);
177   }
178   if ((unsigned)FLAGS_reopen >= FLAGS_ops_per_thread) {
179     fprintf(stderr,
180             "Error: #DB-reopens should be < ops_per_thread\n"
181             "Provided reopens = %d and ops_per_thread = %lu\n",
182             FLAGS_reopen, (unsigned long)FLAGS_ops_per_thread);
183     exit(1);
184   }
185   if (FLAGS_test_batches_snapshots && FLAGS_delrangepercent > 0) {
186     fprintf(stderr,
187             "Error: nonzero delrangepercent unsupported in "
188             "test_batches_snapshots mode\n");
189     exit(1);
190   }
191   if (FLAGS_active_width > FLAGS_max_key) {
192     fprintf(stderr, "Error: active_width can be at most max_key\n");
193     exit(1);
194   } else if (FLAGS_active_width == 0) {
195     FLAGS_active_width = FLAGS_max_key;
196   }
197   if (FLAGS_value_size_mult * kRandomValueMaxFactor > kValueMaxLen) {
198     fprintf(stderr, "Error: value_size_mult can be at most %d\n",
199             kValueMaxLen / kRandomValueMaxFactor);
200     exit(1);
201   }
202   if (FLAGS_use_merge && FLAGS_nooverwritepercent == 100) {
203     fprintf(
204         stderr,
205         "Error: nooverwritepercent must not be 100 when using merge operands");
206     exit(1);
207   }
208   if (FLAGS_ingest_external_file_one_in > 0 && FLAGS_nooverwritepercent > 0) {
209     fprintf(stderr,
210             "Error: nooverwritepercent must be 0 when using file ingestion\n");
211     exit(1);
212   }
213   if (FLAGS_clear_column_family_one_in > 0 && FLAGS_backup_one_in > 0) {
214     fprintf(stderr,
215             "Error: clear_column_family_one_in must be 0 when using backup\n");
216     exit(1);
217   }
218   if (FLAGS_test_cf_consistency && FLAGS_disable_wal) {
219     FLAGS_atomic_flush = true;
220   }
221 
222   if (FLAGS_read_only) {
223     if (FLAGS_writepercent != 0 || FLAGS_delpercent != 0 ||
224         FLAGS_delrangepercent != 0) {
225       fprintf(stderr, "Error: updates are not supported in read only mode\n");
226       exit(1);
227     } else if (FLAGS_checkpoint_one_in > 0 &&
228                FLAGS_clear_column_family_one_in > 0) {
229       fprintf(stdout,
230               "Warn: checkpoint won't be validated since column families may "
231               "be dropped.\n");
232     }
233   }
234 
235   // Choose a location for the test database if none given with --db=<path>
236   if (FLAGS_db.empty()) {
237     std::string default_db_path;
238     db_stress_env->GetTestDirectory(&default_db_path);
239     default_db_path += "/dbstress";
240     FLAGS_db = default_db_path;
241   }
242 
243   if ((FLAGS_test_secondary || FLAGS_continuous_verification_interval > 0) &&
244       FLAGS_secondaries_base.empty()) {
245     std::string default_secondaries_path;
246     db_stress_env->GetTestDirectory(&default_secondaries_path);
247     default_secondaries_path += "/dbstress_secondaries";
248     ROCKSDB_NAMESPACE::Status s =
249         db_stress_env->CreateDirIfMissing(default_secondaries_path);
250     if (!s.ok()) {
251       fprintf(stderr, "Failed to create directory %s: %s\n",
252               default_secondaries_path.c_str(), s.ToString().c_str());
253       exit(1);
254     }
255     FLAGS_secondaries_base = default_secondaries_path;
256   }
257 
258   if (!FLAGS_test_secondary && FLAGS_secondary_catch_up_one_in > 0) {
259     fprintf(
260         stderr,
261         "Must set -test_secondary=true if secondary_catch_up_one_in > 0.\n");
262     exit(1);
263   }
264   if (FLAGS_best_efforts_recovery && !FLAGS_skip_verifydb &&
265       !FLAGS_disable_wal) {
266     fprintf(stderr,
267             "With best-efforts recovery, either skip_verifydb or disable_wal "
268             "should be set to true.\n");
269     exit(1);
270   }
271   if (FLAGS_skip_verifydb) {
272     if (FLAGS_verify_db_one_in > 0) {
273       fprintf(stderr,
274               "Must set -verify_db_one_in=0 if skip_verifydb is true.\n");
275       exit(1);
276     }
277     if (FLAGS_continuous_verification_interval > 0) {
278       fprintf(stderr,
279               "Must set -continuous_verification_interval=0 if skip_verifydb "
280               "is true.\n");
281       exit(1);
282     }
283   }
284   if (FLAGS_enable_compaction_filter &&
285       (FLAGS_acquire_snapshot_one_in > 0 || FLAGS_compact_range_one_in > 0 ||
286        FLAGS_iterpercent > 0 || FLAGS_test_batches_snapshots ||
287        FLAGS_test_cf_consistency)) {
288     fprintf(
289         stderr,
290         "Error: acquire_snapshot_one_in, compact_range_one_in, iterpercent, "
291         "test_batches_snapshots  must all be 0 when using compaction filter\n");
292     exit(1);
293   }
294   if (FLAGS_batch_protection_bytes_per_key > 0 &&
295       !FLAGS_test_batches_snapshots) {
296     fprintf(stderr,
297             "Error: test_batches_snapshots must be enabled when "
298             "batch_protection_bytes_per_key > 0\n");
299     exit(1);
300   }
301 
302 #ifndef NDEBUG
303   KillPoint* kp = KillPoint::GetInstance();
304   kp->rocksdb_kill_odds = FLAGS_kill_random_test;
305   kp->rocksdb_kill_exclude_prefixes = SplitString(FLAGS_kill_exclude_prefixes);
306 #endif
307 
308   unsigned int levels = FLAGS_max_key_len;
309   std::vector<std::string> weights;
310   uint64_t scale_factor = FLAGS_key_window_scale_factor;
311   key_gen_ctx.window = scale_factor * 100;
312   if (!FLAGS_key_len_percent_dist.empty()) {
313     weights = SplitString(FLAGS_key_len_percent_dist);
314     if (weights.size() != levels) {
315       fprintf(stderr,
316               "Number of weights in key_len_dist should be equal to"
317               " max_key_len");
318       exit(1);
319     }
320 
321     uint64_t total_weight = 0;
322     for (std::string& weight : weights) {
323       uint64_t val = std::stoull(weight);
324       key_gen_ctx.weights.emplace_back(val * scale_factor);
325       total_weight += val;
326     }
327     if (total_weight != 100) {
328       fprintf(stderr, "Sum of all weights in key_len_dist should be 100");
329       exit(1);
330     }
331   } else {
332     uint64_t keys_per_level = key_gen_ctx.window / levels;
333     for (unsigned int level = 0; level + 1 < levels; ++level) {
334       key_gen_ctx.weights.emplace_back(keys_per_level);
335     }
336     key_gen_ctx.weights.emplace_back(key_gen_ctx.window -
337                                      keys_per_level * (levels - 1));
338   }
339 
340   std::unique_ptr<ROCKSDB_NAMESPACE::StressTest> stress;
341   if (FLAGS_test_cf_consistency) {
342     stress.reset(CreateCfConsistencyStressTest());
343   } else if (FLAGS_test_batches_snapshots) {
344     stress.reset(CreateBatchedOpsStressTest());
345   } else {
346     stress.reset(CreateNonBatchedOpsStressTest());
347   }
348   // Initialize the Zipfian pre-calculated array
349   InitializeHotKeyGenerator(FLAGS_hot_key_alpha);
350   if (RunStressTest(stress.get())) {
351     return 0;
352   } else {
353     return 1;
354   }
355 }
356 
357 }  // namespace ROCKSDB_NAMESPACE
358 #endif  // GFLAGS
359