1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #include "db/db_iter.h"
7 #include "db/dbformat.h"
8 #include "rocksdb/comparator.h"
9 #include "rocksdb/options.h"
10 #include "rocksdb/slice.h"
11 #include "test_util/testharness.h"
12 #include "util/random.h"
13 #include "util/string_util.h"
14 #include "utilities/merge_operators.h"
15 
16 #ifdef GFLAGS
17 
18 #include "util/gflags_compat.h"
19 
20 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
21 
22 DEFINE_bool(verbose, false,
23             "Print huge, detailed trace. Intended for debugging failures.");
24 
25 #else
26 
ParseCommandLineFlags(int *,char ***,bool)27 void ParseCommandLineFlags(int*, char***, bool) {}
28 bool FLAGS_verbose = false;
29 
30 #endif
31 
32 namespace ROCKSDB_NAMESPACE {
33 
34 class DBIteratorStressTest : public testing::Test {
35  public:
36   Env* env_;
37 
DBIteratorStressTest()38   DBIteratorStressTest() : env_(Env::Default()) {}
39 };
40 
41 namespace {
42 
43 struct Entry {
44   std::string key;
45   ValueType type;  // kTypeValue, kTypeDeletion, kTypeMerge
46   uint64_t sequence;
47   std::string ikey;  // internal key, made from `key`, `sequence` and `type`
48   std::string value;
49   // If false, we'll pretend that this entry doesn't exist.
50   bool visible = true;
51 
operator <ROCKSDB_NAMESPACE::__anon118a491c0111::Entry52   bool operator<(const Entry& e) const {
53     if (key != e.key) return key < e.key;
54     return std::tie(sequence, type) > std::tie(e.sequence, e.type);
55   }
56 };
57 
58 struct Data {
59   std::vector<Entry> entries;
60 
61   // Indices in `entries` with `visible` = false.
62   std::vector<size_t> hidden;
63   // Keys of entries whose `visible` changed since the last seek of iterators.
64   std::set<std::string> recently_touched_keys;
65 };
66 
67 struct StressTestIterator : public InternalIterator {
68   Data* data;
69   Random64* rnd;
70   InternalKeyComparator cmp;
71 
72   // Each operation will return error with this probability...
73   double error_probability = 0;
74   // ... and add/remove entries with this probability.
75   double mutation_probability = 0;
76   // The probability of adding vs removing entries will be chosen so that the
77   // amount of removed entries stays somewhat close to this number.
78   double target_hidden_fraction = 0;
79   // If true, print all mutations to stdout for debugging.
80   bool trace = false;
81 
82   int iter = -1;
83   Status status_;
84 
StressTestIteratorROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator85   StressTestIterator(Data* _data, Random64* _rnd, const Comparator* _cmp)
86       : data(_data), rnd(_rnd), cmp(_cmp) {}
87 
ValidROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator88   bool Valid() const override {
89     if (iter >= 0 && iter < (int)data->entries.size()) {
90       assert(status_.ok());
91       return true;
92     }
93     return false;
94   }
95 
statusROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator96   Status status() const override { return status_; }
97 
MaybeFailROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator98   bool MaybeFail() {
99     if (rnd->Next() >=
100         std::numeric_limits<uint64_t>::max() * error_probability) {
101       return false;
102     }
103     if (rnd->Next() % 2) {
104       status_ = Status::Incomplete("test");
105     } else {
106       status_ = Status::IOError("test");
107     }
108     if (trace) {
109       std::cout << "injecting " << status_.ToString() << std::endl;
110     }
111     iter = -1;
112     return true;
113   }
114 
MaybeMutateROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator115   void MaybeMutate() {
116     if (rnd->Next() >=
117         std::numeric_limits<uint64_t>::max() * mutation_probability) {
118       return;
119     }
120     do {
121       // If too many entries are hidden, hide less, otherwise hide more.
122       double hide_probability =
123           data->hidden.size() > data->entries.size() * target_hidden_fraction
124               ? 1. / 3
125               : 2. / 3;
126       if (data->hidden.empty()) {
127         hide_probability = 1;
128       }
129       bool do_hide =
130           rnd->Next() < std::numeric_limits<uint64_t>::max() * hide_probability;
131       if (do_hide) {
132         // Hide a random entry.
133         size_t idx = rnd->Next() % data->entries.size();
134         Entry& e = data->entries[idx];
135         if (e.visible) {
136           if (trace) {
137             std::cout << "hiding idx " << idx << std::endl;
138           }
139           e.visible = false;
140           data->hidden.push_back(idx);
141           data->recently_touched_keys.insert(e.key);
142         } else {
143           // Already hidden. Let's go unhide something instead, just because
144           // it's easy and it doesn't really matter what we do.
145           do_hide = false;
146         }
147       }
148       if (!do_hide) {
149         // Unhide a random entry.
150         size_t hi = rnd->Next() % data->hidden.size();
151         size_t idx = data->hidden[hi];
152         if (trace) {
153           std::cout << "unhiding idx " << idx << std::endl;
154         }
155         Entry& e = data->entries[idx];
156         assert(!e.visible);
157         e.visible = true;
158         data->hidden[hi] = data->hidden.back();
159         data->hidden.pop_back();
160         data->recently_touched_keys.insert(e.key);
161       }
162     } while (rnd->Next() % 3 != 0);  // do 3 mutations on average
163   }
164 
SkipForwardROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator165   void SkipForward() {
166     while (iter < (int)data->entries.size() && !data->entries[iter].visible) {
167       ++iter;
168     }
169   }
SkipBackwardROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator170   void SkipBackward() {
171     while (iter >= 0 && !data->entries[iter].visible) {
172       --iter;
173     }
174   }
175 
SeekToFirstROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator176   void SeekToFirst() override {
177     if (MaybeFail()) return;
178     MaybeMutate();
179 
180     status_ = Status::OK();
181     iter = 0;
182     SkipForward();
183   }
SeekToLastROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator184   void SeekToLast() override {
185     if (MaybeFail()) return;
186     MaybeMutate();
187 
188     status_ = Status::OK();
189     iter = (int)data->entries.size() - 1;
190     SkipBackward();
191   }
192 
SeekROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator193   void Seek(const Slice& target) override {
194     if (MaybeFail()) return;
195     MaybeMutate();
196 
197     status_ = Status::OK();
198     // Binary search.
199     auto it = std::partition_point(
200         data->entries.begin(), data->entries.end(),
201         [&](const Entry& e) { return cmp.Compare(e.ikey, target) < 0; });
202     iter = (int)(it - data->entries.begin());
203     SkipForward();
204   }
SeekForPrevROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator205   void SeekForPrev(const Slice& target) override {
206     if (MaybeFail()) return;
207     MaybeMutate();
208 
209     status_ = Status::OK();
210     // Binary search.
211     auto it = std::partition_point(
212         data->entries.begin(), data->entries.end(),
213         [&](const Entry& e) { return cmp.Compare(e.ikey, target) <= 0; });
214     iter = (int)(it - data->entries.begin());
215     --iter;
216     SkipBackward();
217   }
218 
NextROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator219   void Next() override {
220     assert(Valid());
221     if (MaybeFail()) return;
222     MaybeMutate();
223     ++iter;
224     SkipForward();
225   }
PrevROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator226   void Prev() override {
227     assert(Valid());
228     if (MaybeFail()) return;
229     MaybeMutate();
230     --iter;
231     SkipBackward();
232   }
233 
keyROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator234   Slice key() const override {
235     assert(Valid());
236     return data->entries[iter].ikey;
237   }
valueROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator238   Slice value() const override {
239     assert(Valid());
240     return data->entries[iter].value;
241   }
242 
IsKeyPinnedROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator243   bool IsKeyPinned() const override { return true; }
IsValuePinnedROCKSDB_NAMESPACE::__anon118a491c0111::StressTestIterator244   bool IsValuePinned() const override { return true; }
245 };
246 
247 // A small reimplementation of DBIter, supporting only some of the features,
248 // and doing everything in O(log n).
249 // Skips all keys that are in recently_touched_keys.
250 struct ReferenceIterator {
251   Data* data;
252   uint64_t sequence;  // ignore entries with sequence number below this
253 
254   bool valid = false;
255   std::string key;
256   std::string value;
257 
ReferenceIteratorROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator258   ReferenceIterator(Data* _data, uint64_t _sequence)
259       : data(_data), sequence(_sequence) {}
260 
ValidROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator261   bool Valid() const { return valid; }
262 
263   // Finds the first entry with key
264   // greater/less/greater-or-equal/less-or-equal than `key`, depending on
265   // arguments: if `skip`, inequality is strict; if `forward`, it's
266   // greater/greater-or-equal, otherwise less/less-or-equal.
267   // Sets `key` to the result.
268   // If no such key exists, returns false. Doesn't check `visible`.
FindNextKeyROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator269   bool FindNextKey(bool skip, bool forward) {
270     valid = false;
271     auto it = std::partition_point(data->entries.begin(), data->entries.end(),
272                                    [&](const Entry& e) {
273                                      if (forward != skip) {
274                                        return e.key < key;
275                                      } else {
276                                        return e.key <= key;
277                                      }
278                                    });
279     if (forward) {
280       if (it != data->entries.end()) {
281         key = it->key;
282         return true;
283       }
284     } else {
285       if (it != data->entries.begin()) {
286         --it;
287         key = it->key;
288         return true;
289       }
290     }
291     return false;
292   }
293 
FindValueForCurrentKeyROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator294   bool FindValueForCurrentKey() {
295     if (data->recently_touched_keys.count(key)) {
296       return false;
297     }
298 
299     // Find the first entry for the key. The caller promises that it exists.
300     auto it = std::partition_point(data->entries.begin(), data->entries.end(),
301                                    [&](const Entry& e) {
302                                      if (e.key != key) {
303                                        return e.key < key;
304                                      }
305                                      return e.sequence > sequence;
306                                    });
307 
308     // Find the first visible entry.
309     for (;; ++it) {
310       if (it == data->entries.end()) {
311         return false;
312       }
313       Entry& e = *it;
314       if (e.key != key) {
315         return false;
316       }
317       assert(e.sequence <= sequence);
318       if (!e.visible) continue;
319       if (e.type == kTypeDeletion) {
320         return false;
321       }
322       if (e.type == kTypeValue) {
323         value = e.value;
324         valid = true;
325         return true;
326       }
327       assert(e.type == kTypeMerge);
328       break;
329     }
330 
331     // Collect merge operands.
332     std::vector<Slice> operands;
333     for (; it != data->entries.end(); ++it) {
334       Entry& e = *it;
335       if (e.key != key) {
336         break;
337       }
338       assert(e.sequence <= sequence);
339       if (!e.visible) continue;
340       if (e.type == kTypeDeletion) {
341         break;
342       }
343       operands.push_back(e.value);
344       if (e.type == kTypeValue) {
345         break;
346       }
347     }
348 
349     // Do a merge.
350     value = operands.back().ToString();
351     for (int i = (int)operands.size() - 2; i >= 0; --i) {
352       value.append(",");
353       value.append(operands[i].data(), operands[i].size());
354     }
355 
356     valid = true;
357     return true;
358   }
359 
360   // Start at `key` and move until we encounter a valid value.
361   // `forward` defines the direction of movement.
362   // If `skip` is true, we're looking for key not equal to `key`.
DoTheThingROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator363   void DoTheThing(bool skip, bool forward) {
364     while (FindNextKey(skip, forward) && !FindValueForCurrentKey()) {
365       skip = true;
366     }
367   }
368 
SeekROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator369   void Seek(const Slice& target) {
370     key = target.ToString();
371     DoTheThing(false, true);
372   }
SeekForPrevROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator373   void SeekForPrev(const Slice& target) {
374     key = target.ToString();
375     DoTheThing(false, false);
376   }
SeekToFirstROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator377   void SeekToFirst() { Seek(""); }
SeekToLastROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator378   void SeekToLast() {
379     key = data->entries.back().key;
380     DoTheThing(false, false);
381   }
NextROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator382   void Next() {
383     assert(Valid());
384     DoTheThing(true, true);
385   }
PrevROCKSDB_NAMESPACE::__anon118a491c0111::ReferenceIterator386   void Prev() {
387     assert(Valid());
388     DoTheThing(true, false);
389   }
390 };
391 
392 }  // namespace
393 
394 // Use an internal iterator that sometimes returns errors and sometimes
395 // adds/removes entries on the fly. Do random operations on a DBIter and
396 // check results.
397 // TODO: can be improved for more coverage:
398 //   * Override IsKeyPinned() and IsValuePinned() to actually use
399 //     PinnedIteratorManager and check that there's no use-after free.
400 //   * Try different combinations of prefix_extractor, total_order_seek,
401 //     prefix_same_as_start, iterate_lower_bound, iterate_upper_bound.
TEST_F(DBIteratorStressTest,StressTest)402 TEST_F(DBIteratorStressTest, StressTest) {
403   // We use a deterministic RNG, and everything happens in a single thread.
404   Random64 rnd(826909345792864532ll);
405 
406   auto gen_key = [&](int max_key) {
407     assert(max_key > 0);
408     int len = 0;
409     int a = max_key;
410     while (a) {
411       a /= 10;
412       ++len;
413     }
414     std::string s = ToString(rnd.Next() % static_cast<uint64_t>(max_key));
415     s.insert(0, len - (int)s.size(), '0');
416     return s;
417   };
418 
419   Options options;
420   options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
421   ReadOptions ropt;
422 
423   size_t num_matching = 0;
424   size_t num_at_end = 0;
425   size_t num_not_ok = 0;
426   size_t num_recently_removed = 0;
427 
428   // Number of iterations for each combination of parameters
429   // (there are ~250 of those).
430   // Tweak this to change the test run time.
431   // As of the time of writing, the test takes ~4 seconds for value of 5000.
432   const int num_iterations = 5000;
433   // Enable this to print all the operations for debugging.
434   bool trace = FLAGS_verbose;
435 
436   for (int num_entries : {5, 10, 100}) {
437     for (double key_space : {0.1, 1.0, 3.0}) {
438       for (ValueType prevalent_entry_type :
439            {kTypeValue, kTypeDeletion, kTypeMerge}) {
440         for (double error_probability : {0.01, 0.1}) {
441           for (double mutation_probability : {0.01, 0.5}) {
442             for (double target_hidden_fraction : {0.1, 0.5}) {
443               std::string trace_str =
444                   "entries: " + ToString(num_entries) +
445                   ", key_space: " + ToString(key_space) +
446                   ", error_probability: " + ToString(error_probability) +
447                   ", mutation_probability: " + ToString(mutation_probability) +
448                   ", target_hidden_fraction: " +
449                   ToString(target_hidden_fraction);
450               SCOPED_TRACE(trace_str);
451               if (trace) {
452                 std::cout << trace_str << std::endl;
453               }
454 
455               // Generate data.
456               Data data;
457               int max_key = (int)(num_entries * key_space) + 1;
458               for (int i = 0; i < num_entries; ++i) {
459                 Entry e;
460                 e.key = gen_key(max_key);
461                 if (rnd.Next() % 10 != 0) {
462                   e.type = prevalent_entry_type;
463                 } else {
464                   const ValueType types[] = {kTypeValue, kTypeDeletion,
465                                              kTypeMerge};
466                   e.type =
467                       types[rnd.Next() % (sizeof(types) / sizeof(types[0]))];
468                 }
469                 e.sequence = i;
470                 e.value = "v" + ToString(i);
471                 ParsedInternalKey internal_key(e.key, e.sequence, e.type);
472                 AppendInternalKey(&e.ikey, internal_key);
473 
474                 data.entries.push_back(e);
475               }
476               std::sort(data.entries.begin(), data.entries.end());
477               if (trace) {
478                 std::cout << "entries:";
479                 for (size_t i = 0; i < data.entries.size(); ++i) {
480                   Entry& e = data.entries[i];
481                   std::cout
482                       << "\n  idx " << i << ": \"" << e.key << "\": \""
483                       << e.value << "\" seq: " << e.sequence << " type: "
484                       << (e.type == kTypeValue
485                               ? "val"
486                               : e.type == kTypeDeletion ? "del" : "merge");
487                 }
488                 std::cout << std::endl;
489               }
490 
491               std::unique_ptr<Iterator> db_iter;
492               std::unique_ptr<ReferenceIterator> ref_iter;
493               for (int iteration = 0; iteration < num_iterations; ++iteration) {
494                 SCOPED_TRACE(iteration);
495                 // Create a new iterator every ~30 operations.
496                 if (db_iter == nullptr || rnd.Next() % 30 == 0) {
497                   uint64_t sequence = rnd.Next() % (data.entries.size() + 2);
498                   ref_iter.reset(new ReferenceIterator(&data, sequence));
499                   if (trace) {
500                     std::cout << "new iterator, seq: " << sequence << std::endl;
501                   }
502 
503                   auto internal_iter =
504                       new StressTestIterator(&data, &rnd, BytewiseComparator());
505                   internal_iter->error_probability = error_probability;
506                   internal_iter->mutation_probability = mutation_probability;
507                   internal_iter->target_hidden_fraction =
508                       target_hidden_fraction;
509                   internal_iter->trace = trace;
510                   db_iter.reset(NewDBIterator(
511                       env_, ropt, ImmutableCFOptions(options),
512                       MutableCFOptions(options), BytewiseComparator(),
513                       internal_iter, sequence,
514                       options.max_sequential_skip_in_iterations,
515                       nullptr /*read_callback*/));
516                 }
517 
518                 // Do a random operation. It's important to do it on ref_it
519                 // later than on db_iter to make sure ref_it sees the correct
520                 // recently_touched_keys.
521                 std::string old_key;
522                 bool forward = rnd.Next() % 2 > 0;
523                 // Do Next()/Prev() ~90% of the time.
524                 bool seek = !ref_iter->Valid() || rnd.Next() % 10 == 0;
525                 if (trace) {
526                   std::cout << iteration << ": ";
527                 }
528 
529                 if (!seek) {
530                   assert(db_iter->Valid());
531                   old_key = ref_iter->key;
532                   if (trace) {
533                     std::cout << (forward ? "Next" : "Prev") << std::endl;
534                   }
535 
536                   if (forward) {
537                     db_iter->Next();
538                     ref_iter->Next();
539                   } else {
540                     db_iter->Prev();
541                     ref_iter->Prev();
542                   }
543                 } else {
544                   data.recently_touched_keys.clear();
545                   // Do SeekToFirst less often than Seek.
546                   if (rnd.Next() % 4 == 0) {
547                     if (trace) {
548                       std::cout << (forward ? "SeekToFirst" : "SeekToLast")
549                                 << std::endl;
550                     }
551 
552                     if (forward) {
553                       old_key = "";
554                       db_iter->SeekToFirst();
555                       ref_iter->SeekToFirst();
556                     } else {
557                       old_key = data.entries.back().key;
558                       db_iter->SeekToLast();
559                       ref_iter->SeekToLast();
560                     }
561                   } else {
562                     old_key = gen_key(max_key);
563                     if (trace) {
564                       std::cout << (forward ? "Seek" : "SeekForPrev") << " \""
565                                 << old_key << '"' << std::endl;
566                     }
567                     if (forward) {
568                       db_iter->Seek(old_key);
569                       ref_iter->Seek(old_key);
570                     } else {
571                       db_iter->SeekForPrev(old_key);
572                       ref_iter->SeekForPrev(old_key);
573                     }
574                   }
575                 }
576 
577                 // Check the result.
578                 if (db_iter->Valid()) {
579                   ASSERT_TRUE(db_iter->status().ok());
580                   if (data.recently_touched_keys.count(
581                           db_iter->key().ToString())) {
582                     // Ended on a key that may have been mutated during the
583                     // operation. Reference iterator skips such keys, so we
584                     // can't check the exact result.
585 
586                     // Check that the key moved in the right direction.
587                     if (forward) {
588                       if (seek)
589                         ASSERT_GE(db_iter->key().ToString(), old_key);
590                       else
591                         ASSERT_GT(db_iter->key().ToString(), old_key);
592                     } else {
593                       if (seek)
594                         ASSERT_LE(db_iter->key().ToString(), old_key);
595                       else
596                         ASSERT_LT(db_iter->key().ToString(), old_key);
597                     }
598 
599                     if (ref_iter->Valid()) {
600                       // Check that DBIter didn't miss any non-mutated key.
601                       if (forward) {
602                         ASSERT_LT(db_iter->key().ToString(), ref_iter->key);
603                       } else {
604                         ASSERT_GT(db_iter->key().ToString(), ref_iter->key);
605                       }
606                     }
607                     // Tell the next iteration of the loop to reseek the
608                     // iterators.
609                     ref_iter->valid = false;
610 
611                     ++num_recently_removed;
612                   } else {
613                     ASSERT_TRUE(ref_iter->Valid());
614                     ASSERT_EQ(ref_iter->key, db_iter->key().ToString());
615                     ASSERT_EQ(ref_iter->value, db_iter->value());
616                     ++num_matching;
617                   }
618                 } else if (db_iter->status().ok()) {
619                   ASSERT_FALSE(ref_iter->Valid());
620                   ++num_at_end;
621                 } else {
622                   // Non-ok status. Nothing to check here.
623                   // Tell the next iteration of the loop to reseek the
624                   // iterators.
625                   ref_iter->valid = false;
626                   ++num_not_ok;
627                 }
628               }
629             }
630           }
631         }
632       }
633     }
634   }
635 
636   // Check that all cases were hit many times.
637   EXPECT_GT(num_matching, 10000);
638   EXPECT_GT(num_at_end, 10000);
639   EXPECT_GT(num_not_ok, 10000);
640   EXPECT_GT(num_recently_removed, 10000);
641 
642   std::cout << "stats:\n  exact matches: " << num_matching
643             << "\n  end reached: " << num_at_end
644             << "\n  non-ok status: " << num_not_ok
645             << "\n  mutated on the fly: " << num_recently_removed << std::endl;
646 }
647 
648 }  // namespace ROCKSDB_NAMESPACE
649 
main(int argc,char ** argv)650 int main(int argc, char** argv) {
651   ::testing::InitGoogleTest(&argc, argv);
652   ParseCommandLineFlags(&argc, &argv, true);
653   return RUN_ALL_TESTS();
654 }
655