1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include <functional>
11 
12 #include "db/arena_wrapped_db_iter.h"
13 #include "db/db_iter.h"
14 #include "db/db_test_util.h"
15 #include "port/port.h"
16 #include "port/stack_trace.h"
17 #include "rocksdb/iostats_context.h"
18 #include "rocksdb/perf_context.h"
19 #include "table/block_based/flush_block_policy.h"
20 
21 namespace ROCKSDB_NAMESPACE {
22 
23 // A dumb ReadCallback which saying every key is committed.
24 class DummyReadCallback : public ReadCallback {
25  public:
DummyReadCallback()26   DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {}
IsVisibleFullCheck(SequenceNumber)27   bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; }
SetSnapshot(SequenceNumber seq)28   void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; }
29 };
30 
31 // Test param:
32 //   bool: whether to pass read_callback to NewIterator().
33 class DBIteratorTest : public DBTestBase,
34                        public testing::WithParamInterface<bool> {
35  public:
DBIteratorTest()36   DBIteratorTest() : DBTestBase("/db_iterator_test") {}
37 
NewIterator(const ReadOptions & read_options,ColumnFamilyHandle * column_family=nullptr)38   Iterator* NewIterator(const ReadOptions& read_options,
39                         ColumnFamilyHandle* column_family = nullptr) {
40     if (column_family == nullptr) {
41       column_family = db_->DefaultColumnFamily();
42     }
43     auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
44     SequenceNumber seq = read_options.snapshot != nullptr
45                              ? read_options.snapshot->GetSequenceNumber()
46                              : db_->GetLatestSequenceNumber();
47     bool use_read_callback = GetParam();
48     DummyReadCallback* read_callback = nullptr;
49     if (use_read_callback) {
50       read_callback = new DummyReadCallback();
51       read_callback->SetSnapshot(seq);
52       InstrumentedMutexLock lock(&mutex_);
53       read_callbacks_.push_back(
54           std::unique_ptr<DummyReadCallback>(read_callback));
55     }
56     return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback);
57   }
58 
59  private:
60   InstrumentedMutex mutex_;
61   std::vector<std::unique_ptr<DummyReadCallback>> read_callbacks_;
62 };
63 
TEST_P(DBIteratorTest,IteratorProperty)64 TEST_P(DBIteratorTest, IteratorProperty) {
65   // The test needs to be changed if kPersistedTier is supported in iterator.
66   Options options = CurrentOptions();
67   CreateAndReopenWithCF({"pikachu"}, options);
68   Put(1, "1", "2");
69   Delete(1, "2");
70   ReadOptions ropt;
71   ropt.pin_data = false;
72   {
73     std::unique_ptr<Iterator> iter(NewIterator(ropt, handles_[1]));
74     iter->SeekToFirst();
75     std::string prop_value;
76     ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
77     ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
78     ASSERT_EQ("0", prop_value);
79     ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
80     ASSERT_EQ("1", prop_value);
81     iter->Next();
82     ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
83     ASSERT_EQ("Iterator is not valid.", prop_value);
84 
85     // Get internal key at which the iteration stopped (tombstone in this case).
86     ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
87     ASSERT_EQ("2", prop_value);
88   }
89   Close();
90 }
91 
TEST_P(DBIteratorTest,PersistedTierOnIterator)92 TEST_P(DBIteratorTest, PersistedTierOnIterator) {
93   // The test needs to be changed if kPersistedTier is supported in iterator.
94   Options options = CurrentOptions();
95   CreateAndReopenWithCF({"pikachu"}, options);
96   ReadOptions ropt;
97   ropt.read_tier = kPersistedTier;
98 
99   auto* iter = db_->NewIterator(ropt, handles_[1]);
100   ASSERT_TRUE(iter->status().IsNotSupported());
101   delete iter;
102 
103   std::vector<Iterator*> iters;
104   ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
105   Close();
106 }
107 
TEST_P(DBIteratorTest,NonBlockingIteration)108 TEST_P(DBIteratorTest, NonBlockingIteration) {
109   do {
110     ReadOptions non_blocking_opts, regular_opts;
111     Options options = CurrentOptions();
112     options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
113     non_blocking_opts.read_tier = kBlockCacheTier;
114     CreateAndReopenWithCF({"pikachu"}, options);
115     // write one kv to the database.
116     ASSERT_OK(Put(1, "a", "b"));
117 
118     // scan using non-blocking iterator. We should find it because
119     // it is in memtable.
120     Iterator* iter = NewIterator(non_blocking_opts, handles_[1]);
121     int count = 0;
122     for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
123       ASSERT_OK(iter->status());
124       count++;
125     }
126     ASSERT_EQ(count, 1);
127     delete iter;
128 
129     // flush memtable to storage. Now, the key should not be in the
130     // memtable neither in the block cache.
131     ASSERT_OK(Flush(1));
132 
133     // verify that a non-blocking iterator does not find any
134     // kvs. Neither does it do any IOs to storage.
135     uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
136     uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
137     iter = NewIterator(non_blocking_opts, handles_[1]);
138     count = 0;
139     for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
140       count++;
141     }
142     ASSERT_EQ(count, 0);
143     ASSERT_TRUE(iter->status().IsIncomplete());
144     ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
145     ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
146     delete iter;
147 
148     // read in the specified block via a regular get
149     ASSERT_EQ(Get(1, "a"), "b");
150 
151     // verify that we can find it via a non-blocking scan
152     numopen = TestGetTickerCount(options, NO_FILE_OPENS);
153     cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
154     iter = NewIterator(non_blocking_opts, handles_[1]);
155     count = 0;
156     for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
157       ASSERT_OK(iter->status());
158       count++;
159     }
160     ASSERT_EQ(count, 1);
161     ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
162     ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
163     delete iter;
164 
165     // This test verifies block cache behaviors, which is not used by plain
166     // table format.
167   } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
168 }
169 
TEST_P(DBIteratorTest,IterSeekBeforePrev)170 TEST_P(DBIteratorTest, IterSeekBeforePrev) {
171   ASSERT_OK(Put("a", "b"));
172   ASSERT_OK(Put("c", "d"));
173   dbfull()->Flush(FlushOptions());
174   ASSERT_OK(Put("0", "f"));
175   ASSERT_OK(Put("1", "h"));
176   dbfull()->Flush(FlushOptions());
177   ASSERT_OK(Put("2", "j"));
178   auto iter = NewIterator(ReadOptions());
179   iter->Seek(Slice("c"));
180   iter->Prev();
181   iter->Seek(Slice("a"));
182   iter->Prev();
183   delete iter;
184 }
185 
TEST_P(DBIteratorTest,IterReseekNewUpperBound)186 TEST_P(DBIteratorTest, IterReseekNewUpperBound) {
187   Random rnd(301);
188   Options options = CurrentOptions();
189   BlockBasedTableOptions table_options;
190   table_options.block_size = 1024;
191   table_options.block_size_deviation = 50;
192   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
193   options.compression = kNoCompression;
194   Reopen(options);
195 
196   ASSERT_OK(Put("a", RandomString(&rnd, 400)));
197   ASSERT_OK(Put("aabb", RandomString(&rnd, 400)));
198   ASSERT_OK(Put("aaef", RandomString(&rnd, 400)));
199   ASSERT_OK(Put("b", RandomString(&rnd, 400)));
200   dbfull()->Flush(FlushOptions());
201   ReadOptions opts;
202   Slice ub = Slice("aa");
203   opts.iterate_upper_bound = &ub;
204   auto iter = NewIterator(opts);
205   iter->Seek(Slice("a"));
206   ub = Slice("b");
207   iter->Seek(Slice("aabc"));
208   ASSERT_TRUE(iter->Valid());
209   ASSERT_EQ(iter->key().ToString(), "aaef");
210   delete iter;
211 }
212 
TEST_P(DBIteratorTest,IterSeekForPrevBeforeNext)213 TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) {
214   ASSERT_OK(Put("a", "b"));
215   ASSERT_OK(Put("c", "d"));
216   dbfull()->Flush(FlushOptions());
217   ASSERT_OK(Put("0", "f"));
218   ASSERT_OK(Put("1", "h"));
219   dbfull()->Flush(FlushOptions());
220   ASSERT_OK(Put("2", "j"));
221   auto iter = NewIterator(ReadOptions());
222   iter->SeekForPrev(Slice("0"));
223   iter->Next();
224   iter->SeekForPrev(Slice("1"));
225   iter->Next();
226   delete iter;
227 }
228 
229 namespace {
MakeLongKey(size_t length,char c)230 std::string MakeLongKey(size_t length, char c) {
231   return std::string(length, c);
232 }
233 }  // namespace
234 
TEST_P(DBIteratorTest,IterLongKeys)235 TEST_P(DBIteratorTest, IterLongKeys) {
236   ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
237   ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
238   ASSERT_OK(Put("a", "b"));
239   dbfull()->Flush(FlushOptions());
240   ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
241   ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
242   ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
243   auto iter = NewIterator(ReadOptions());
244 
245   // Create a key that needs to be skipped for Seq too new
246   iter->Seek(MakeLongKey(20, 0));
247   ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
248   iter->Next();
249   ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
250   iter->Next();
251   ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
252   iter->Next();
253   ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
254   iter->Next();
255   ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
256 
257   iter->SeekForPrev(MakeLongKey(127, 3));
258   ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
259   iter->Prev();
260   ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
261   iter->Prev();
262   ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
263   delete iter;
264 
265   iter = NewIterator(ReadOptions());
266   iter->Seek(MakeLongKey(50, 1));
267   ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
268   iter->Next();
269   ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
270   iter->Next();
271   ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
272   delete iter;
273 }
274 
TEST_P(DBIteratorTest,IterNextWithNewerSeq)275 TEST_P(DBIteratorTest, IterNextWithNewerSeq) {
276   ASSERT_OK(Put("0", "0"));
277   dbfull()->Flush(FlushOptions());
278   ASSERT_OK(Put("a", "b"));
279   ASSERT_OK(Put("c", "d"));
280   ASSERT_OK(Put("d", "e"));
281   auto iter = NewIterator(ReadOptions());
282 
283   // Create a key that needs to be skipped for Seq too new
284   for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
285        i++) {
286     ASSERT_OK(Put("b", "f"));
287   }
288 
289   iter->Seek(Slice("a"));
290   ASSERT_EQ(IterStatus(iter), "a->b");
291   iter->Next();
292   ASSERT_EQ(IterStatus(iter), "c->d");
293   iter->SeekForPrev(Slice("b"));
294   ASSERT_EQ(IterStatus(iter), "a->b");
295   iter->Next();
296   ASSERT_EQ(IterStatus(iter), "c->d");
297 
298   delete iter;
299 }
300 
TEST_P(DBIteratorTest,IterPrevWithNewerSeq)301 TEST_P(DBIteratorTest, IterPrevWithNewerSeq) {
302   ASSERT_OK(Put("0", "0"));
303   dbfull()->Flush(FlushOptions());
304   ASSERT_OK(Put("a", "b"));
305   ASSERT_OK(Put("c", "d"));
306   ASSERT_OK(Put("d", "e"));
307   auto iter = NewIterator(ReadOptions());
308 
309   // Create a key that needs to be skipped for Seq too new
310   for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
311        i++) {
312     ASSERT_OK(Put("b", "f"));
313   }
314 
315   iter->Seek(Slice("d"));
316   ASSERT_EQ(IterStatus(iter), "d->e");
317   iter->Prev();
318   ASSERT_EQ(IterStatus(iter), "c->d");
319   iter->Prev();
320   ASSERT_EQ(IterStatus(iter), "a->b");
321   iter->Prev();
322   iter->SeekForPrev(Slice("d"));
323   ASSERT_EQ(IterStatus(iter), "d->e");
324   iter->Prev();
325   ASSERT_EQ(IterStatus(iter), "c->d");
326   iter->Prev();
327   ASSERT_EQ(IterStatus(iter), "a->b");
328   iter->Prev();
329   delete iter;
330 }
331 
TEST_P(DBIteratorTest,IterPrevWithNewerSeq2)332 TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) {
333   ASSERT_OK(Put("0", "0"));
334   dbfull()->Flush(FlushOptions());
335   ASSERT_OK(Put("a", "b"));
336   ASSERT_OK(Put("c", "d"));
337   ASSERT_OK(Put("e", "f"));
338   auto iter = NewIterator(ReadOptions());
339   auto iter2 = NewIterator(ReadOptions());
340   iter->Seek(Slice("c"));
341   iter2->SeekForPrev(Slice("d"));
342   ASSERT_EQ(IterStatus(iter), "c->d");
343   ASSERT_EQ(IterStatus(iter2), "c->d");
344 
345   // Create a key that needs to be skipped for Seq too new
346   for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
347        i++) {
348     ASSERT_OK(Put("b", "f"));
349   }
350 
351   iter->Prev();
352   ASSERT_EQ(IterStatus(iter), "a->b");
353   iter->Prev();
354   iter2->Prev();
355   ASSERT_EQ(IterStatus(iter2), "a->b");
356   iter2->Prev();
357   delete iter;
358   delete iter2;
359 }
360 
TEST_P(DBIteratorTest,IterEmpty)361 TEST_P(DBIteratorTest, IterEmpty) {
362   do {
363     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
364     Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
365 
366     iter->SeekToFirst();
367     ASSERT_EQ(IterStatus(iter), "(invalid)");
368 
369     iter->SeekToLast();
370     ASSERT_EQ(IterStatus(iter), "(invalid)");
371 
372     iter->Seek("foo");
373     ASSERT_EQ(IterStatus(iter), "(invalid)");
374 
375     iter->SeekForPrev("foo");
376     ASSERT_EQ(IterStatus(iter), "(invalid)");
377 
378     delete iter;
379   } while (ChangeCompactOptions());
380 }
381 
TEST_P(DBIteratorTest,IterSingle)382 TEST_P(DBIteratorTest, IterSingle) {
383   do {
384     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
385     ASSERT_OK(Put(1, "a", "va"));
386     Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
387 
388     iter->SeekToFirst();
389     ASSERT_EQ(IterStatus(iter), "a->va");
390     iter->Next();
391     ASSERT_EQ(IterStatus(iter), "(invalid)");
392     iter->SeekToFirst();
393     ASSERT_EQ(IterStatus(iter), "a->va");
394     iter->Prev();
395     ASSERT_EQ(IterStatus(iter), "(invalid)");
396 
397     iter->SeekToLast();
398     ASSERT_EQ(IterStatus(iter), "a->va");
399     iter->Next();
400     ASSERT_EQ(IterStatus(iter), "(invalid)");
401     iter->SeekToLast();
402     ASSERT_EQ(IterStatus(iter), "a->va");
403     iter->Prev();
404     ASSERT_EQ(IterStatus(iter), "(invalid)");
405 
406     iter->Seek("");
407     ASSERT_EQ(IterStatus(iter), "a->va");
408     iter->Next();
409     ASSERT_EQ(IterStatus(iter), "(invalid)");
410     iter->SeekForPrev("");
411     ASSERT_EQ(IterStatus(iter), "(invalid)");
412 
413     iter->Seek("a");
414     ASSERT_EQ(IterStatus(iter), "a->va");
415     iter->Next();
416     ASSERT_EQ(IterStatus(iter), "(invalid)");
417     iter->SeekForPrev("a");
418     ASSERT_EQ(IterStatus(iter), "a->va");
419     iter->Prev();
420     ASSERT_EQ(IterStatus(iter), "(invalid)");
421 
422     iter->Seek("b");
423     ASSERT_EQ(IterStatus(iter), "(invalid)");
424     iter->SeekForPrev("b");
425     ASSERT_EQ(IterStatus(iter), "a->va");
426     iter->Prev();
427     ASSERT_EQ(IterStatus(iter), "(invalid)");
428 
429     delete iter;
430   } while (ChangeCompactOptions());
431 }
432 
TEST_P(DBIteratorTest,IterMulti)433 TEST_P(DBIteratorTest, IterMulti) {
434   do {
435     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
436     ASSERT_OK(Put(1, "a", "va"));
437     ASSERT_OK(Put(1, "b", "vb"));
438     ASSERT_OK(Put(1, "c", "vc"));
439     Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
440 
441     iter->SeekToFirst();
442     ASSERT_EQ(IterStatus(iter), "a->va");
443     iter->Next();
444     ASSERT_EQ(IterStatus(iter), "b->vb");
445     iter->Next();
446     ASSERT_EQ(IterStatus(iter), "c->vc");
447     iter->Next();
448     ASSERT_EQ(IterStatus(iter), "(invalid)");
449     iter->SeekToFirst();
450     ASSERT_EQ(IterStatus(iter), "a->va");
451     iter->Prev();
452     ASSERT_EQ(IterStatus(iter), "(invalid)");
453 
454     iter->SeekToLast();
455     ASSERT_EQ(IterStatus(iter), "c->vc");
456     iter->Prev();
457     ASSERT_EQ(IterStatus(iter), "b->vb");
458     iter->Prev();
459     ASSERT_EQ(IterStatus(iter), "a->va");
460     iter->Prev();
461     ASSERT_EQ(IterStatus(iter), "(invalid)");
462     iter->SeekToLast();
463     ASSERT_EQ(IterStatus(iter), "c->vc");
464     iter->Next();
465     ASSERT_EQ(IterStatus(iter), "(invalid)");
466 
467     iter->Seek("");
468     ASSERT_EQ(IterStatus(iter), "a->va");
469     iter->Seek("a");
470     ASSERT_EQ(IterStatus(iter), "a->va");
471     iter->Seek("ax");
472     ASSERT_EQ(IterStatus(iter), "b->vb");
473     iter->SeekForPrev("d");
474     ASSERT_EQ(IterStatus(iter), "c->vc");
475     iter->SeekForPrev("c");
476     ASSERT_EQ(IterStatus(iter), "c->vc");
477     iter->SeekForPrev("bx");
478     ASSERT_EQ(IterStatus(iter), "b->vb");
479 
480     iter->Seek("b");
481     ASSERT_EQ(IterStatus(iter), "b->vb");
482     iter->Seek("z");
483     ASSERT_EQ(IterStatus(iter), "(invalid)");
484     iter->SeekForPrev("b");
485     ASSERT_EQ(IterStatus(iter), "b->vb");
486     iter->SeekForPrev("");
487     ASSERT_EQ(IterStatus(iter), "(invalid)");
488 
489     // Switch from reverse to forward
490     iter->SeekToLast();
491     iter->Prev();
492     iter->Prev();
493     iter->Next();
494     ASSERT_EQ(IterStatus(iter), "b->vb");
495 
496     // Switch from forward to reverse
497     iter->SeekToFirst();
498     iter->Next();
499     iter->Next();
500     iter->Prev();
501     ASSERT_EQ(IterStatus(iter), "b->vb");
502 
503     // Make sure iter stays at snapshot
504     ASSERT_OK(Put(1, "a", "va2"));
505     ASSERT_OK(Put(1, "a2", "va3"));
506     ASSERT_OK(Put(1, "b", "vb2"));
507     ASSERT_OK(Put(1, "c", "vc2"));
508     ASSERT_OK(Delete(1, "b"));
509     iter->SeekToFirst();
510     ASSERT_EQ(IterStatus(iter), "a->va");
511     iter->Next();
512     ASSERT_EQ(IterStatus(iter), "b->vb");
513     iter->Next();
514     ASSERT_EQ(IterStatus(iter), "c->vc");
515     iter->Next();
516     ASSERT_EQ(IterStatus(iter), "(invalid)");
517     iter->SeekToLast();
518     ASSERT_EQ(IterStatus(iter), "c->vc");
519     iter->Prev();
520     ASSERT_EQ(IterStatus(iter), "b->vb");
521     iter->Prev();
522     ASSERT_EQ(IterStatus(iter), "a->va");
523     iter->Prev();
524     ASSERT_EQ(IterStatus(iter), "(invalid)");
525 
526     delete iter;
527   } while (ChangeCompactOptions());
528 }
529 
530 // Check that we can skip over a run of user keys
531 // by using reseek rather than sequential scan
TEST_P(DBIteratorTest,IterReseek)532 TEST_P(DBIteratorTest, IterReseek) {
533   anon::OptionsOverride options_override;
534   options_override.skip_policy = kSkipNoSnapshot;
535   Options options = CurrentOptions(options_override);
536   options.max_sequential_skip_in_iterations = 3;
537   options.create_if_missing = true;
538   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
539   DestroyAndReopen(options);
540   CreateAndReopenWithCF({"pikachu"}, options);
541 
542   // insert three keys with same userkey and verify that
543   // reseek is not invoked. For each of these test cases,
544   // verify that we can find the next key "b".
545   ASSERT_OK(Put(1, "a", "zero"));
546   ASSERT_OK(Put(1, "a", "one"));
547   ASSERT_OK(Put(1, "a", "two"));
548   ASSERT_OK(Put(1, "b", "bone"));
549   Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
550   iter->SeekToFirst();
551   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
552   ASSERT_EQ(IterStatus(iter), "a->two");
553   iter->Next();
554   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
555   ASSERT_EQ(IterStatus(iter), "b->bone");
556   delete iter;
557 
558   // insert a total of three keys with same userkey and verify
559   // that reseek is still not invoked.
560   ASSERT_OK(Put(1, "a", "three"));
561   iter = NewIterator(ReadOptions(), handles_[1]);
562   iter->SeekToFirst();
563   ASSERT_EQ(IterStatus(iter), "a->three");
564   iter->Next();
565   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
566   ASSERT_EQ(IterStatus(iter), "b->bone");
567   delete iter;
568 
569   // insert a total of four keys with same userkey and verify
570   // that reseek is invoked.
571   ASSERT_OK(Put(1, "a", "four"));
572   iter = NewIterator(ReadOptions(), handles_[1]);
573   iter->SeekToFirst();
574   ASSERT_EQ(IterStatus(iter), "a->four");
575   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
576   iter->Next();
577   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
578   ASSERT_EQ(IterStatus(iter), "b->bone");
579   delete iter;
580 
581   // Testing reverse iterator
582   // At this point, we have three versions of "a" and one version of "b".
583   // The reseek statistics is already at 1.
584   int num_reseeks = static_cast<int>(
585       TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
586 
587   // Insert another version of b and assert that reseek is not invoked
588   ASSERT_OK(Put(1, "b", "btwo"));
589   iter = NewIterator(ReadOptions(), handles_[1]);
590   iter->SeekToLast();
591   ASSERT_EQ(IterStatus(iter), "b->btwo");
592   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
593             num_reseeks);
594   iter->Prev();
595   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
596             num_reseeks + 1);
597   ASSERT_EQ(IterStatus(iter), "a->four");
598   delete iter;
599 
600   // insert two more versions of b. This makes a total of 4 versions
601   // of b and 4 versions of a.
602   ASSERT_OK(Put(1, "b", "bthree"));
603   ASSERT_OK(Put(1, "b", "bfour"));
604   iter = NewIterator(ReadOptions(), handles_[1]);
605   iter->SeekToLast();
606   ASSERT_EQ(IterStatus(iter), "b->bfour");
607   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
608             num_reseeks + 2);
609   iter->Prev();
610 
611   // the previous Prev call should have invoked reseek
612   ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
613             num_reseeks + 3);
614   ASSERT_EQ(IterStatus(iter), "a->four");
615   delete iter;
616 }
617 
TEST_P(DBIteratorTest,IterSmallAndLargeMix)618 TEST_P(DBIteratorTest, IterSmallAndLargeMix) {
619   do {
620     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
621     ASSERT_OK(Put(1, "a", "va"));
622     ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
623     ASSERT_OK(Put(1, "c", "vc"));
624     ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
625     ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
626 
627     Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
628 
629     iter->SeekToFirst();
630     ASSERT_EQ(IterStatus(iter), "a->va");
631     iter->Next();
632     ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
633     iter->Next();
634     ASSERT_EQ(IterStatus(iter), "c->vc");
635     iter->Next();
636     ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
637     iter->Next();
638     ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
639     iter->Next();
640     ASSERT_EQ(IterStatus(iter), "(invalid)");
641 
642     iter->SeekToLast();
643     ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
644     iter->Prev();
645     ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
646     iter->Prev();
647     ASSERT_EQ(IterStatus(iter), "c->vc");
648     iter->Prev();
649     ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
650     iter->Prev();
651     ASSERT_EQ(IterStatus(iter), "a->va");
652     iter->Prev();
653     ASSERT_EQ(IterStatus(iter), "(invalid)");
654 
655     delete iter;
656   } while (ChangeCompactOptions());
657 }
658 
TEST_P(DBIteratorTest,IterMultiWithDelete)659 TEST_P(DBIteratorTest, IterMultiWithDelete) {
660   do {
661     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
662     ASSERT_OK(Put(1, "ka", "va"));
663     ASSERT_OK(Put(1, "kb", "vb"));
664     ASSERT_OK(Put(1, "kc", "vc"));
665     ASSERT_OK(Delete(1, "kb"));
666     ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
667 
668     Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
669     iter->Seek("kc");
670     ASSERT_EQ(IterStatus(iter), "kc->vc");
671     if (!CurrentOptions().merge_operator) {
672       // TODO: merge operator does not support backward iteration yet
673       if (kPlainTableAllBytesPrefix != option_config_ &&
674           kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
675           kHashLinkList != option_config_ &&
676           kHashSkipList != option_config_) {  // doesn't support SeekToLast
677         iter->Prev();
678         ASSERT_EQ(IterStatus(iter), "ka->va");
679       }
680     }
681     delete iter;
682   } while (ChangeOptions());
683 }
684 
TEST_P(DBIteratorTest,IterPrevMaxSkip)685 TEST_P(DBIteratorTest, IterPrevMaxSkip) {
686   do {
687     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
688     for (int i = 0; i < 2; i++) {
689       ASSERT_OK(Put(1, "key1", "v1"));
690       ASSERT_OK(Put(1, "key2", "v2"));
691       ASSERT_OK(Put(1, "key3", "v3"));
692       ASSERT_OK(Put(1, "key4", "v4"));
693       ASSERT_OK(Put(1, "key5", "v5"));
694     }
695 
696     VerifyIterLast("key5->v5", 1);
697 
698     ASSERT_OK(Delete(1, "key5"));
699     VerifyIterLast("key4->v4", 1);
700 
701     ASSERT_OK(Delete(1, "key4"));
702     VerifyIterLast("key3->v3", 1);
703 
704     ASSERT_OK(Delete(1, "key3"));
705     VerifyIterLast("key2->v2", 1);
706 
707     ASSERT_OK(Delete(1, "key2"));
708     VerifyIterLast("key1->v1", 1);
709 
710     ASSERT_OK(Delete(1, "key1"));
711     VerifyIterLast("(invalid)", 1);
712   } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
713 }
714 
TEST_P(DBIteratorTest,IterWithSnapshot)715 TEST_P(DBIteratorTest, IterWithSnapshot) {
716   anon::OptionsOverride options_override;
717   options_override.skip_policy = kSkipNoSnapshot;
718   do {
719     CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
720     ASSERT_OK(Put(1, "key1", "val1"));
721     ASSERT_OK(Put(1, "key2", "val2"));
722     ASSERT_OK(Put(1, "key3", "val3"));
723     ASSERT_OK(Put(1, "key4", "val4"));
724     ASSERT_OK(Put(1, "key5", "val5"));
725 
726     const Snapshot* snapshot = db_->GetSnapshot();
727     ReadOptions options;
728     options.snapshot = snapshot;
729     Iterator* iter = NewIterator(options, handles_[1]);
730 
731     ASSERT_OK(Put(1, "key0", "val0"));
732     // Put more values after the snapshot
733     ASSERT_OK(Put(1, "key100", "val100"));
734     ASSERT_OK(Put(1, "key101", "val101"));
735 
736     iter->Seek("key5");
737     ASSERT_EQ(IterStatus(iter), "key5->val5");
738     if (!CurrentOptions().merge_operator) {
739       // TODO: merge operator does not support backward iteration yet
740       if (kPlainTableAllBytesPrefix != option_config_ &&
741           kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
742           kHashLinkList != option_config_ && kHashSkipList != option_config_) {
743         iter->Prev();
744         ASSERT_EQ(IterStatus(iter), "key4->val4");
745         iter->Prev();
746         ASSERT_EQ(IterStatus(iter), "key3->val3");
747 
748         iter->Next();
749         ASSERT_EQ(IterStatus(iter), "key4->val4");
750         iter->Next();
751         ASSERT_EQ(IterStatus(iter), "key5->val5");
752       }
753       iter->Next();
754       ASSERT_TRUE(!iter->Valid());
755     }
756 
757     if (!CurrentOptions().merge_operator) {
758       // TODO(gzh): merge operator does not support backward iteration yet
759       if (kPlainTableAllBytesPrefix != option_config_ &&
760           kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
761           kHashLinkList != option_config_ && kHashSkipList != option_config_) {
762         iter->SeekForPrev("key1");
763         ASSERT_EQ(IterStatus(iter), "key1->val1");
764         iter->Next();
765         ASSERT_EQ(IterStatus(iter), "key2->val2");
766         iter->Next();
767         ASSERT_EQ(IterStatus(iter), "key3->val3");
768         iter->Prev();
769         ASSERT_EQ(IterStatus(iter), "key2->val2");
770         iter->Prev();
771         ASSERT_EQ(IterStatus(iter), "key1->val1");
772         iter->Prev();
773         ASSERT_TRUE(!iter->Valid());
774       }
775     }
776     db_->ReleaseSnapshot(snapshot);
777     delete iter;
778   } while (ChangeOptions());
779 }
780 
TEST_P(DBIteratorTest,IteratorPinsRef)781 TEST_P(DBIteratorTest, IteratorPinsRef) {
782   do {
783     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
784     Put(1, "foo", "hello");
785 
786     // Get iterator that will yield the current contents of the DB.
787     Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
788 
789     // Write to force compactions
790     Put(1, "foo", "newvalue1");
791     for (int i = 0; i < 100; i++) {
792       // 100K values
793       ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
794     }
795     Put(1, "foo", "newvalue2");
796 
797     iter->SeekToFirst();
798     ASSERT_TRUE(iter->Valid());
799     ASSERT_EQ("foo", iter->key().ToString());
800     ASSERT_EQ("hello", iter->value().ToString());
801     iter->Next();
802     ASSERT_TRUE(!iter->Valid());
803     delete iter;
804   } while (ChangeCompactOptions());
805 }
806 
TEST_P(DBIteratorTest,IteratorDeleteAfterCfDelete)807 TEST_P(DBIteratorTest, IteratorDeleteAfterCfDelete) {
808   CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
809 
810   Put(1, "foo", "delete-cf-then-delete-iter");
811   Put(1, "hello", "value2");
812 
813   ColumnFamilyHandle* cf = handles_[1];
814   ReadOptions ro;
815 
816   auto* iter = db_->NewIterator(ro, cf);
817   iter->SeekToFirst();
818   ASSERT_EQ(IterStatus(iter), "foo->delete-cf-then-delete-iter");
819 
820   // delete CF handle
821   db_->DestroyColumnFamilyHandle(cf);
822   handles_.erase(std::begin(handles_) + 1);
823 
824   // delete Iterator after CF handle is deleted
825   iter->Next();
826   ASSERT_EQ(IterStatus(iter), "hello->value2");
827   delete iter;
828 }
829 
TEST_P(DBIteratorTest,IteratorDeleteAfterCfDrop)830 TEST_P(DBIteratorTest, IteratorDeleteAfterCfDrop) {
831   CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
832 
833   Put(1, "foo", "drop-cf-then-delete-iter");
834 
835   ReadOptions ro;
836   ColumnFamilyHandle* cf = handles_[1];
837 
838   auto* iter = db_->NewIterator(ro, cf);
839   iter->SeekToFirst();
840   ASSERT_EQ(IterStatus(iter), "foo->drop-cf-then-delete-iter");
841 
842   // drop and delete CF
843   db_->DropColumnFamily(cf);
844   db_->DestroyColumnFamilyHandle(cf);
845   handles_.erase(std::begin(handles_) + 1);
846 
847   // delete Iterator after CF handle is dropped
848   delete iter;
849 }
850 
851 // SetOptions not defined in ROCKSDB LITE
852 #ifndef ROCKSDB_LITE
TEST_P(DBIteratorTest,DBIteratorBoundTest)853 TEST_P(DBIteratorTest, DBIteratorBoundTest) {
854   Options options = CurrentOptions();
855   options.env = env_;
856   options.create_if_missing = true;
857 
858   options.prefix_extractor = nullptr;
859   DestroyAndReopen(options);
860   ASSERT_OK(Put("a", "0"));
861   ASSERT_OK(Put("foo", "bar"));
862   ASSERT_OK(Put("foo1", "bar1"));
863   ASSERT_OK(Put("g1", "0"));
864 
865   // testing basic case with no iterate_upper_bound and no prefix_extractor
866   {
867     ReadOptions ro;
868     ro.iterate_upper_bound = nullptr;
869 
870     std::unique_ptr<Iterator> iter(NewIterator(ro));
871 
872     iter->Seek("foo");
873 
874     ASSERT_TRUE(iter->Valid());
875     ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
876 
877     iter->Next();
878     ASSERT_TRUE(iter->Valid());
879     ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
880 
881     iter->Next();
882     ASSERT_TRUE(iter->Valid());
883     ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
884 
885     iter->SeekForPrev("g1");
886 
887     ASSERT_TRUE(iter->Valid());
888     ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
889 
890     iter->Prev();
891     ASSERT_TRUE(iter->Valid());
892     ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
893 
894     iter->Prev();
895     ASSERT_TRUE(iter->Valid());
896     ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
897   }
898 
899   // testing iterate_upper_bound and forward iterator
900   // to make sure it stops at bound
901   {
902     ReadOptions ro;
903     // iterate_upper_bound points beyond the last expected entry
904     Slice prefix("foo2");
905     ro.iterate_upper_bound = &prefix;
906 
907     std::unique_ptr<Iterator> iter(NewIterator(ro));
908 
909     iter->Seek("foo");
910 
911     ASSERT_TRUE(iter->Valid());
912     ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
913 
914     iter->Next();
915     ASSERT_TRUE(iter->Valid());
916     ASSERT_EQ(iter->key().compare(("foo1")), 0);
917 
918     iter->Next();
919     // should stop here...
920     ASSERT_TRUE(!iter->Valid());
921   }
922   // Testing SeekToLast with iterate_upper_bound set
923   {
924     ReadOptions ro;
925 
926     Slice prefix("foo");
927     ro.iterate_upper_bound = &prefix;
928 
929     std::unique_ptr<Iterator> iter(NewIterator(ro));
930 
931     iter->SeekToLast();
932     ASSERT_TRUE(iter->Valid());
933     ASSERT_EQ(iter->key().compare(Slice("a")), 0);
934   }
935 
936   // prefix is the first letter of the key
937   ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
938   ASSERT_OK(Put("a", "0"));
939   ASSERT_OK(Put("foo", "bar"));
940   ASSERT_OK(Put("foo1", "bar1"));
941   ASSERT_OK(Put("g1", "0"));
942 
943   // testing with iterate_upper_bound and prefix_extractor
944   // Seek target and iterate_upper_bound are not is same prefix
945   // This should be an error
946   {
947     ReadOptions ro;
948     Slice upper_bound("g");
949     ro.iterate_upper_bound = &upper_bound;
950 
951     std::unique_ptr<Iterator> iter(NewIterator(ro));
952 
953     iter->Seek("foo");
954 
955     ASSERT_TRUE(iter->Valid());
956     ASSERT_EQ("foo", iter->key().ToString());
957 
958     iter->Next();
959     ASSERT_TRUE(iter->Valid());
960     ASSERT_EQ("foo1", iter->key().ToString());
961 
962     iter->Next();
963     ASSERT_TRUE(!iter->Valid());
964   }
965 
966   // testing that iterate_upper_bound prevents iterating over deleted items
967   // if the bound has already reached
968   {
969     options.prefix_extractor = nullptr;
970     DestroyAndReopen(options);
971     ASSERT_OK(Put("a", "0"));
972     ASSERT_OK(Put("b", "0"));
973     ASSERT_OK(Put("b1", "0"));
974     ASSERT_OK(Put("c", "0"));
975     ASSERT_OK(Put("d", "0"));
976     ASSERT_OK(Put("e", "0"));
977     ASSERT_OK(Delete("c"));
978     ASSERT_OK(Delete("d"));
979 
980     // base case with no bound
981     ReadOptions ro;
982     ro.iterate_upper_bound = nullptr;
983 
984     std::unique_ptr<Iterator> iter(NewIterator(ro));
985 
986     iter->Seek("b");
987     ASSERT_TRUE(iter->Valid());
988     ASSERT_EQ(iter->key().compare(Slice("b")), 0);
989 
990     iter->Next();
991     ASSERT_TRUE(iter->Valid());
992     ASSERT_EQ(iter->key().compare(("b1")), 0);
993 
994     get_perf_context()->Reset();
995     iter->Next();
996 
997     ASSERT_TRUE(iter->Valid());
998     ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 2);
999 
1000     // now testing with iterate_bound
1001     Slice prefix("c");
1002     ro.iterate_upper_bound = &prefix;
1003 
1004     iter.reset(NewIterator(ro));
1005 
1006     get_perf_context()->Reset();
1007 
1008     iter->Seek("b");
1009     ASSERT_TRUE(iter->Valid());
1010     ASSERT_EQ(iter->key().compare(Slice("b")), 0);
1011 
1012     iter->Next();
1013     ASSERT_TRUE(iter->Valid());
1014     ASSERT_EQ(iter->key().compare(("b1")), 0);
1015 
1016     iter->Next();
1017     // the iteration should stop as soon as the bound key is reached
1018     // even though the key is deleted
1019     // hence internal_delete_skipped_count should be 0
1020     ASSERT_TRUE(!iter->Valid());
1021     ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 0);
1022   }
1023 }
1024 
TEST_P(DBIteratorTest,DBIteratorBoundMultiSeek)1025 TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) {
1026   Options options = CurrentOptions();
1027   options.env = env_;
1028   options.create_if_missing = true;
1029   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1030   options.prefix_extractor = nullptr;
1031   DestroyAndReopen(options);
1032   ASSERT_OK(Put("a", "0"));
1033   ASSERT_OK(Put("z", "0"));
1034   ASSERT_OK(Flush());
1035   ASSERT_OK(Put("foo1", "bar1"));
1036   ASSERT_OK(Put("foo2", "bar2"));
1037   ASSERT_OK(Put("foo3", "bar3"));
1038   ASSERT_OK(Put("foo4", "bar4"));
1039 
1040   {
1041     std::string up_str = "foo5";
1042     Slice up(up_str);
1043     ReadOptions ro;
1044     ro.iterate_upper_bound = &up;
1045     std::unique_ptr<Iterator> iter(NewIterator(ro));
1046 
1047     iter->Seek("foo1");
1048     ASSERT_TRUE(iter->Valid());
1049     ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1050 
1051     uint64_t prev_block_cache_hit =
1052         TestGetTickerCount(options, BLOCK_CACHE_HIT);
1053     uint64_t prev_block_cache_miss =
1054         TestGetTickerCount(options, BLOCK_CACHE_MISS);
1055 
1056     ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0);
1057 
1058     iter->Seek("foo4");
1059     ASSERT_TRUE(iter->Valid());
1060     ASSERT_EQ(iter->key().compare(Slice("foo4")), 0);
1061     ASSERT_EQ(prev_block_cache_hit,
1062               TestGetTickerCount(options, BLOCK_CACHE_HIT));
1063     ASSERT_EQ(prev_block_cache_miss,
1064               TestGetTickerCount(options, BLOCK_CACHE_MISS));
1065 
1066     iter->Seek("foo2");
1067     ASSERT_TRUE(iter->Valid());
1068     ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1069     iter->Next();
1070     ASSERT_TRUE(iter->Valid());
1071     ASSERT_EQ(iter->key().compare(Slice("foo3")), 0);
1072     ASSERT_EQ(prev_block_cache_hit,
1073               TestGetTickerCount(options, BLOCK_CACHE_HIT));
1074     ASSERT_EQ(prev_block_cache_miss,
1075               TestGetTickerCount(options, BLOCK_CACHE_MISS));
1076   }
1077 }
1078 #endif
1079 
TEST_P(DBIteratorTest,DBIteratorBoundOptimizationTest)1080 TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
1081   for (auto format_version : {2, 3, 4}) {
1082     int upper_bound_hits = 0;
1083     Options options = CurrentOptions();
1084     ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1085         "BlockBasedTableIterator:out_of_bound",
1086         [&upper_bound_hits](void*) { upper_bound_hits++; });
1087     ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1088     options.env = env_;
1089     options.create_if_missing = true;
1090     options.prefix_extractor = nullptr;
1091     BlockBasedTableOptions table_options;
1092     table_options.format_version = format_version;
1093     table_options.flush_block_policy_factory =
1094         std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1095     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1096 
1097     DestroyAndReopen(options);
1098     ASSERT_OK(Put("foo1", "bar1"));
1099     ASSERT_OK(Put("foo2", "bar2"));
1100     ASSERT_OK(Put("foo4", "bar4"));
1101     ASSERT_OK(Flush());
1102 
1103     Slice ub("foo3");
1104     ReadOptions ro;
1105     ro.iterate_upper_bound = &ub;
1106 
1107     std::unique_ptr<Iterator> iter(NewIterator(ro));
1108 
1109     iter->Seek("foo");
1110     ASSERT_TRUE(iter->Valid());
1111     ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1112     ASSERT_EQ(upper_bound_hits, 0);
1113 
1114     iter->Next();
1115     ASSERT_TRUE(iter->Valid());
1116     ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1117     ASSERT_EQ(upper_bound_hits, 0);
1118 
1119     iter->Next();
1120     ASSERT_FALSE(iter->Valid());
1121     ASSERT_EQ(upper_bound_hits, 1);
1122   }
1123 }
1124 
1125 // Enable kBinarySearchWithFirstKey, do some iterator operations and check that
1126 // they don't do unnecessary block reads.
TEST_P(DBIteratorTest,IndexWithFirstKey)1127 TEST_P(DBIteratorTest, IndexWithFirstKey) {
1128   for (int tailing = 0; tailing < 2; ++tailing) {
1129     SCOPED_TRACE("tailing = " + std::to_string(tailing));
1130     Options options = CurrentOptions();
1131     options.env = env_;
1132     options.create_if_missing = true;
1133     options.prefix_extractor = nullptr;
1134     options.merge_operator = MergeOperators::CreateStringAppendOperator();
1135     options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1136     Statistics* stats = options.statistics.get();
1137     BlockBasedTableOptions table_options;
1138     table_options.index_type =
1139         BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
1140     table_options.index_shortening =
1141         BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
1142     table_options.flush_block_policy_factory =
1143         std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1144     table_options.block_cache =
1145         NewLRUCache(8000);  // fits all blocks and their cache metadata overhead
1146     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1147 
1148     DestroyAndReopen(options);
1149     ASSERT_OK(Merge("a1", "x1"));
1150     ASSERT_OK(Merge("b1", "y1"));
1151     ASSERT_OK(Merge("c0", "z1"));
1152     ASSERT_OK(Flush());
1153     ASSERT_OK(Merge("a2", "x2"));
1154     ASSERT_OK(Merge("b2", "y2"));
1155     ASSERT_OK(Merge("c0", "z2"));
1156     ASSERT_OK(Flush());
1157     ASSERT_OK(Merge("a3", "x3"));
1158     ASSERT_OK(Merge("b3", "y3"));
1159     ASSERT_OK(Merge("c3", "z3"));
1160     ASSERT_OK(Flush());
1161 
1162     // Block cache is not important for this test.
1163     // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
1164     // available way of counting block accesses.
1165 
1166     ReadOptions ropt;
1167     ropt.tailing = tailing;
1168     std::unique_ptr<Iterator> iter(NewIterator(ropt));
1169 
1170     iter->Seek("b10");
1171     ASSERT_TRUE(iter->Valid());
1172     EXPECT_EQ("b2", iter->key().ToString());
1173     EXPECT_EQ("y2", iter->value().ToString());
1174     EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1175 
1176     iter->Next();
1177     ASSERT_TRUE(iter->Valid());
1178     EXPECT_EQ("b3", iter->key().ToString());
1179     EXPECT_EQ("y3", iter->value().ToString());
1180     EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1181     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1182 
1183     iter->Seek("c0");
1184     ASSERT_TRUE(iter->Valid());
1185     EXPECT_EQ("c0", iter->key().ToString());
1186     EXPECT_EQ("z1,z2", iter->value().ToString());
1187     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1188     EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1189 
1190     iter->Next();
1191     ASSERT_TRUE(iter->Valid());
1192     EXPECT_EQ("c3", iter->key().ToString());
1193     EXPECT_EQ("z3", iter->value().ToString());
1194     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1195     EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1196 
1197     iter.reset();
1198 
1199     // Enable iterate_upper_bound and check that iterator is not trying to read
1200     // blocks that are fully above upper bound.
1201     std::string ub = "b3";
1202     Slice ub_slice(ub);
1203     ropt.iterate_upper_bound = &ub_slice;
1204     iter.reset(NewIterator(ropt));
1205 
1206     iter->Seek("b2");
1207     ASSERT_TRUE(iter->Valid());
1208     EXPECT_EQ("b2", iter->key().ToString());
1209     EXPECT_EQ("y2", iter->value().ToString());
1210     EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1211     EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1212 
1213     iter->Next();
1214     ASSERT_FALSE(iter->Valid());
1215     EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1216     EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1217   }
1218 }
1219 
TEST_P(DBIteratorTest,IndexWithFirstKeyGet)1220 TEST_P(DBIteratorTest, IndexWithFirstKeyGet) {
1221   Options options = CurrentOptions();
1222   options.env = env_;
1223   options.create_if_missing = true;
1224   options.prefix_extractor = nullptr;
1225   options.merge_operator = MergeOperators::CreateStringAppendOperator();
1226   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1227   Statistics* stats = options.statistics.get();
1228   BlockBasedTableOptions table_options;
1229   table_options.index_type =
1230       BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
1231   table_options.index_shortening =
1232       BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
1233   table_options.flush_block_policy_factory =
1234       std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1235   table_options.block_cache = NewLRUCache(1000);  // fits all blocks
1236   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1237 
1238   DestroyAndReopen(options);
1239   ASSERT_OK(Merge("a", "x1"));
1240   ASSERT_OK(Merge("c", "y1"));
1241   ASSERT_OK(Merge("e", "z1"));
1242   ASSERT_OK(Flush());
1243   ASSERT_OK(Merge("c", "y2"));
1244   ASSERT_OK(Merge("e", "z2"));
1245   ASSERT_OK(Flush());
1246 
1247   // Get() between blocks shouldn't read any blocks.
1248   ASSERT_EQ("NOT_FOUND", Get("b"));
1249   EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1250   EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1251 
1252   // Get() of an existing key shouldn't read any unnecessary blocks when there's
1253   // only one key per block.
1254 
1255   ASSERT_EQ("y1,y2", Get("c"));
1256   EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1257   EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1258 
1259   ASSERT_EQ("x1", Get("a"));
1260   EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1261   EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1262 
1263   EXPECT_EQ(std::vector<std::string>({"NOT_FOUND", "z1,z2"}),
1264             MultiGet({"b", "e"}));
1265 }
1266 
1267 // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
1268 //             return the biggest key which is smaller than the seek key.
TEST_P(DBIteratorTest,PrevAfterAndNextAfterMerge)1269 TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) {
1270   Options options;
1271   options.create_if_missing = true;
1272   options.merge_operator = MergeOperators::CreatePutOperator();
1273   options.env = env_;
1274   DestroyAndReopen(options);
1275 
1276   // write three entries with different keys using Merge()
1277   WriteOptions wopts;
1278   db_->Merge(wopts, "1", "data1");
1279   db_->Merge(wopts, "2", "data2");
1280   db_->Merge(wopts, "3", "data3");
1281 
1282   std::unique_ptr<Iterator> it(NewIterator(ReadOptions()));
1283 
1284   it->Seek("2");
1285   ASSERT_TRUE(it->Valid());
1286   ASSERT_EQ("2", it->key().ToString());
1287 
1288   it->Prev();
1289   ASSERT_TRUE(it->Valid());
1290   ASSERT_EQ("1", it->key().ToString());
1291 
1292   it->SeekForPrev("1");
1293   ASSERT_TRUE(it->Valid());
1294   ASSERT_EQ("1", it->key().ToString());
1295 
1296   it->Next();
1297   ASSERT_TRUE(it->Valid());
1298   ASSERT_EQ("2", it->key().ToString());
1299 }
1300 
1301 class DBIteratorTestForPinnedData : public DBIteratorTest {
1302  public:
1303   enum TestConfig {
1304     NORMAL,
1305     CLOSE_AND_OPEN,
1306     COMPACT_BEFORE_READ,
1307     FLUSH_EVERY_1000,
1308     MAX
1309   };
DBIteratorTestForPinnedData()1310   DBIteratorTestForPinnedData() : DBIteratorTest() {}
PinnedDataIteratorRandomized(TestConfig run_config)1311   void PinnedDataIteratorRandomized(TestConfig run_config) {
1312     // Generate Random data
1313     Random rnd(301);
1314 
1315     int puts = 100000;
1316     int key_pool = static_cast<int>(puts * 0.7);
1317     int key_size = 100;
1318     int val_size = 1000;
1319     int seeks_percentage = 20;   // 20% of keys will be used to test seek()
1320     int delete_percentage = 20;  // 20% of keys will be deleted
1321     int merge_percentage = 20;   // 20% of keys will be added using Merge()
1322 
1323     Options options = CurrentOptions();
1324     BlockBasedTableOptions table_options;
1325     table_options.use_delta_encoding = false;
1326     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1327     options.merge_operator = MergeOperators::CreatePutOperator();
1328     DestroyAndReopen(options);
1329 
1330     std::vector<std::string> generated_keys(key_pool);
1331     for (int i = 0; i < key_pool; i++) {
1332       generated_keys[i] = RandomString(&rnd, key_size);
1333     }
1334 
1335     std::map<std::string, std::string> true_data;
1336     std::vector<std::string> random_keys;
1337     std::vector<std::string> deleted_keys;
1338     for (int i = 0; i < puts; i++) {
1339       auto& k = generated_keys[rnd.Next() % key_pool];
1340       auto v = RandomString(&rnd, val_size);
1341 
1342       // Insert data to true_data map and to DB
1343       true_data[k] = v;
1344       if (rnd.PercentTrue(merge_percentage)) {
1345         ASSERT_OK(db_->Merge(WriteOptions(), k, v));
1346       } else {
1347         ASSERT_OK(Put(k, v));
1348       }
1349 
1350       // Pick random keys to be used to test Seek()
1351       if (rnd.PercentTrue(seeks_percentage)) {
1352         random_keys.push_back(k);
1353       }
1354 
1355       // Delete some random keys
1356       if (rnd.PercentTrue(delete_percentage)) {
1357         deleted_keys.push_back(k);
1358         true_data.erase(k);
1359         ASSERT_OK(Delete(k));
1360       }
1361 
1362       if (run_config == TestConfig::FLUSH_EVERY_1000) {
1363         if (i && i % 1000 == 0) {
1364           Flush();
1365         }
1366       }
1367     }
1368 
1369     if (run_config == TestConfig::CLOSE_AND_OPEN) {
1370       Close();
1371       Reopen(options);
1372     } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
1373       db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
1374     }
1375 
1376     ReadOptions ro;
1377     ro.pin_data = true;
1378     auto iter = NewIterator(ro);
1379 
1380     {
1381       // Test Seek to random keys
1382       std::vector<Slice> keys_slices;
1383       std::vector<std::string> true_keys;
1384       for (auto& k : random_keys) {
1385         iter->Seek(k);
1386         if (!iter->Valid()) {
1387           ASSERT_EQ(true_data.lower_bound(k), true_data.end());
1388           continue;
1389         }
1390         std::string prop_value;
1391         ASSERT_OK(
1392             iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1393         ASSERT_EQ("1", prop_value);
1394         keys_slices.push_back(iter->key());
1395         true_keys.push_back(true_data.lower_bound(k)->first);
1396       }
1397 
1398       for (size_t i = 0; i < keys_slices.size(); i++) {
1399         ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1400       }
1401     }
1402 
1403     {
1404       // Test SeekForPrev to random keys
1405       std::vector<Slice> keys_slices;
1406       std::vector<std::string> true_keys;
1407       for (auto& k : random_keys) {
1408         iter->SeekForPrev(k);
1409         if (!iter->Valid()) {
1410           ASSERT_EQ(true_data.upper_bound(k), true_data.begin());
1411           continue;
1412         }
1413         std::string prop_value;
1414         ASSERT_OK(
1415             iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1416         ASSERT_EQ("1", prop_value);
1417         keys_slices.push_back(iter->key());
1418         true_keys.push_back((--true_data.upper_bound(k))->first);
1419       }
1420 
1421       for (size_t i = 0; i < keys_slices.size(); i++) {
1422         ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1423       }
1424     }
1425 
1426     {
1427       // Test iterating all data forward
1428       std::vector<Slice> all_keys;
1429       for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1430         std::string prop_value;
1431         ASSERT_OK(
1432             iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1433         ASSERT_EQ("1", prop_value);
1434         all_keys.push_back(iter->key());
1435       }
1436       ASSERT_EQ(all_keys.size(), true_data.size());
1437 
1438       // Verify that all keys slices are valid
1439       auto data_iter = true_data.begin();
1440       for (size_t i = 0; i < all_keys.size(); i++) {
1441         ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1442         data_iter++;
1443       }
1444     }
1445 
1446     {
1447       // Test iterating all data backward
1448       std::vector<Slice> all_keys;
1449       for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1450         std::string prop_value;
1451         ASSERT_OK(
1452             iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1453         ASSERT_EQ("1", prop_value);
1454         all_keys.push_back(iter->key());
1455       }
1456       ASSERT_EQ(all_keys.size(), true_data.size());
1457 
1458       // Verify that all keys slices are valid (backward)
1459       auto data_iter = true_data.rbegin();
1460       for (size_t i = 0; i < all_keys.size(); i++) {
1461         ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1462         data_iter++;
1463       }
1464     }
1465 
1466     delete iter;
1467 }
1468 };
1469 
TEST_P(DBIteratorTestForPinnedData,PinnedDataIteratorRandomizedNormal)1470 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) {
1471   PinnedDataIteratorRandomized(TestConfig::NORMAL);
1472 }
1473 
TEST_P(DBIteratorTestForPinnedData,PinnedDataIteratorRandomizedCLoseAndOpen)1474 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) {
1475   PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN);
1476 }
1477 
TEST_P(DBIteratorTestForPinnedData,PinnedDataIteratorRandomizedCompactBeforeRead)1478 TEST_P(DBIteratorTestForPinnedData,
1479        PinnedDataIteratorRandomizedCompactBeforeRead) {
1480   PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ);
1481 }
1482 
TEST_P(DBIteratorTestForPinnedData,PinnedDataIteratorRandomizedFlush)1483 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) {
1484   PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000);
1485 }
1486 
1487 #ifndef ROCKSDB_LITE
TEST_P(DBIteratorTest,PinnedDataIteratorMultipleFiles)1488 TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) {
1489   Options options = CurrentOptions();
1490   BlockBasedTableOptions table_options;
1491   table_options.use_delta_encoding = false;
1492   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1493   options.disable_auto_compactions = true;
1494   options.write_buffer_size = 1024 * 1024 * 10;  // 10 Mb
1495   DestroyAndReopen(options);
1496 
1497   std::map<std::string, std::string> true_data;
1498 
1499   // Generate 4 sst files in L2
1500   Random rnd(301);
1501   for (int i = 1; i <= 1000; i++) {
1502     std::string k = Key(i * 3);
1503     std::string v = RandomString(&rnd, 100);
1504     ASSERT_OK(Put(k, v));
1505     true_data[k] = v;
1506     if (i % 250 == 0) {
1507       ASSERT_OK(Flush());
1508     }
1509   }
1510   ASSERT_EQ(FilesPerLevel(0), "4");
1511   ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1512   ASSERT_EQ(FilesPerLevel(0), "0,4");
1513 
1514   // Generate 4 sst files in L0
1515   for (int i = 1; i <= 1000; i++) {
1516     std::string k = Key(i * 2);
1517     std::string v = RandomString(&rnd, 100);
1518     ASSERT_OK(Put(k, v));
1519     true_data[k] = v;
1520     if (i % 250 == 0) {
1521       ASSERT_OK(Flush());
1522     }
1523   }
1524   ASSERT_EQ(FilesPerLevel(0), "4,4");
1525 
1526   // Add some keys/values in memtables
1527   for (int i = 1; i <= 1000; i++) {
1528     std::string k = Key(i);
1529     std::string v = RandomString(&rnd, 100);
1530     ASSERT_OK(Put(k, v));
1531     true_data[k] = v;
1532   }
1533   ASSERT_EQ(FilesPerLevel(0), "4,4");
1534 
1535   ReadOptions ro;
1536   ro.pin_data = true;
1537   auto iter = NewIterator(ro);
1538 
1539   std::vector<std::pair<Slice, std::string>> results;
1540   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1541     std::string prop_value;
1542     ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1543     ASSERT_EQ("1", prop_value);
1544     results.emplace_back(iter->key(), iter->value().ToString());
1545   }
1546 
1547   ASSERT_EQ(results.size(), true_data.size());
1548   auto data_iter = true_data.begin();
1549   for (size_t i = 0; i < results.size(); i++, data_iter++) {
1550     auto& kv = results[i];
1551     ASSERT_EQ(kv.first, data_iter->first);
1552     ASSERT_EQ(kv.second, data_iter->second);
1553   }
1554 
1555   delete iter;
1556 }
1557 #endif
1558 
TEST_P(DBIteratorTest,PinnedDataIteratorMergeOperator)1559 TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) {
1560   Options options = CurrentOptions();
1561   BlockBasedTableOptions table_options;
1562   table_options.use_delta_encoding = false;
1563   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1564   options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1565   DestroyAndReopen(options);
1566 
1567   std::string numbers[7];
1568   for (int val = 0; val <= 6; val++) {
1569     PutFixed64(numbers + val, val);
1570   }
1571 
1572   // +1 all keys in range [ 0 => 999]
1573   for (int i = 0; i < 1000; i++) {
1574     WriteOptions wo;
1575     ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
1576   }
1577 
1578   // +2 all keys divisible by 2 in range [ 0 => 999]
1579   for (int i = 0; i < 1000; i += 2) {
1580     WriteOptions wo;
1581     ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
1582   }
1583 
1584   // +3 all keys divisible by 5 in range [ 0 => 999]
1585   for (int i = 0; i < 1000; i += 5) {
1586     WriteOptions wo;
1587     ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
1588   }
1589 
1590   ReadOptions ro;
1591   ro.pin_data = true;
1592   auto iter = NewIterator(ro);
1593 
1594   std::vector<std::pair<Slice, std::string>> results;
1595   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1596     std::string prop_value;
1597     ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1598     ASSERT_EQ("1", prop_value);
1599     results.emplace_back(iter->key(), iter->value().ToString());
1600   }
1601 
1602   ASSERT_EQ(results.size(), 1000);
1603   for (size_t i = 0; i < results.size(); i++) {
1604     auto& kv = results[i];
1605     ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
1606     int expected_val = 1;
1607     if (i % 2 == 0) {
1608       expected_val += 2;
1609     }
1610     if (i % 5 == 0) {
1611       expected_val += 3;
1612     }
1613     ASSERT_EQ(kv.second, numbers[expected_val]);
1614   }
1615 
1616   delete iter;
1617 }
1618 
TEST_P(DBIteratorTest,PinnedDataIteratorReadAfterUpdate)1619 TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
1620   Options options = CurrentOptions();
1621   BlockBasedTableOptions table_options;
1622   table_options.use_delta_encoding = false;
1623   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1624   options.write_buffer_size = 100000;
1625   DestroyAndReopen(options);
1626 
1627   Random rnd(301);
1628 
1629   std::map<std::string, std::string> true_data;
1630   for (int i = 0; i < 1000; i++) {
1631     std::string k = RandomString(&rnd, 10);
1632     std::string v = RandomString(&rnd, 1000);
1633     ASSERT_OK(Put(k, v));
1634     true_data[k] = v;
1635   }
1636 
1637   ReadOptions ro;
1638   ro.pin_data = true;
1639   auto iter = NewIterator(ro);
1640 
1641   // Delete 50% of the keys and update the other 50%
1642   for (auto& kv : true_data) {
1643     if (rnd.OneIn(2)) {
1644       ASSERT_OK(Delete(kv.first));
1645     } else {
1646       std::string new_val = RandomString(&rnd, 1000);
1647       ASSERT_OK(Put(kv.first, new_val));
1648     }
1649   }
1650 
1651   std::vector<std::pair<Slice, std::string>> results;
1652   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1653     std::string prop_value;
1654     ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1655     ASSERT_EQ("1", prop_value);
1656     results.emplace_back(iter->key(), iter->value().ToString());
1657   }
1658 
1659   auto data_iter = true_data.begin();
1660   for (size_t i = 0; i < results.size(); i++, data_iter++) {
1661     auto& kv = results[i];
1662     ASSERT_EQ(kv.first, data_iter->first);
1663     ASSERT_EQ(kv.second, data_iter->second);
1664   }
1665 
1666   delete iter;
1667 }
1668 
1669 class SliceTransformLimitedDomainGeneric : public SliceTransform {
Name() const1670   const char* Name() const override {
1671     return "SliceTransformLimitedDomainGeneric";
1672   }
1673 
Transform(const Slice & src) const1674   Slice Transform(const Slice& src) const override {
1675     return Slice(src.data(), 1);
1676   }
1677 
InDomain(const Slice & src) const1678   bool InDomain(const Slice& src) const override {
1679     // prefix will be x????
1680     return src.size() >= 1;
1681   }
1682 
InRange(const Slice & dst) const1683   bool InRange(const Slice& dst) const override {
1684     // prefix will be x????
1685     return dst.size() == 1;
1686   }
1687 };
1688 
TEST_P(DBIteratorTest,IterSeekForPrevCrossingFiles)1689 TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) {
1690   Options options = CurrentOptions();
1691   options.prefix_extractor.reset(NewFixedPrefixTransform(1));
1692   options.disable_auto_compactions = true;
1693   // Enable prefix bloom for SST files
1694   BlockBasedTableOptions table_options;
1695   table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1696   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1697   DestroyAndReopen(options);
1698 
1699   ASSERT_OK(Put("a1", "va1"));
1700   ASSERT_OK(Put("a2", "va2"));
1701   ASSERT_OK(Put("a3", "va3"));
1702   ASSERT_OK(Flush());
1703 
1704   ASSERT_OK(Put("b1", "vb1"));
1705   ASSERT_OK(Put("b2", "vb2"));
1706   ASSERT_OK(Put("b3", "vb3"));
1707   ASSERT_OK(Flush());
1708 
1709   ASSERT_OK(Put("b4", "vb4"));
1710   ASSERT_OK(Put("d1", "vd1"));
1711   ASSERT_OK(Put("d2", "vd2"));
1712   ASSERT_OK(Put("d4", "vd4"));
1713   ASSERT_OK(Flush());
1714 
1715   MoveFilesToLevel(1);
1716   {
1717     ReadOptions ro;
1718     Iterator* iter = NewIterator(ro);
1719 
1720     iter->SeekForPrev("a4");
1721     ASSERT_EQ(iter->key().ToString(), "a3");
1722     ASSERT_EQ(iter->value().ToString(), "va3");
1723 
1724     iter->SeekForPrev("c2");
1725     ASSERT_EQ(iter->key().ToString(), "b3");
1726     iter->SeekForPrev("d3");
1727     ASSERT_EQ(iter->key().ToString(), "d2");
1728     iter->SeekForPrev("b5");
1729     ASSERT_EQ(iter->key().ToString(), "b4");
1730     delete iter;
1731   }
1732 
1733   {
1734     ReadOptions ro;
1735     ro.prefix_same_as_start = true;
1736     Iterator* iter = NewIterator(ro);
1737     iter->SeekForPrev("c2");
1738     ASSERT_TRUE(!iter->Valid());
1739     delete iter;
1740   }
1741 }
1742 
TEST_P(DBIteratorTest,IterSeekForPrevCrossingFilesCustomPrefixExtractor)1743 TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) {
1744   Options options = CurrentOptions();
1745   options.prefix_extractor =
1746       std::make_shared<SliceTransformLimitedDomainGeneric>();
1747   options.disable_auto_compactions = true;
1748   // Enable prefix bloom for SST files
1749   BlockBasedTableOptions table_options;
1750   table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1751   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1752   DestroyAndReopen(options);
1753 
1754   ASSERT_OK(Put("a1", "va1"));
1755   ASSERT_OK(Put("a2", "va2"));
1756   ASSERT_OK(Put("a3", "va3"));
1757   ASSERT_OK(Flush());
1758 
1759   ASSERT_OK(Put("b1", "vb1"));
1760   ASSERT_OK(Put("b2", "vb2"));
1761   ASSERT_OK(Put("b3", "vb3"));
1762   ASSERT_OK(Flush());
1763 
1764   ASSERT_OK(Put("b4", "vb4"));
1765   ASSERT_OK(Put("d1", "vd1"));
1766   ASSERT_OK(Put("d2", "vd2"));
1767   ASSERT_OK(Put("d4", "vd4"));
1768   ASSERT_OK(Flush());
1769 
1770   MoveFilesToLevel(1);
1771   {
1772     ReadOptions ro;
1773     Iterator* iter = NewIterator(ro);
1774 
1775     iter->SeekForPrev("a4");
1776     ASSERT_EQ(iter->key().ToString(), "a3");
1777     ASSERT_EQ(iter->value().ToString(), "va3");
1778 
1779     iter->SeekForPrev("c2");
1780     ASSERT_EQ(iter->key().ToString(), "b3");
1781     iter->SeekForPrev("d3");
1782     ASSERT_EQ(iter->key().ToString(), "d2");
1783     iter->SeekForPrev("b5");
1784     ASSERT_EQ(iter->key().ToString(), "b4");
1785     delete iter;
1786   }
1787 
1788   {
1789     ReadOptions ro;
1790     ro.prefix_same_as_start = true;
1791     Iterator* iter = NewIterator(ro);
1792     iter->SeekForPrev("c2");
1793     ASSERT_TRUE(!iter->Valid());
1794     delete iter;
1795   }
1796 }
1797 
TEST_P(DBIteratorTest,IterPrevKeyCrossingBlocks)1798 TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) {
1799   Options options = CurrentOptions();
1800   BlockBasedTableOptions table_options;
1801   table_options.block_size = 1;  // every block will contain one entry
1802   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1803   options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1804   options.disable_auto_compactions = true;
1805   options.max_sequential_skip_in_iterations = 8;
1806 
1807   DestroyAndReopen(options);
1808 
1809   // Putting such deletes will force DBIter::Prev() to fallback to a Seek
1810   for (int file_num = 0; file_num < 10; file_num++) {
1811     ASSERT_OK(Delete("key4"));
1812     ASSERT_OK(Flush());
1813   }
1814 
1815   // First File containing 5 blocks of puts
1816   ASSERT_OK(Put("key1", "val1.0"));
1817   ASSERT_OK(Put("key2", "val2.0"));
1818   ASSERT_OK(Put("key3", "val3.0"));
1819   ASSERT_OK(Put("key4", "val4.0"));
1820   ASSERT_OK(Put("key5", "val5.0"));
1821   ASSERT_OK(Flush());
1822 
1823   // Second file containing 9 blocks of merge operands
1824   ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
1825   ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
1826 
1827   ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
1828   ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
1829   ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
1830 
1831   ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
1832   ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
1833   ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
1834   ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
1835   ASSERT_OK(Flush());
1836 
1837   {
1838     ReadOptions ro;
1839     ro.fill_cache = false;
1840     Iterator* iter = NewIterator(ro);
1841 
1842     iter->SeekToLast();
1843     ASSERT_EQ(iter->key().ToString(), "key5");
1844     ASSERT_EQ(iter->value().ToString(), "val5.0");
1845 
1846     iter->Prev();
1847     ASSERT_EQ(iter->key().ToString(), "key4");
1848     ASSERT_EQ(iter->value().ToString(), "val4.0");
1849 
1850     iter->Prev();
1851     ASSERT_EQ(iter->key().ToString(), "key3");
1852     ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
1853 
1854     iter->Prev();
1855     ASSERT_EQ(iter->key().ToString(), "key2");
1856     ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
1857 
1858     iter->Prev();
1859     ASSERT_EQ(iter->key().ToString(), "key1");
1860     ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
1861 
1862     delete iter;
1863   }
1864 }
1865 
TEST_P(DBIteratorTest,IterPrevKeyCrossingBlocksRandomized)1866 TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
1867   Options options = CurrentOptions();
1868   options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1869   options.disable_auto_compactions = true;
1870   options.level0_slowdown_writes_trigger = (1 << 30);
1871   options.level0_stop_writes_trigger = (1 << 30);
1872   options.max_sequential_skip_in_iterations = 8;
1873   DestroyAndReopen(options);
1874 
1875   const int kNumKeys = 500;
1876   // Small number of merge operands to make sure that DBIter::Prev() dont
1877   // fall back to Seek()
1878   const int kNumMergeOperands = 3;
1879   // Use value size that will make sure that every block contain 1 key
1880   const int kValSize =
1881       static_cast<int>(BlockBasedTableOptions().block_size) * 4;
1882   // Percentage of keys that wont get merge operations
1883   const int kNoMergeOpPercentage = 20;
1884   // Percentage of keys that will be deleted
1885   const int kDeletePercentage = 10;
1886 
1887   // For half of the key range we will write multiple deletes first to
1888   // force DBIter::Prev() to fall back to Seek()
1889   for (int file_num = 0; file_num < 10; file_num++) {
1890     for (int i = 0; i < kNumKeys; i += 2) {
1891       ASSERT_OK(Delete(Key(i)));
1892     }
1893     ASSERT_OK(Flush());
1894   }
1895 
1896   Random rnd(301);
1897   std::map<std::string, std::string> true_data;
1898   std::string gen_key;
1899   std::string gen_val;
1900 
1901   for (int i = 0; i < kNumKeys; i++) {
1902     gen_key = Key(i);
1903     gen_val = RandomString(&rnd, kValSize);
1904 
1905     ASSERT_OK(Put(gen_key, gen_val));
1906     true_data[gen_key] = gen_val;
1907   }
1908   ASSERT_OK(Flush());
1909 
1910   // Separate values and merge operands in different file so that we
1911   // make sure that we dont merge them while flushing but actually
1912   // merge them in the read path
1913   for (int i = 0; i < kNumKeys; i++) {
1914     if (rnd.PercentTrue(kNoMergeOpPercentage)) {
1915       // Dont give merge operations for some keys
1916       continue;
1917     }
1918 
1919     for (int j = 0; j < kNumMergeOperands; j++) {
1920       gen_key = Key(i);
1921       gen_val = RandomString(&rnd, kValSize);
1922 
1923       ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
1924       true_data[gen_key] += "," + gen_val;
1925     }
1926   }
1927   ASSERT_OK(Flush());
1928 
1929   for (int i = 0; i < kNumKeys; i++) {
1930     if (rnd.PercentTrue(kDeletePercentage)) {
1931       gen_key = Key(i);
1932 
1933       ASSERT_OK(Delete(gen_key));
1934       true_data.erase(gen_key);
1935     }
1936   }
1937   ASSERT_OK(Flush());
1938 
1939   {
1940     ReadOptions ro;
1941     ro.fill_cache = false;
1942     Iterator* iter = NewIterator(ro);
1943     auto data_iter = true_data.rbegin();
1944 
1945     for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1946       ASSERT_EQ(iter->key().ToString(), data_iter->first);
1947       ASSERT_EQ(iter->value().ToString(), data_iter->second);
1948       data_iter++;
1949     }
1950     ASSERT_EQ(data_iter, true_data.rend());
1951 
1952     delete iter;
1953   }
1954 
1955   {
1956     ReadOptions ro;
1957     ro.fill_cache = false;
1958     Iterator* iter = NewIterator(ro);
1959     auto data_iter = true_data.rbegin();
1960 
1961     int entries_right = 0;
1962     std::string seek_key;
1963     for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1964       // Verify key/value of current position
1965       ASSERT_EQ(iter->key().ToString(), data_iter->first);
1966       ASSERT_EQ(iter->value().ToString(), data_iter->second);
1967 
1968       bool restore_position_with_seek = rnd.Uniform(2);
1969       if (restore_position_with_seek) {
1970         seek_key = iter->key().ToString();
1971       }
1972 
1973       // Do some Next() operations the restore the iterator to orignal position
1974       int next_count =
1975           entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
1976       for (int i = 0; i < next_count; i++) {
1977         iter->Next();
1978         data_iter--;
1979 
1980         ASSERT_EQ(iter->key().ToString(), data_iter->first);
1981         ASSERT_EQ(iter->value().ToString(), data_iter->second);
1982       }
1983 
1984       if (restore_position_with_seek) {
1985         // Restore orignal position using Seek()
1986         iter->Seek(seek_key);
1987         for (int i = 0; i < next_count; i++) {
1988           data_iter++;
1989         }
1990 
1991         ASSERT_EQ(iter->key().ToString(), data_iter->first);
1992         ASSERT_EQ(iter->value().ToString(), data_iter->second);
1993       } else {
1994         // Restore original position using Prev()
1995         for (int i = 0; i < next_count; i++) {
1996           iter->Prev();
1997           data_iter++;
1998 
1999           ASSERT_EQ(iter->key().ToString(), data_iter->first);
2000           ASSERT_EQ(iter->value().ToString(), data_iter->second);
2001         }
2002       }
2003 
2004       entries_right++;
2005       data_iter++;
2006     }
2007     ASSERT_EQ(data_iter, true_data.rend());
2008 
2009     delete iter;
2010   }
2011 }
2012 
TEST_P(DBIteratorTest,IteratorWithLocalStatistics)2013 TEST_P(DBIteratorTest, IteratorWithLocalStatistics) {
2014   Options options = CurrentOptions();
2015   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2016   DestroyAndReopen(options);
2017 
2018   Random rnd(301);
2019   for (int i = 0; i < 1000; i++) {
2020     // Key 10 bytes / Value 10 bytes
2021     ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
2022   }
2023 
2024   std::atomic<uint64_t> total_next(0);
2025   std::atomic<uint64_t> total_next_found(0);
2026   std::atomic<uint64_t> total_prev(0);
2027   std::atomic<uint64_t> total_prev_found(0);
2028   std::atomic<uint64_t> total_bytes(0);
2029 
2030   std::vector<port::Thread> threads;
2031   std::function<void()> reader_func_next = [&]() {
2032     SetPerfLevel(kEnableCount);
2033     get_perf_context()->Reset();
2034     Iterator* iter = NewIterator(ReadOptions());
2035 
2036     iter->SeekToFirst();
2037     // Seek will bump ITER_BYTES_READ
2038     uint64_t bytes = 0;
2039     bytes += iter->key().size();
2040     bytes += iter->value().size();
2041     while (true) {
2042       iter->Next();
2043       total_next++;
2044 
2045       if (!iter->Valid()) {
2046         break;
2047       }
2048       total_next_found++;
2049       bytes += iter->key().size();
2050       bytes += iter->value().size();
2051     }
2052 
2053     delete iter;
2054     ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
2055     SetPerfLevel(kDisable);
2056     total_bytes += bytes;
2057   };
2058 
2059   std::function<void()> reader_func_prev = [&]() {
2060     SetPerfLevel(kEnableCount);
2061     Iterator* iter = NewIterator(ReadOptions());
2062 
2063     iter->SeekToLast();
2064     // Seek will bump ITER_BYTES_READ
2065     uint64_t bytes = 0;
2066     bytes += iter->key().size();
2067     bytes += iter->value().size();
2068     while (true) {
2069       iter->Prev();
2070       total_prev++;
2071 
2072       if (!iter->Valid()) {
2073         break;
2074       }
2075       total_prev_found++;
2076       bytes += iter->key().size();
2077       bytes += iter->value().size();
2078     }
2079 
2080     delete iter;
2081     ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
2082     SetPerfLevel(kDisable);
2083     total_bytes += bytes;
2084   };
2085 
2086   for (int i = 0; i < 10; i++) {
2087     threads.emplace_back(reader_func_next);
2088   }
2089   for (int i = 0; i < 15; i++) {
2090     threads.emplace_back(reader_func_prev);
2091   }
2092 
2093   for (auto& t : threads) {
2094     t.join();
2095   }
2096 
2097   ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next);
2098   ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
2099             (uint64_t)total_next_found);
2100   ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev);
2101   ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
2102             (uint64_t)total_prev_found);
2103   ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), (uint64_t)total_bytes);
2104 
2105 }
2106 
TEST_P(DBIteratorTest,ReadAhead)2107 TEST_P(DBIteratorTest, ReadAhead) {
2108   Options options;
2109   env_->count_random_reads_ = true;
2110   options.env = env_;
2111   options.disable_auto_compactions = true;
2112   options.write_buffer_size = 4 << 20;
2113   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2114   BlockBasedTableOptions table_options;
2115   table_options.block_size = 1024;
2116   table_options.no_block_cache = true;
2117   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2118   Reopen(options);
2119 
2120   std::string value(1024, 'a');
2121   for (int i = 0; i < 100; i++) {
2122     Put(Key(i), value);
2123   }
2124   ASSERT_OK(Flush());
2125   MoveFilesToLevel(2);
2126 
2127   for (int i = 0; i < 100; i++) {
2128     Put(Key(i), value);
2129   }
2130   ASSERT_OK(Flush());
2131   MoveFilesToLevel(1);
2132 
2133   for (int i = 0; i < 100; i++) {
2134     Put(Key(i), value);
2135   }
2136   ASSERT_OK(Flush());
2137 #ifndef ROCKSDB_LITE
2138   ASSERT_EQ("1,1,1", FilesPerLevel());
2139 #endif  // !ROCKSDB_LITE
2140 
2141   env_->random_read_bytes_counter_ = 0;
2142   options.statistics->setTickerCount(NO_FILE_OPENS, 0);
2143   ReadOptions read_options;
2144   auto* iter = NewIterator(read_options);
2145   iter->SeekToFirst();
2146   int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
2147   size_t bytes_read = env_->random_read_bytes_counter_;
2148   delete iter;
2149 
2150   int64_t num_file_closes = TestGetTickerCount(options, NO_FILE_CLOSES);
2151   env_->random_read_bytes_counter_ = 0;
2152   options.statistics->setTickerCount(NO_FILE_OPENS, 0);
2153   read_options.readahead_size = 1024 * 10;
2154   iter = NewIterator(read_options);
2155   iter->SeekToFirst();
2156   int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
2157   size_t bytes_read_readahead = env_->random_read_bytes_counter_;
2158   delete iter;
2159   int64_t num_file_closes_readahead =
2160       TestGetTickerCount(options, NO_FILE_CLOSES);
2161   ASSERT_EQ(num_file_opens, num_file_opens_readahead);
2162   ASSERT_EQ(num_file_closes, num_file_closes_readahead);
2163   ASSERT_GT(bytes_read_readahead, bytes_read);
2164   ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
2165 
2166   // Verify correctness.
2167   iter = NewIterator(read_options);
2168   int count = 0;
2169   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2170     ASSERT_EQ(value, iter->value());
2171     count++;
2172   }
2173   ASSERT_EQ(100, count);
2174   for (int i = 0; i < 100; i++) {
2175     iter->Seek(Key(i));
2176     ASSERT_EQ(value, iter->value());
2177   }
2178   delete iter;
2179 }
2180 
2181 // Insert a key, create a snapshot iterator, overwrite key lots of times,
2182 // seek to a smaller key. Expect DBIter to fall back to a seek instead of
2183 // going through all the overwrites linearly.
TEST_P(DBIteratorTest,DBIteratorSkipRecentDuplicatesTest)2184 TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) {
2185   Options options = CurrentOptions();
2186   options.env = env_;
2187   options.create_if_missing = true;
2188   options.max_sequential_skip_in_iterations = 3;
2189   options.prefix_extractor = nullptr;
2190   options.write_buffer_size = 1 << 27;  // big enough to avoid flush
2191   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2192   DestroyAndReopen(options);
2193 
2194   // Insert.
2195   ASSERT_OK(Put("b", "0"));
2196 
2197   // Create iterator.
2198   ReadOptions ro;
2199   std::unique_ptr<Iterator> iter(NewIterator(ro));
2200 
2201   // Insert a lot.
2202   for (int i = 0; i < 100; ++i) {
2203     ASSERT_OK(Put("b", std::to_string(i + 1).c_str()));
2204   }
2205 
2206 #ifndef ROCKSDB_LITE
2207   // Check that memtable wasn't flushed.
2208   std::string val;
2209   ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val));
2210   EXPECT_EQ("0", val);
2211 #endif
2212 
2213   // Seek iterator to a smaller key.
2214   get_perf_context()->Reset();
2215   iter->Seek("a");
2216   ASSERT_TRUE(iter->Valid());
2217   EXPECT_EQ("b", iter->key().ToString());
2218   EXPECT_EQ("0", iter->value().ToString());
2219 
2220   // Check that the seek didn't do too much work.
2221   // Checks are not tight, just make sure that everything is well below 100.
2222   EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4);
2223   EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8);
2224   EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10);
2225   EXPECT_LT(get_perf_context()->next_on_memtable_count, 10);
2226   EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10);
2227 
2228   // Check that iterator did something like what we expect.
2229   EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0);
2230   EXPECT_EQ(get_perf_context()->internal_merge_count, 0);
2231   EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2);
2232   EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2);
2233   EXPECT_EQ(1, options.statistics->getTickerCount(
2234                  NUMBER_OF_RESEEKS_IN_ITERATION));
2235 }
2236 
TEST_P(DBIteratorTest,Refresh)2237 TEST_P(DBIteratorTest, Refresh) {
2238   ASSERT_OK(Put("x", "y"));
2239 
2240   std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2241   iter->Seek(Slice("a"));
2242   ASSERT_TRUE(iter->Valid());
2243   ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2244   iter->Next();
2245   ASSERT_FALSE(iter->Valid());
2246 
2247   ASSERT_OK(Put("c", "d"));
2248 
2249   iter->Seek(Slice("a"));
2250   ASSERT_TRUE(iter->Valid());
2251   ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2252   iter->Next();
2253   ASSERT_FALSE(iter->Valid());
2254 
2255   iter->Refresh();
2256 
2257   iter->Seek(Slice("a"));
2258   ASSERT_TRUE(iter->Valid());
2259   ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2260   iter->Next();
2261   ASSERT_TRUE(iter->Valid());
2262   ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2263   iter->Next();
2264   ASSERT_FALSE(iter->Valid());
2265 
2266   dbfull()->Flush(FlushOptions());
2267 
2268   ASSERT_OK(Put("m", "n"));
2269 
2270   iter->Seek(Slice("a"));
2271   ASSERT_TRUE(iter->Valid());
2272   ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2273   iter->Next();
2274   ASSERT_TRUE(iter->Valid());
2275   ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2276   iter->Next();
2277   ASSERT_FALSE(iter->Valid());
2278 
2279   iter->Refresh();
2280 
2281   iter->Seek(Slice("a"));
2282   ASSERT_TRUE(iter->Valid());
2283   ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2284   iter->Next();
2285   ASSERT_TRUE(iter->Valid());
2286   ASSERT_EQ(iter->key().compare(Slice("m")), 0);
2287   iter->Next();
2288   ASSERT_TRUE(iter->Valid());
2289   ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2290   iter->Next();
2291   ASSERT_FALSE(iter->Valid());
2292 
2293   iter.reset();
2294 }
2295 
TEST_P(DBIteratorTest,RefreshWithSnapshot)2296 TEST_P(DBIteratorTest, RefreshWithSnapshot) {
2297   ASSERT_OK(Put("x", "y"));
2298   const Snapshot* snapshot = db_->GetSnapshot();
2299   ReadOptions options;
2300   options.snapshot = snapshot;
2301   Iterator* iter = NewIterator(options);
2302 
2303   iter->Seek(Slice("a"));
2304   ASSERT_TRUE(iter->Valid());
2305   ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2306   iter->Next();
2307   ASSERT_FALSE(iter->Valid());
2308 
2309   ASSERT_OK(Put("c", "d"));
2310 
2311   iter->Seek(Slice("a"));
2312   ASSERT_TRUE(iter->Valid());
2313   ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2314   iter->Next();
2315   ASSERT_FALSE(iter->Valid());
2316 
2317   Status s;
2318   s = iter->Refresh();
2319   ASSERT_TRUE(s.IsNotSupported());
2320   db_->ReleaseSnapshot(snapshot);
2321   delete iter;
2322 }
2323 
TEST_P(DBIteratorTest,CreationFailure)2324 TEST_P(DBIteratorTest, CreationFailure) {
2325   SyncPoint::GetInstance()->SetCallBack(
2326       "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) {
2327         *(reinterpret_cast<Status*>(arg)) = Status::Corruption("test status");
2328       });
2329   SyncPoint::GetInstance()->EnableProcessing();
2330 
2331   Iterator* iter = NewIterator(ReadOptions());
2332   ASSERT_FALSE(iter->Valid());
2333   ASSERT_TRUE(iter->status().IsCorruption());
2334   delete iter;
2335 }
2336 
TEST_P(DBIteratorTest,UpperBoundWithChangeDirection)2337 TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) {
2338   Options options = CurrentOptions();
2339   options.max_sequential_skip_in_iterations = 3;
2340   DestroyAndReopen(options);
2341 
2342   // write a bunch of kvs to the database.
2343   ASSERT_OK(Put("a", "1"));
2344   ASSERT_OK(Put("y", "1"));
2345   ASSERT_OK(Put("y1", "1"));
2346   ASSERT_OK(Put("y2", "1"));
2347   ASSERT_OK(Put("y3", "1"));
2348   ASSERT_OK(Put("z", "1"));
2349   ASSERT_OK(Flush());
2350   ASSERT_OK(Put("a", "1"));
2351   ASSERT_OK(Put("z", "1"));
2352   ASSERT_OK(Put("bar", "1"));
2353   ASSERT_OK(Put("foo", "1"));
2354 
2355   std::string upper_bound = "x";
2356   Slice ub_slice(upper_bound);
2357   ReadOptions ro;
2358   ro.iterate_upper_bound = &ub_slice;
2359   ro.max_skippable_internal_keys = 1000;
2360 
2361   Iterator* iter = NewIterator(ro);
2362   iter->Seek("foo");
2363   ASSERT_TRUE(iter->Valid());
2364   ASSERT_EQ("foo", iter->key().ToString());
2365 
2366   iter->Prev();
2367   ASSERT_TRUE(iter->Valid());
2368   ASSERT_OK(iter->status());
2369   ASSERT_EQ("bar", iter->key().ToString());
2370 
2371   delete iter;
2372 }
2373 
TEST_P(DBIteratorTest,TableFilter)2374 TEST_P(DBIteratorTest, TableFilter) {
2375   ASSERT_OK(Put("a", "1"));
2376   dbfull()->Flush(FlushOptions());
2377   ASSERT_OK(Put("b", "2"));
2378   ASSERT_OK(Put("c", "3"));
2379   dbfull()->Flush(FlushOptions());
2380   ASSERT_OK(Put("d", "4"));
2381   ASSERT_OK(Put("e", "5"));
2382   ASSERT_OK(Put("f", "6"));
2383   dbfull()->Flush(FlushOptions());
2384 
2385   // Ensure the table_filter callback is called once for each table.
2386   {
2387     std::set<uint64_t> unseen{1, 2, 3};
2388     ReadOptions opts;
2389     opts.table_filter = [&](const TableProperties& props) {
2390       auto it = unseen.find(props.num_entries);
2391       if (it == unseen.end()) {
2392         ADD_FAILURE() << "saw table properties with an unexpected "
2393                       << props.num_entries << " entries";
2394       } else {
2395         unseen.erase(it);
2396       }
2397       return true;
2398     };
2399     auto iter = NewIterator(opts);
2400     iter->SeekToFirst();
2401     ASSERT_EQ(IterStatus(iter), "a->1");
2402     iter->Next();
2403     ASSERT_EQ(IterStatus(iter), "b->2");
2404     iter->Next();
2405     ASSERT_EQ(IterStatus(iter), "c->3");
2406     iter->Next();
2407     ASSERT_EQ(IterStatus(iter), "d->4");
2408     iter->Next();
2409     ASSERT_EQ(IterStatus(iter), "e->5");
2410     iter->Next();
2411     ASSERT_EQ(IterStatus(iter), "f->6");
2412     iter->Next();
2413     ASSERT_FALSE(iter->Valid());
2414     ASSERT_TRUE(unseen.empty());
2415     delete iter;
2416   }
2417 
2418   // Ensure returning false in the table_filter hides the keys from that table
2419   // during iteration.
2420   {
2421     ReadOptions opts;
2422     opts.table_filter = [](const TableProperties& props) {
2423       return props.num_entries != 2;
2424     };
2425     auto iter = NewIterator(opts);
2426     iter->SeekToFirst();
2427     ASSERT_EQ(IterStatus(iter), "a->1");
2428     iter->Next();
2429     ASSERT_EQ(IterStatus(iter), "d->4");
2430     iter->Next();
2431     ASSERT_EQ(IterStatus(iter), "e->5");
2432     iter->Next();
2433     ASSERT_EQ(IterStatus(iter), "f->6");
2434     iter->Next();
2435     ASSERT_FALSE(iter->Valid());
2436     delete iter;
2437   }
2438 }
2439 
TEST_P(DBIteratorTest,UpperBoundWithPrevReseek)2440 TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
2441   Options options = CurrentOptions();
2442   options.max_sequential_skip_in_iterations = 3;
2443   DestroyAndReopen(options);
2444 
2445   // write a bunch of kvs to the database.
2446   ASSERT_OK(Put("a", "1"));
2447   ASSERT_OK(Put("y", "1"));
2448   ASSERT_OK(Put("z", "1"));
2449   ASSERT_OK(Flush());
2450   ASSERT_OK(Put("a", "1"));
2451   ASSERT_OK(Put("z", "1"));
2452   ASSERT_OK(Put("bar", "1"));
2453   ASSERT_OK(Put("foo", "1"));
2454   ASSERT_OK(Put("foo", "2"));
2455 
2456   ASSERT_OK(Put("foo", "3"));
2457   ASSERT_OK(Put("foo", "4"));
2458   ASSERT_OK(Put("foo", "5"));
2459   const Snapshot* snapshot = db_->GetSnapshot();
2460   ASSERT_OK(Put("foo", "6"));
2461 
2462   std::string upper_bound = "x";
2463   Slice ub_slice(upper_bound);
2464   ReadOptions ro;
2465   ro.snapshot = snapshot;
2466   ro.iterate_upper_bound = &ub_slice;
2467 
2468   Iterator* iter = NewIterator(ro);
2469   iter->SeekForPrev("goo");
2470   ASSERT_TRUE(iter->Valid());
2471   ASSERT_EQ("foo", iter->key().ToString());
2472   iter->Prev();
2473 
2474   ASSERT_TRUE(iter->Valid());
2475   ASSERT_EQ("bar", iter->key().ToString());
2476 
2477   delete iter;
2478   db_->ReleaseSnapshot(snapshot);
2479 }
2480 
TEST_P(DBIteratorTest,SkipStatistics)2481 TEST_P(DBIteratorTest, SkipStatistics) {
2482   Options options = CurrentOptions();
2483   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2484   DestroyAndReopen(options);
2485 
2486   int skip_count = 0;
2487 
2488   // write a bunch of kvs to the database.
2489   ASSERT_OK(Put("a", "1"));
2490   ASSERT_OK(Put("b", "1"));
2491   ASSERT_OK(Put("c", "1"));
2492   ASSERT_OK(Flush());
2493   ASSERT_OK(Put("d", "1"));
2494   ASSERT_OK(Put("e", "1"));
2495   ASSERT_OK(Put("f", "1"));
2496   ASSERT_OK(Put("a", "2"));
2497   ASSERT_OK(Put("b", "2"));
2498   ASSERT_OK(Flush());
2499   ASSERT_OK(Delete("d"));
2500   ASSERT_OK(Delete("e"));
2501   ASSERT_OK(Delete("f"));
2502 
2503   Iterator* iter = NewIterator(ReadOptions());
2504   int count = 0;
2505   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2506     ASSERT_OK(iter->status());
2507     count++;
2508   }
2509   ASSERT_EQ(count, 3);
2510   delete iter;
2511   skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
2512   ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2513 
2514   iter = NewIterator(ReadOptions());
2515   count = 0;
2516   for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2517     ASSERT_OK(iter->status());
2518     count++;
2519   }
2520   ASSERT_EQ(count, 3);
2521   delete iter;
2522   skip_count += 8; // Same as above, but in reverse order
2523   ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2524 
2525   ASSERT_OK(Put("aa", "1"));
2526   ASSERT_OK(Put("ab", "1"));
2527   ASSERT_OK(Put("ac", "1"));
2528   ASSERT_OK(Put("ad", "1"));
2529   ASSERT_OK(Flush());
2530   ASSERT_OK(Delete("ab"));
2531   ASSERT_OK(Delete("ac"));
2532   ASSERT_OK(Delete("ad"));
2533 
2534   ReadOptions ro;
2535   Slice prefix("b");
2536   ro.iterate_upper_bound = &prefix;
2537 
2538   iter = NewIterator(ro);
2539   count = 0;
2540   for(iter->Seek("aa"); iter->Valid(); iter->Next()) {
2541     ASSERT_OK(iter->status());
2542     count++;
2543   }
2544   ASSERT_EQ(count, 1);
2545   delete iter;
2546   skip_count += 6; // 3 deletes + 3 original keys
2547   ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2548 
2549   iter = NewIterator(ro);
2550   count = 0;
2551   for(iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2552     ASSERT_OK(iter->status());
2553     count++;
2554   }
2555   ASSERT_EQ(count, 2);
2556   delete iter;
2557   // 3 deletes + 3 original keys + lower sequence of "a"
2558   skip_count += 7;
2559   ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2560 }
2561 
TEST_P(DBIteratorTest,SeekAfterHittingManyInternalKeys)2562 TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) {
2563   Options options = CurrentOptions();
2564   DestroyAndReopen(options);
2565   ReadOptions ropts;
2566   ropts.max_skippable_internal_keys = 2;
2567 
2568   Put("1", "val_1");
2569   // Add more tombstones than max_skippable_internal_keys so that Next() fails.
2570   Delete("2");
2571   Delete("3");
2572   Delete("4");
2573   Delete("5");
2574   Put("6", "val_6");
2575 
2576   std::unique_ptr<Iterator> iter(NewIterator(ropts));
2577   iter->SeekToFirst();
2578 
2579   ASSERT_TRUE(iter->Valid());
2580   ASSERT_EQ(iter->key().ToString(), "1");
2581   ASSERT_EQ(iter->value().ToString(), "val_1");
2582 
2583   // This should fail as incomplete due to too many non-visible internal keys on
2584   // the way to the next valid user key.
2585   iter->Next();
2586   ASSERT_TRUE(!iter->Valid());
2587   ASSERT_TRUE(iter->status().IsIncomplete());
2588 
2589   // Get the internal key at which Next() failed.
2590   std::string prop_value;
2591   ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
2592   ASSERT_EQ("4", prop_value);
2593 
2594   // Create a new iterator to seek to the internal key.
2595   std::unique_ptr<Iterator> iter2(NewIterator(ropts));
2596   iter2->Seek(prop_value);
2597   ASSERT_TRUE(iter2->Valid());
2598   ASSERT_OK(iter2->status());
2599 
2600   ASSERT_EQ(iter2->key().ToString(), "6");
2601   ASSERT_EQ(iter2->value().ToString(), "val_6");
2602 }
2603 
2604 // Reproduces a former bug where iterator would skip some records when DBIter
2605 // re-seeks subiterator with Incomplete status.
TEST_P(DBIteratorTest,NonBlockingIterationBugRepro)2606 TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
2607   Options options = CurrentOptions();
2608   BlockBasedTableOptions table_options;
2609   // Make sure the sst file has more than one block.
2610   table_options.flush_block_policy_factory =
2611       std::make_shared<FlushBlockEveryKeyPolicyFactory>();
2612   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2613   DestroyAndReopen(options);
2614 
2615   // Two records in sst file, each in its own block.
2616   Put("b", "");
2617   Put("d", "");
2618   Flush();
2619 
2620   // Create a nonblocking iterator before writing to memtable.
2621   ReadOptions ropt;
2622   ropt.read_tier = kBlockCacheTier;
2623   std::unique_ptr<Iterator> iter(NewIterator(ropt));
2624 
2625   // Overwrite a key in memtable many times to hit
2626   // max_sequential_skip_in_iterations (which is 8 by default).
2627   for (int i = 0; i < 20; ++i) {
2628     Put("c", "");
2629   }
2630 
2631   // Load the second block in sst file into the block cache.
2632   {
2633     std::unique_ptr<Iterator> iter2(NewIterator(ReadOptions()));
2634     iter2->Seek("d");
2635   }
2636 
2637   // Finally seek the nonblocking iterator.
2638   iter->Seek("a");
2639   // With the bug, the status used to be OK, and the iterator used to point to
2640   // "d".
2641   EXPECT_TRUE(iter->status().IsIncomplete());
2642 }
2643 
TEST_P(DBIteratorTest,SeekBackwardAfterOutOfUpperBound)2644 TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
2645   Put("a", "");
2646   Put("b", "");
2647   Flush();
2648 
2649   ReadOptions ropt;
2650   Slice ub = "b";
2651   ropt.iterate_upper_bound = &ub;
2652 
2653   std::unique_ptr<Iterator> it(dbfull()->NewIterator(ropt));
2654   it->SeekForPrev("a");
2655   ASSERT_TRUE(it->Valid());
2656   ASSERT_OK(it->status());
2657   ASSERT_EQ("a", it->key().ToString());
2658   it->Next();
2659   ASSERT_FALSE(it->Valid());
2660   ASSERT_OK(it->status());
2661   it->SeekForPrev("a");
2662   ASSERT_OK(it->status());
2663 
2664   ASSERT_TRUE(it->Valid());
2665   ASSERT_EQ("a", it->key().ToString());
2666 }
2667 
TEST_P(DBIteratorTest,AvoidReseekLevelIterator)2668 TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
2669   Options options = CurrentOptions();
2670   options.compression = CompressionType::kNoCompression;
2671   BlockBasedTableOptions table_options;
2672   table_options.block_size = 800;
2673   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2674   Reopen(options);
2675 
2676   Random rnd(301);
2677   std::string random_str = RandomString(&rnd, 180);
2678 
2679   ASSERT_OK(Put("1", random_str));
2680   ASSERT_OK(Put("2", random_str));
2681   ASSERT_OK(Put("3", random_str));
2682   ASSERT_OK(Put("4", random_str));
2683   // A new block
2684   ASSERT_OK(Put("5", random_str));
2685   ASSERT_OK(Put("6", random_str));
2686   ASSERT_OK(Put("7", random_str));
2687   ASSERT_OK(Flush());
2688   ASSERT_OK(Put("8", random_str));
2689   ASSERT_OK(Put("9", random_str));
2690   ASSERT_OK(Flush());
2691   ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2692 
2693   int num_find_file_in_level = 0;
2694   int num_idx_blk_seek = 0;
2695   SyncPoint::GetInstance()->SetCallBack(
2696       "LevelIterator::Seek:BeforeFindFile",
2697       [&](void* /*arg*/) { num_find_file_in_level++; });
2698   SyncPoint::GetInstance()->SetCallBack(
2699       "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
2700   SyncPoint::GetInstance()->EnableProcessing();
2701 
2702   {
2703     std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2704     iter->Seek("1");
2705     ASSERT_TRUE(iter->Valid());
2706     ASSERT_EQ(1, num_find_file_in_level);
2707     ASSERT_EQ(1, num_idx_blk_seek);
2708 
2709     iter->Seek("2");
2710     ASSERT_TRUE(iter->Valid());
2711     ASSERT_EQ(1, num_find_file_in_level);
2712     ASSERT_EQ(1, num_idx_blk_seek);
2713 
2714     iter->Seek("3");
2715     ASSERT_TRUE(iter->Valid());
2716     ASSERT_EQ(1, num_find_file_in_level);
2717     ASSERT_EQ(1, num_idx_blk_seek);
2718 
2719     iter->Next();
2720     ASSERT_TRUE(iter->Valid());
2721     ASSERT_EQ(1, num_find_file_in_level);
2722     ASSERT_EQ(1, num_idx_blk_seek);
2723 
2724     iter->Seek("5");
2725     ASSERT_TRUE(iter->Valid());
2726     ASSERT_EQ(1, num_find_file_in_level);
2727     ASSERT_EQ(2, num_idx_blk_seek);
2728 
2729     iter->Seek("6");
2730     ASSERT_TRUE(iter->Valid());
2731     ASSERT_EQ(1, num_find_file_in_level);
2732     ASSERT_EQ(2, num_idx_blk_seek);
2733 
2734     iter->Seek("7");
2735     ASSERT_TRUE(iter->Valid());
2736     ASSERT_EQ(1, num_find_file_in_level);
2737     ASSERT_EQ(3, num_idx_blk_seek);
2738 
2739     iter->Seek("8");
2740     ASSERT_TRUE(iter->Valid());
2741     ASSERT_EQ(2, num_find_file_in_level);
2742     // Still re-seek because "8" is the boundary key, which has
2743     // the same user key as the seek key.
2744     ASSERT_EQ(4, num_idx_blk_seek);
2745 
2746     iter->Seek("5");
2747     ASSERT_TRUE(iter->Valid());
2748     ASSERT_EQ(3, num_find_file_in_level);
2749     ASSERT_EQ(5, num_idx_blk_seek);
2750 
2751     iter->Next();
2752     ASSERT_TRUE(iter->Valid());
2753     ASSERT_EQ(3, num_find_file_in_level);
2754     ASSERT_EQ(5, num_idx_blk_seek);
2755 
2756     // Seek backward never triggers the index block seek to be skipped
2757     iter->Seek("5");
2758     ASSERT_TRUE(iter->Valid());
2759     ASSERT_EQ(3, num_find_file_in_level);
2760     ASSERT_EQ(6, num_idx_blk_seek);
2761   }
2762 
2763   SyncPoint::GetInstance()->DisableProcessing();
2764 }
2765 
2766 // MyRocks may change iterate bounds before seek. Simply test to make sure such
2767 // usage doesn't break iterator.
TEST_P(DBIteratorTest,IterateBoundChangedBeforeSeek)2768 TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) {
2769   Options options = CurrentOptions();
2770   options.compression = CompressionType::kNoCompression;
2771   BlockBasedTableOptions table_options;
2772   table_options.block_size = 100;
2773   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2774   std::string value(50, 'v');
2775   Reopen(options);
2776   ASSERT_OK(Put("aaa", value));
2777   ASSERT_OK(Flush());
2778   ASSERT_OK(Put("bbb", "v"));
2779   ASSERT_OK(Put("ccc", "v"));
2780   ASSERT_OK(Put("ddd", "v"));
2781   ASSERT_OK(Flush());
2782   ASSERT_OK(Put("eee", "v"));
2783   ASSERT_OK(Flush());
2784   ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2785 
2786   std::string ub1 = "e";
2787   std::string ub2 = "c";
2788   Slice ub(ub1);
2789   ReadOptions read_opts1;
2790   read_opts1.iterate_upper_bound = &ub;
2791   Iterator* iter = NewIterator(read_opts1);
2792   // Seek and iterate accross block boundary.
2793   iter->Seek("b");
2794   ASSERT_TRUE(iter->Valid());
2795   ASSERT_OK(iter->status());
2796   ASSERT_EQ("bbb", iter->key());
2797   ub = Slice(ub2);
2798   iter->Seek("b");
2799   ASSERT_TRUE(iter->Valid());
2800   ASSERT_OK(iter->status());
2801   ASSERT_EQ("bbb", iter->key());
2802   iter->Next();
2803   ASSERT_FALSE(iter->Valid());
2804   ASSERT_OK(iter->status());
2805   delete iter;
2806 
2807   std::string lb1 = "a";
2808   std::string lb2 = "c";
2809   Slice lb(lb1);
2810   ReadOptions read_opts2;
2811   read_opts2.iterate_lower_bound = &lb;
2812   iter = NewIterator(read_opts2);
2813   iter->SeekForPrev("d");
2814   ASSERT_TRUE(iter->Valid());
2815   ASSERT_OK(iter->status());
2816   ASSERT_EQ("ccc", iter->key());
2817   lb = Slice(lb2);
2818   iter->SeekForPrev("d");
2819   ASSERT_TRUE(iter->Valid());
2820   ASSERT_OK(iter->status());
2821   ASSERT_EQ("ccc", iter->key());
2822   iter->Prev();
2823   ASSERT_FALSE(iter->Valid());
2824   ASSERT_OK(iter->status());
2825   delete iter;
2826 }
2827 
TEST_P(DBIteratorTest,IterateWithLowerBoundAcrossFileBoundary)2828 TEST_P(DBIteratorTest, IterateWithLowerBoundAcrossFileBoundary) {
2829   ASSERT_OK(Put("aaa", "v"));
2830   ASSERT_OK(Put("bbb", "v"));
2831   ASSERT_OK(Flush());
2832   ASSERT_OK(Put("ccc", "v"));
2833   ASSERT_OK(Put("ddd", "v"));
2834   ASSERT_OK(Flush());
2835   // Move both files to bottom level.
2836   ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2837   Slice lower_bound("b");
2838   ReadOptions read_opts;
2839   read_opts.iterate_lower_bound = &lower_bound;
2840   std::unique_ptr<Iterator> iter(NewIterator(read_opts));
2841   iter->SeekForPrev("d");
2842   ASSERT_TRUE(iter->Valid());
2843   ASSERT_OK(iter->status());
2844   ASSERT_EQ("ccc", iter->key());
2845   iter->Prev();
2846   ASSERT_TRUE(iter->Valid());
2847   ASSERT_OK(iter->status());
2848   ASSERT_EQ("bbb", iter->key());
2849   iter->Prev();
2850   ASSERT_FALSE(iter->Valid());
2851   ASSERT_OK(iter->status());
2852 }
2853 
2854 INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
2855                         testing::Values(true, false));
2856 
2857 // Tests how DBIter work with ReadCallback
2858 class DBIteratorWithReadCallbackTest : public DBIteratorTest {};
2859 
TEST_F(DBIteratorWithReadCallbackTest,ReadCallback)2860 TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) {
2861   class TestReadCallback : public ReadCallback {
2862    public:
2863     explicit TestReadCallback(SequenceNumber _max_visible_seq)
2864         : ReadCallback(_max_visible_seq) {}
2865 
2866     bool IsVisibleFullCheck(SequenceNumber seq) override {
2867       return seq <= max_visible_seq_;
2868     }
2869   };
2870 
2871   ASSERT_OK(Put("foo", "v1"));
2872   ASSERT_OK(Put("foo", "v2"));
2873   ASSERT_OK(Put("foo", "v3"));
2874   ASSERT_OK(Put("a", "va"));
2875   ASSERT_OK(Put("z", "vz"));
2876   SequenceNumber seq1 = db_->GetLatestSequenceNumber();
2877   TestReadCallback callback1(seq1);
2878   ASSERT_OK(Put("foo", "v4"));
2879   ASSERT_OK(Put("foo", "v5"));
2880   ASSERT_OK(Put("bar", "v7"));
2881 
2882   SequenceNumber seq2 = db_->GetLatestSequenceNumber();
2883   auto* cfd =
2884       reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())
2885           ->cfd();
2886   // The iterator are suppose to see data before seq1.
2887   Iterator* iter =
2888       dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1);
2889 
2890   // Seek
2891   // The latest value of "foo" before seq1 is "v3"
2892   iter->Seek("foo");
2893   ASSERT_TRUE(iter->Valid());
2894   ASSERT_OK(iter->status());
2895   ASSERT_EQ("foo", iter->key());
2896   ASSERT_EQ("v3", iter->value());
2897   // "bar" is not visible to the iterator. It will move on to the next key
2898   // "foo".
2899   iter->Seek("bar");
2900   ASSERT_TRUE(iter->Valid());
2901   ASSERT_OK(iter->status());
2902   ASSERT_EQ("foo", iter->key());
2903   ASSERT_EQ("v3", iter->value());
2904 
2905   // Next
2906   // Seek to "a"
2907   iter->Seek("a");
2908   ASSERT_TRUE(iter->Valid());
2909   ASSERT_OK(iter->status());
2910   ASSERT_EQ("va", iter->value());
2911   // "bar" is not visible to the iterator. It will move on to the next key
2912   // "foo".
2913   iter->Next();
2914   ASSERT_TRUE(iter->Valid());
2915   ASSERT_OK(iter->status());
2916   ASSERT_EQ("foo", iter->key());
2917   ASSERT_EQ("v3", iter->value());
2918 
2919   // Prev
2920   // Seek to "z"
2921   iter->Seek("z");
2922   ASSERT_TRUE(iter->Valid());
2923   ASSERT_OK(iter->status());
2924   ASSERT_EQ("vz", iter->value());
2925   // The previous key is "foo", which is visible to the iterator.
2926   iter->Prev();
2927   ASSERT_TRUE(iter->Valid());
2928   ASSERT_OK(iter->status());
2929   ASSERT_EQ("foo", iter->key());
2930   ASSERT_EQ("v3", iter->value());
2931   // "bar" is not visible to the iterator. It will move on to the next key "a".
2932   iter->Prev();  // skipping "bar"
2933   ASSERT_TRUE(iter->Valid());
2934   ASSERT_OK(iter->status());
2935   ASSERT_EQ("a", iter->key());
2936   ASSERT_EQ("va", iter->value());
2937 
2938   // SeekForPrev
2939   // The previous key is "foo", which is visible to the iterator.
2940   iter->SeekForPrev("y");
2941   ASSERT_TRUE(iter->Valid());
2942   ASSERT_OK(iter->status());
2943   ASSERT_EQ("foo", iter->key());
2944   ASSERT_EQ("v3", iter->value());
2945   // "bar" is not visible to the iterator. It will move on to the next key "a".
2946   iter->SeekForPrev("bar");
2947   ASSERT_TRUE(iter->Valid());
2948   ASSERT_OK(iter->status());
2949   ASSERT_EQ("a", iter->key());
2950   ASSERT_EQ("va", iter->value());
2951 
2952   delete iter;
2953 
2954   // Prev beyond max_sequential_skip_in_iterations
2955   uint64_t num_versions =
2956       CurrentOptions().max_sequential_skip_in_iterations + 10;
2957   for (uint64_t i = 0; i < num_versions; i++) {
2958     ASSERT_OK(Put("bar", ToString(i)));
2959   }
2960   SequenceNumber seq3 = db_->GetLatestSequenceNumber();
2961   TestReadCallback callback2(seq3);
2962   ASSERT_OK(Put("bar", "v8"));
2963   SequenceNumber seq4 = db_->GetLatestSequenceNumber();
2964 
2965   // The iterator is suppose to see data before seq3.
2966   iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2);
2967   // Seek to "z", which is visible.
2968   iter->Seek("z");
2969   ASSERT_TRUE(iter->Valid());
2970   ASSERT_OK(iter->status());
2971   ASSERT_EQ("vz", iter->value());
2972   // Previous key is "foo" and the last value "v5" is visible.
2973   iter->Prev();
2974   ASSERT_TRUE(iter->Valid());
2975   ASSERT_OK(iter->status());
2976   ASSERT_EQ("foo", iter->key());
2977   ASSERT_EQ("v5", iter->value());
2978   // Since the number of values of "bar" is more than
2979   // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
2980   // seek in forward direction. Here we test the fallback seek is correct.
2981   // The last visible value should be (num_versions - 1), as "v8" is not
2982   // visible.
2983   iter->Prev();
2984   ASSERT_TRUE(iter->Valid());
2985   ASSERT_OK(iter->status());
2986   ASSERT_EQ("bar", iter->key());
2987   ASSERT_EQ(ToString(num_versions - 1), iter->value());
2988 
2989   delete iter;
2990 }
2991 
2992 }  // namespace ROCKSDB_NAMESPACE
2993 
main(int argc,char ** argv)2994 int main(int argc, char** argv) {
2995   ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
2996   ::testing::InitGoogleTest(&argc, argv);
2997   return RUN_ALL_TESTS();
2998 }
2999