1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include <stdio.h>
11 #include <algorithm>
12 #include <iostream>
13 #include <map>
14 #include <memory>
15 #include <string>
16 #include <vector>
17 
18 #include "block_fetcher.h"
19 #include "cache/lru_cache.h"
20 #include "db/dbformat.h"
21 #include "db/memtable.h"
22 #include "db/write_batch_internal.h"
23 #include "memtable/stl_wrappers.h"
24 #include "meta_blocks.h"
25 #include "monitoring/statistics.h"
26 #include "port/port.h"
27 #include "rocksdb/cache.h"
28 #include "rocksdb/db.h"
29 #include "rocksdb/env.h"
30 #include "rocksdb/file_checksum.h"
31 #include "rocksdb/file_system.h"
32 #include "rocksdb/iterator.h"
33 #include "rocksdb/memtablerep.h"
34 #include "rocksdb/perf_context.h"
35 #include "rocksdb/slice_transform.h"
36 #include "rocksdb/statistics.h"
37 #include "rocksdb/write_buffer_manager.h"
38 #include "table/block_based/block.h"
39 #include "table/block_based/block_based_table_builder.h"
40 #include "table/block_based/block_based_table_factory.h"
41 #include "table/block_based/block_based_table_reader.h"
42 #include "table/block_based/block_builder.h"
43 #include "table/block_based/flush_block_policy.h"
44 #include "table/format.h"
45 #include "table/get_context.h"
46 #include "table/internal_iterator.h"
47 #include "table/plain/plain_table_factory.h"
48 #include "table/scoped_arena_iterator.h"
49 #include "table/sst_file_writer_collectors.h"
50 #include "test_util/sync_point.h"
51 #include "test_util/testharness.h"
52 #include "test_util/testutil.h"
53 #include "util/compression.h"
54 #include "util/file_checksum_helper.h"
55 #include "util/random.h"
56 #include "util/string_util.h"
57 #include "utilities/merge_operators.h"
58 
59 namespace ROCKSDB_NAMESPACE {
60 
61 extern const uint64_t kLegacyBlockBasedTableMagicNumber;
62 extern const uint64_t kLegacyPlainTableMagicNumber;
63 extern const uint64_t kBlockBasedTableMagicNumber;
64 extern const uint64_t kPlainTableMagicNumber;
65 
66 namespace {
67 
68 const std::string kDummyValue(10000, 'o');
69 
70 // DummyPropertiesCollector used to test BlockBasedTableProperties
71 class DummyPropertiesCollector : public TablePropertiesCollector {
72  public:
Name() const73   const char* Name() const override { return "DummyPropertiesCollector"; }
74 
Finish(UserCollectedProperties *)75   Status Finish(UserCollectedProperties* /*properties*/) override {
76     return Status::OK();
77   }
78 
Add(const Slice &,const Slice &)79   Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override {
80     return Status::OK();
81   }
82 
GetReadableProperties() const83   UserCollectedProperties GetReadableProperties() const override {
84     return UserCollectedProperties{};
85   }
86 };
87 
88 class DummyPropertiesCollectorFactory1
89     : public TablePropertiesCollectorFactory {
90  public:
CreateTablePropertiesCollector(TablePropertiesCollectorFactory::Context)91   TablePropertiesCollector* CreateTablePropertiesCollector(
92       TablePropertiesCollectorFactory::Context /*context*/) override {
93     return new DummyPropertiesCollector();
94   }
Name() const95   const char* Name() const override {
96     return "DummyPropertiesCollectorFactory1";
97   }
98 };
99 
100 class DummyPropertiesCollectorFactory2
101     : public TablePropertiesCollectorFactory {
102  public:
CreateTablePropertiesCollector(TablePropertiesCollectorFactory::Context)103   TablePropertiesCollector* CreateTablePropertiesCollector(
104       TablePropertiesCollectorFactory::Context /*context*/) override {
105     return new DummyPropertiesCollector();
106   }
Name() const107   const char* Name() const override {
108     return "DummyPropertiesCollectorFactory2";
109   }
110 };
111 
112 // Return reverse of "key".
113 // Used to test non-lexicographic comparators.
Reverse(const Slice & key)114 std::string Reverse(const Slice& key) {
115   auto rev = key.ToString();
116   std::reverse(rev.begin(), rev.end());
117   return rev;
118 }
119 
120 class ReverseKeyComparator : public Comparator {
121  public:
Name() const122   const char* Name() const override {
123     return "rocksdb.ReverseBytewiseComparator";
124   }
125 
Compare(const Slice & a,const Slice & b) const126   int Compare(const Slice& a, const Slice& b) const override {
127     return BytewiseComparator()->Compare(Reverse(a), Reverse(b));
128   }
129 
FindShortestSeparator(std::string * start,const Slice & limit) const130   void FindShortestSeparator(std::string* start,
131                              const Slice& limit) const override {
132     std::string s = Reverse(*start);
133     std::string l = Reverse(limit);
134     BytewiseComparator()->FindShortestSeparator(&s, l);
135     *start = Reverse(s);
136   }
137 
FindShortSuccessor(std::string * key) const138   void FindShortSuccessor(std::string* key) const override {
139     std::string s = Reverse(*key);
140     BytewiseComparator()->FindShortSuccessor(&s);
141     *key = Reverse(s);
142   }
143 };
144 
145 ReverseKeyComparator reverse_key_comparator;
146 
Increment(const Comparator * cmp,std::string * key)147 void Increment(const Comparator* cmp, std::string* key) {
148   if (cmp == BytewiseComparator()) {
149     key->push_back('\0');
150   } else {
151     assert(cmp == &reverse_key_comparator);
152     std::string rev = Reverse(*key);
153     rev.push_back('\0');
154     *key = Reverse(rev);
155   }
156 }
157 
158 const auto kUnknownColumnFamily =
159     TablePropertiesCollectorFactory::Context::kUnknownColumnFamily;
160 
161 }  // namespace
162 
163 // Helper class for tests to unify the interface between
164 // BlockBuilder/TableBuilder and Block/Table.
165 class Constructor {
166  public:
Constructor(const Comparator * cmp)167   explicit Constructor(const Comparator* cmp)
168       : data_(stl_wrappers::LessOfComparator(cmp)) {}
~Constructor()169   virtual ~Constructor() { }
170 
Add(const std::string & key,const Slice & value)171   void Add(const std::string& key, const Slice& value) {
172     data_[key] = value.ToString();
173   }
174 
175   // Finish constructing the data structure with all the keys that have
176   // been added so far.  Returns the keys in sorted order in "*keys"
177   // and stores the key/value pairs in "*kvmap"
Finish(const Options & options,const ImmutableOptions & ioptions,const MutableCFOptions & moptions,const BlockBasedTableOptions & table_options,const InternalKeyComparator & internal_comparator,std::vector<std::string> * keys,stl_wrappers::KVMap * kvmap)178   void Finish(const Options& options, const ImmutableOptions& ioptions,
179               const MutableCFOptions& moptions,
180               const BlockBasedTableOptions& table_options,
181               const InternalKeyComparator& internal_comparator,
182               std::vector<std::string>* keys, stl_wrappers::KVMap* kvmap) {
183     last_internal_key_ = &internal_comparator;
184     *kvmap = data_;
185     keys->clear();
186     for (const auto& kv : data_) {
187       keys->push_back(kv.first);
188     }
189     data_.clear();
190     Status s = FinishImpl(options, ioptions, moptions, table_options,
191                           internal_comparator, *kvmap);
192     ASSERT_TRUE(s.ok()) << s.ToString();
193   }
194 
195   // Construct the data structure from the data in "data"
196   virtual Status FinishImpl(const Options& options,
197                             const ImmutableOptions& ioptions,
198                             const MutableCFOptions& moptions,
199                             const BlockBasedTableOptions& table_options,
200                             const InternalKeyComparator& internal_comparator,
201                             const stl_wrappers::KVMap& data) = 0;
202 
203   virtual InternalIterator* NewIterator(
204       const SliceTransform* prefix_extractor = nullptr) const = 0;
205 
data()206   virtual const stl_wrappers::KVMap& data() { return data_; }
207 
IsArenaMode() const208   virtual bool IsArenaMode() const { return false; }
209 
db() const210   virtual DB* db() const { return nullptr; }  // Overridden in DBConstructor
211 
AnywayDeleteIterator() const212   virtual bool AnywayDeleteIterator() const { return false; }
213 
214  protected:
215   const InternalKeyComparator* last_internal_key_;
216 
217  private:
218   stl_wrappers::KVMap data_;
219 };
220 
221 // A helper class that converts internal format keys into user keys
222 class KeyConvertingIterator : public InternalIterator {
223  public:
KeyConvertingIterator(InternalIterator * iter,bool arena_mode=false)224   explicit KeyConvertingIterator(InternalIterator* iter,
225                                  bool arena_mode = false)
226       : iter_(iter), arena_mode_(arena_mode) {}
~KeyConvertingIterator()227   ~KeyConvertingIterator() override {
228     if (arena_mode_) {
229       iter_->~InternalIterator();
230     } else {
231       delete iter_;
232     }
233   }
Valid() const234   bool Valid() const override { return iter_->Valid() && status_.ok(); }
Seek(const Slice & target)235   void Seek(const Slice& target) override {
236     ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
237     std::string encoded;
238     AppendInternalKey(&encoded, ikey);
239     iter_->Seek(encoded);
240   }
SeekForPrev(const Slice & target)241   void SeekForPrev(const Slice& target) override {
242     ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
243     std::string encoded;
244     AppendInternalKey(&encoded, ikey);
245     iter_->SeekForPrev(encoded);
246   }
SeekToFirst()247   void SeekToFirst() override { iter_->SeekToFirst(); }
SeekToLast()248   void SeekToLast() override { iter_->SeekToLast(); }
Next()249   void Next() override { iter_->Next(); }
Prev()250   void Prev() override { iter_->Prev(); }
UpperBoundCheckResult()251   IterBoundCheck UpperBoundCheckResult() override {
252     return iter_->UpperBoundCheckResult();
253   }
254 
key() const255   Slice key() const override {
256     assert(Valid());
257     ParsedInternalKey parsed_key;
258     Status pik_status =
259         ParseInternalKey(iter_->key(), &parsed_key, true /* log_err_key */);
260     if (!pik_status.ok()) {
261       status_ = pik_status;
262       return Slice(status_.getState());
263     }
264     return parsed_key.user_key;
265   }
266 
value() const267   Slice value() const override { return iter_->value(); }
status() const268   Status status() const override {
269     return status_.ok() ? iter_->status() : status_;
270   }
271 
272  private:
273   mutable Status status_;
274   InternalIterator* iter_;
275   bool arena_mode_;
276 
277   // No copying allowed
278   KeyConvertingIterator(const KeyConvertingIterator&);
279   void operator=(const KeyConvertingIterator&);
280 };
281 
282 // `BlockConstructor` APIs always accept/return user keys.
283 class BlockConstructor : public Constructor {
284  public:
BlockConstructor(const Comparator * cmp)285   explicit BlockConstructor(const Comparator* cmp)
286       : Constructor(cmp), comparator_(cmp), block_(nullptr) {}
~BlockConstructor()287   ~BlockConstructor() override { delete block_; }
FinishImpl(const Options &,const ImmutableOptions &,const MutableCFOptions &,const BlockBasedTableOptions & table_options,const InternalKeyComparator &,const stl_wrappers::KVMap & kv_map)288   Status FinishImpl(const Options& /*options*/,
289                     const ImmutableOptions& /*ioptions*/,
290                     const MutableCFOptions& /*moptions*/,
291                     const BlockBasedTableOptions& table_options,
292                     const InternalKeyComparator& /*internal_comparator*/,
293                     const stl_wrappers::KVMap& kv_map) override {
294     delete block_;
295     block_ = nullptr;
296     BlockBuilder builder(table_options.block_restart_interval);
297 
298     for (const auto& kv : kv_map) {
299       // `DataBlockIter` assumes it reads only internal keys. `BlockConstructor`
300       // clients provide user keys, so we need to convert to internal key format
301       // before writing the data block.
302       ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
303       std::string encoded;
304       AppendInternalKey(&encoded, ikey);
305       builder.Add(encoded, kv.second);
306     }
307     // Open the block
308     data_ = builder.Finish().ToString();
309     BlockContents contents;
310     contents.data = data_;
311     block_ = new Block(std::move(contents));
312     return Status::OK();
313   }
NewIterator(const SliceTransform *) const314   InternalIterator* NewIterator(
315       const SliceTransform* /*prefix_extractor*/) const override {
316     // `DataBlockIter` returns the internal keys it reads.
317     // `KeyConvertingIterator` converts them to user keys before they are
318     // exposed to the `BlockConstructor` clients.
319     return new KeyConvertingIterator(
320         block_->NewDataIterator(comparator_, kDisableGlobalSequenceNumber));
321   }
322 
323  private:
324   const Comparator* comparator_;
325   std::string data_;
326   Block* block_;
327 
328   BlockConstructor();
329 };
330 
331 class TableConstructor : public Constructor {
332  public:
TableConstructor(const Comparator * cmp,bool convert_to_internal_key=false,int level=-1,SequenceNumber largest_seqno=0)333   explicit TableConstructor(const Comparator* cmp,
334                             bool convert_to_internal_key = false,
335                             int level = -1, SequenceNumber largest_seqno = 0)
336       : Constructor(cmp),
337         largest_seqno_(largest_seqno),
338         convert_to_internal_key_(convert_to_internal_key),
339         level_(level) {
340     env_ = ROCKSDB_NAMESPACE::Env::Default();
341   }
~TableConstructor()342   ~TableConstructor() override { Reset(); }
343 
FinishImpl(const Options & options,const ImmutableOptions & ioptions,const MutableCFOptions & moptions,const BlockBasedTableOptions &,const InternalKeyComparator & internal_comparator,const stl_wrappers::KVMap & kv_map)344   Status FinishImpl(const Options& options, const ImmutableOptions& ioptions,
345                     const MutableCFOptions& moptions,
346                     const BlockBasedTableOptions& /*table_options*/,
347                     const InternalKeyComparator& internal_comparator,
348                     const stl_wrappers::KVMap& kv_map) override {
349     Reset();
350     soptions.use_mmap_reads = ioptions.allow_mmap_reads;
351     std::unique_ptr<FSWritableFile> sink(new test::StringSink());
352     file_writer_.reset(new WritableFileWriter(
353         std::move(sink), "" /* don't care */, FileOptions()));
354     std::unique_ptr<TableBuilder> builder;
355     IntTblPropCollectorFactories int_tbl_prop_collector_factories;
356 
357     if (largest_seqno_ != 0) {
358       // Pretend that it's an external file written by SstFileWriter.
359       int_tbl_prop_collector_factories.emplace_back(
360           new SstFileWriterPropertiesCollectorFactory(2 /* version */,
361                                                       0 /* global_seqno*/));
362     }
363 
364     std::string column_family_name;
365     builder.reset(ioptions.table_factory->NewTableBuilder(
366         TableBuilderOptions(ioptions, moptions, internal_comparator,
367                             &int_tbl_prop_collector_factories,
368                             options.compression, options.compression_opts,
369                             kUnknownColumnFamily, column_family_name, level_),
370         file_writer_.get()));
371 
372     for (const auto& kv : kv_map) {
373       if (convert_to_internal_key_) {
374         ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
375         std::string encoded;
376         AppendInternalKey(&encoded, ikey);
377         builder->Add(encoded, kv.second);
378       } else {
379         builder->Add(kv.first, kv.second);
380       }
381       EXPECT_OK(builder->status());
382     }
383     Status s = builder->Finish();
384     EXPECT_OK(file_writer_->Flush());
385     EXPECT_TRUE(s.ok()) << s.ToString();
386 
387     EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());
388 
389     // Open the table
390     uniq_id_ = cur_uniq_id_++;
391     std::unique_ptr<FSRandomAccessFile> source(new test::StringSource(
392         TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads));
393 
394     file_reader_.reset(new RandomAccessFileReader(std::move(source), "test"));
395     const bool kSkipFilters = true;
396     const bool kImmortal = true;
397     return ioptions.table_factory->NewTableReader(
398         TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
399                            internal_comparator, !kSkipFilters, !kImmortal,
400                            false, level_, largest_seqno_, &block_cache_tracer_,
401                            moptions.write_buffer_size),
402         std::move(file_reader_), TEST_GetSink()->contents().size(),
403         &table_reader_);
404   }
405 
NewIterator(const SliceTransform * prefix_extractor) const406   InternalIterator* NewIterator(
407       const SliceTransform* prefix_extractor) const override {
408     InternalIterator* iter = table_reader_->NewIterator(
409         read_options_, prefix_extractor, /*arena=*/nullptr,
410         /*skip_filters=*/false, TableReaderCaller::kUncategorized);
411     if (convert_to_internal_key_) {
412       return new KeyConvertingIterator(iter);
413     } else {
414       return iter;
415     }
416   }
417 
ApproximateOffsetOf(const Slice & key) const418   uint64_t ApproximateOffsetOf(const Slice& key) const {
419     if (convert_to_internal_key_) {
420       InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
421       const Slice skey = ikey.Encode();
422       return table_reader_->ApproximateOffsetOf(
423           skey, TableReaderCaller::kUncategorized);
424     }
425     return table_reader_->ApproximateOffsetOf(
426         key, TableReaderCaller::kUncategorized);
427   }
428 
Reopen(const ImmutableOptions & ioptions,const MutableCFOptions & moptions)429   virtual Status Reopen(const ImmutableOptions& ioptions,
430                         const MutableCFOptions& moptions) {
431     std::unique_ptr<FSRandomAccessFile> source(new test::StringSource(
432         TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads));
433 
434     file_reader_.reset(new RandomAccessFileReader(std::move(source), "test"));
435     return ioptions.table_factory->NewTableReader(
436         TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
437                            *last_internal_key_),
438         std::move(file_reader_), TEST_GetSink()->contents().size(),
439         &table_reader_);
440   }
441 
GetTableReader()442   virtual TableReader* GetTableReader() { return table_reader_.get(); }
443 
AnywayDeleteIterator() const444   bool AnywayDeleteIterator() const override {
445     return convert_to_internal_key_;
446   }
447 
ResetTableReader()448   void ResetTableReader() { table_reader_.reset(); }
449 
ConvertToInternalKey()450   bool ConvertToInternalKey() { return convert_to_internal_key_; }
451 
TEST_GetSink()452   test::StringSink* TEST_GetSink() {
453     return static_cast<test::StringSink*>(file_writer_->writable_file());
454   }
455 
456   BlockCacheTracer block_cache_tracer_;
457 
458  private:
Reset()459   void Reset() {
460     uniq_id_ = 0;
461     table_reader_.reset();
462     file_writer_.reset();
463     file_reader_.reset();
464   }
465 
466   const ReadOptions read_options_;
467   uint64_t uniq_id_;
468   std::unique_ptr<WritableFileWriter> file_writer_;
469   std::unique_ptr<RandomAccessFileReader> file_reader_;
470   std::unique_ptr<TableReader> table_reader_;
471   SequenceNumber largest_seqno_;
472   bool convert_to_internal_key_;
473   int level_;
474 
475   TableConstructor();
476 
477   static uint64_t cur_uniq_id_;
478   EnvOptions soptions;
479   Env* env_;
480 };
481 uint64_t TableConstructor::cur_uniq_id_ = 1;
482 
483 class MemTableConstructor: public Constructor {
484  public:
MemTableConstructor(const Comparator * cmp,WriteBufferManager * wb)485   explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb)
486       : Constructor(cmp),
487         internal_comparator_(cmp),
488         write_buffer_manager_(wb),
489         table_factory_(new SkipListFactory) {
490     options_.memtable_factory = table_factory_;
491     ImmutableOptions ioptions(options_);
492     memtable_ =
493         new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_),
494                      wb, kMaxSequenceNumber, 0 /* column_family_id */);
495     memtable_->Ref();
496   }
~MemTableConstructor()497   ~MemTableConstructor() override { delete memtable_->Unref(); }
FinishImpl(const Options &,const ImmutableOptions & ioptions,const MutableCFOptions &,const BlockBasedTableOptions &,const InternalKeyComparator &,const stl_wrappers::KVMap & kv_map)498   Status FinishImpl(const Options&, const ImmutableOptions& ioptions,
499                     const MutableCFOptions& /*moptions*/,
500                     const BlockBasedTableOptions& /*table_options*/,
501                     const InternalKeyComparator& /*internal_comparator*/,
502                     const stl_wrappers::KVMap& kv_map) override {
503     delete memtable_->Unref();
504     ImmutableOptions mem_ioptions(ioptions);
505     memtable_ = new MemTable(internal_comparator_, mem_ioptions,
506                              MutableCFOptions(options_), write_buffer_manager_,
507                              kMaxSequenceNumber, 0 /* column_family_id */);
508     memtable_->Ref();
509     int seq = 1;
510     for (const auto& kv : kv_map) {
511       Status s = memtable_->Add(seq, kTypeValue, kv.first, kv.second,
512                                 nullptr /* kv_prot_info */);
513       if (!s.ok()) {
514         return s;
515       }
516       seq++;
517     }
518     return Status::OK();
519   }
NewIterator(const SliceTransform *) const520   InternalIterator* NewIterator(
521       const SliceTransform* /*prefix_extractor*/) const override {
522     return new KeyConvertingIterator(
523         memtable_->NewIterator(ReadOptions(), &arena_), true);
524   }
525 
AnywayDeleteIterator() const526   bool AnywayDeleteIterator() const override { return true; }
527 
IsArenaMode() const528   bool IsArenaMode() const override { return true; }
529 
530  private:
531   mutable Arena arena_;
532   InternalKeyComparator internal_comparator_;
533   Options options_;
534   WriteBufferManager* write_buffer_manager_;
535   MemTable* memtable_;
536   std::shared_ptr<SkipListFactory> table_factory_;
537 };
538 
539 class InternalIteratorFromIterator : public InternalIterator {
540  public:
InternalIteratorFromIterator(Iterator * it)541   explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {}
Valid() const542   bool Valid() const override { return it_->Valid(); }
Seek(const Slice & target)543   void Seek(const Slice& target) override { it_->Seek(target); }
SeekForPrev(const Slice & target)544   void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); }
SeekToFirst()545   void SeekToFirst() override { it_->SeekToFirst(); }
SeekToLast()546   void SeekToLast() override { it_->SeekToLast(); }
Next()547   void Next() override { it_->Next(); }
Prev()548   void Prev() override { it_->Prev(); }
key() const549   Slice key() const override { return it_->key(); }
value() const550   Slice value() const override { return it_->value(); }
status() const551   Status status() const override { return it_->status(); }
552 
553  private:
554   std::unique_ptr<Iterator> it_;
555 };
556 
557 class DBConstructor: public Constructor {
558  public:
DBConstructor(const Comparator * cmp)559   explicit DBConstructor(const Comparator* cmp)
560       : Constructor(cmp),
561         comparator_(cmp) {
562     db_ = nullptr;
563     NewDB();
564   }
~DBConstructor()565   ~DBConstructor() override { delete db_; }
FinishImpl(const Options &,const ImmutableOptions &,const MutableCFOptions &,const BlockBasedTableOptions &,const InternalKeyComparator &,const stl_wrappers::KVMap & kv_map)566   Status FinishImpl(const Options& /*options*/,
567                     const ImmutableOptions& /*ioptions*/,
568                     const MutableCFOptions& /*moptions*/,
569                     const BlockBasedTableOptions& /*table_options*/,
570                     const InternalKeyComparator& /*internal_comparator*/,
571                     const stl_wrappers::KVMap& kv_map) override {
572     delete db_;
573     db_ = nullptr;
574     NewDB();
575     for (const auto& kv : kv_map) {
576       WriteBatch batch;
577       EXPECT_OK(batch.Put(kv.first, kv.second));
578       EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok());
579     }
580     return Status::OK();
581   }
582 
NewIterator(const SliceTransform *) const583   InternalIterator* NewIterator(
584       const SliceTransform* /*prefix_extractor*/) const override {
585     return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions()));
586   }
587 
db() const588   DB* db() const override { return db_; }
589 
590  private:
NewDB()591   void NewDB() {
592     std::string name = test::PerThreadDBPath("table_testdb");
593 
594     Options options;
595     options.comparator = comparator_;
596     Status status = DestroyDB(name, options);
597     ASSERT_TRUE(status.ok()) << status.ToString();
598 
599     options.create_if_missing = true;
600     options.error_if_exists = true;
601     options.write_buffer_size = 10000;  // Something small to force merging
602     status = DB::Open(options, name, &db_);
603     ASSERT_TRUE(status.ok()) << status.ToString();
604   }
605 
606   const Comparator* comparator_;
607   DB* db_;
608 };
609 
610 enum TestType {
611   BLOCK_BASED_TABLE_TEST,
612 #ifndef ROCKSDB_LITE
613   PLAIN_TABLE_SEMI_FIXED_PREFIX,
614   PLAIN_TABLE_FULL_STR_PREFIX,
615   PLAIN_TABLE_TOTAL_ORDER,
616 #endif  // !ROCKSDB_LITE
617   BLOCK_TEST,
618   MEMTABLE_TEST,
619   DB_TEST
620 };
621 
622 struct TestArgs {
623   TestType type;
624   bool reverse_compare;
625   int restart_interval;
626   CompressionType compression;
627   uint32_t compression_parallel_threads;
628   uint32_t format_version;
629   bool use_mmap;
630 };
631 
operator <<(std::ostream & os,const TestArgs & args)632 std::ostream& operator<<(std::ostream& os, const TestArgs& args) {
633   os << "type: " << args.type << " reverse_compare: " << args.reverse_compare
634      << " restart_interval: " << args.restart_interval
635      << " compression: " << args.compression
636      << " compression_parallel_threads: " << args.compression_parallel_threads
637      << " format_version: " << args.format_version
638      << " use_mmap: " << args.use_mmap;
639 
640   return os;
641 }
642 
GenerateArgList()643 static std::vector<TestArgs> GenerateArgList() {
644   std::vector<TestArgs> test_args;
645   std::vector<TestType> test_types = {
646       BLOCK_BASED_TABLE_TEST,
647 #ifndef ROCKSDB_LITE
648       PLAIN_TABLE_SEMI_FIXED_PREFIX,
649       PLAIN_TABLE_FULL_STR_PREFIX,
650       PLAIN_TABLE_TOTAL_ORDER,
651 #endif  // !ROCKSDB_LITE
652       BLOCK_TEST,
653       MEMTABLE_TEST, DB_TEST};
654   std::vector<bool> reverse_compare_types = {false, true};
655   std::vector<int> restart_intervals = {16, 1, 1024};
656   std::vector<uint32_t> compression_parallel_threads = {1, 4};
657 
658   // Only add compression if it is supported
659   std::vector<std::pair<CompressionType, bool>> compression_types;
660   compression_types.emplace_back(kNoCompression, false);
661   if (Snappy_Supported()) {
662     compression_types.emplace_back(kSnappyCompression, false);
663   }
664   if (Zlib_Supported()) {
665     compression_types.emplace_back(kZlibCompression, false);
666     compression_types.emplace_back(kZlibCompression, true);
667   }
668   if (BZip2_Supported()) {
669     compression_types.emplace_back(kBZip2Compression, false);
670     compression_types.emplace_back(kBZip2Compression, true);
671   }
672   if (LZ4_Supported()) {
673     compression_types.emplace_back(kLZ4Compression, false);
674     compression_types.emplace_back(kLZ4Compression, true);
675     compression_types.emplace_back(kLZ4HCCompression, false);
676     compression_types.emplace_back(kLZ4HCCompression, true);
677   }
678   if (XPRESS_Supported()) {
679     compression_types.emplace_back(kXpressCompression, false);
680     compression_types.emplace_back(kXpressCompression, true);
681   }
682   if (ZSTD_Supported()) {
683     compression_types.emplace_back(kZSTD, false);
684     compression_types.emplace_back(kZSTD, true);
685   }
686 
687   for (auto test_type : test_types) {
688     for (auto reverse_compare : reverse_compare_types) {
689 #ifndef ROCKSDB_LITE
690       if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX ||
691           test_type == PLAIN_TABLE_FULL_STR_PREFIX ||
692           test_type == PLAIN_TABLE_TOTAL_ORDER) {
693         // Plain table doesn't use restart index or compression.
694         TestArgs one_arg;
695         one_arg.type = test_type;
696         one_arg.reverse_compare = reverse_compare;
697         one_arg.restart_interval = restart_intervals[0];
698         one_arg.compression = compression_types[0].first;
699         one_arg.compression_parallel_threads = 1;
700         one_arg.format_version = 0;
701         one_arg.use_mmap = true;
702         test_args.push_back(one_arg);
703         one_arg.use_mmap = false;
704         test_args.push_back(one_arg);
705         continue;
706       }
707 #endif  // !ROCKSDB_LITE
708 
709       for (auto restart_interval : restart_intervals) {
710         for (auto compression_type : compression_types) {
711           for (auto num_threads : compression_parallel_threads) {
712             TestArgs one_arg;
713             one_arg.type = test_type;
714             one_arg.reverse_compare = reverse_compare;
715             one_arg.restart_interval = restart_interval;
716             one_arg.compression = compression_type.first;
717             one_arg.compression_parallel_threads = num_threads;
718             one_arg.format_version = compression_type.second ? 2 : 1;
719             one_arg.use_mmap = false;
720             test_args.push_back(one_arg);
721           }
722         }
723       }
724     }
725   }
726   return test_args;
727 }
728 
729 // In order to make all tests run for plain table format, including
730 // those operating on empty keys, create a new prefix transformer which
731 // return fixed prefix if the slice is not shorter than the prefix length,
732 // and the full slice if it is shorter.
733 class FixedOrLessPrefixTransform : public SliceTransform {
734  private:
735   const size_t prefix_len_;
736 
737  public:
FixedOrLessPrefixTransform(size_t prefix_len)738   explicit FixedOrLessPrefixTransform(size_t prefix_len) :
739       prefix_len_(prefix_len) {
740   }
741 
Name() const742   const char* Name() const override { return "rocksdb.FixedPrefix"; }
743 
Transform(const Slice & src) const744   Slice Transform(const Slice& src) const override {
745     assert(InDomain(src));
746     if (src.size() < prefix_len_) {
747       return src;
748     }
749     return Slice(src.data(), prefix_len_);
750   }
751 
InDomain(const Slice &) const752   bool InDomain(const Slice& /*src*/) const override { return true; }
753 
InRange(const Slice & dst) const754   bool InRange(const Slice& dst) const override {
755     return (dst.size() <= prefix_len_);
756   }
FullLengthEnabled(size_t *) const757   bool FullLengthEnabled(size_t* /*len*/) const override { return false; }
758 };
759 
760 class HarnessTest : public testing::Test {
761  public:
HarnessTest(const TestArgs & args)762   explicit HarnessTest(const TestArgs& args)
763       : args_(args),
764         ioptions_(options_),
765         moptions_(options_),
766         write_buffer_(options_.db_write_buffer_size),
767         support_prev_(true),
768         only_support_prefix_seek_(false) {
769     options_.compression = args_.compression;
770     options_.compression_opts.parallel_threads =
771         args_.compression_parallel_threads;
772     // Use shorter block size for tests to exercise block boundary
773     // conditions more.
774     if (args_.reverse_compare) {
775       options_.comparator = &reverse_key_comparator;
776     }
777 
778     internal_comparator_.reset(
779         new test::PlainInternalKeyComparator(options_.comparator));
780 
781     options_.allow_mmap_reads = args_.use_mmap;
782     switch (args_.type) {
783       case BLOCK_BASED_TABLE_TEST:
784         table_options_.flush_block_policy_factory.reset(
785             new FlushBlockBySizePolicyFactory());
786         table_options_.block_size = 256;
787         table_options_.block_restart_interval = args_.restart_interval;
788         table_options_.index_block_restart_interval = args_.restart_interval;
789         table_options_.format_version = args_.format_version;
790         options_.table_factory.reset(
791             new BlockBasedTableFactory(table_options_));
792         constructor_.reset(new TableConstructor(
793             options_.comparator, true /* convert_to_internal_key_ */));
794         internal_comparator_.reset(
795             new InternalKeyComparator(options_.comparator));
796         break;
797 // Plain table is not supported in ROCKSDB_LITE
798 #ifndef ROCKSDB_LITE
799       case PLAIN_TABLE_SEMI_FIXED_PREFIX:
800         support_prev_ = false;
801         only_support_prefix_seek_ = true;
802         options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2));
803         options_.table_factory.reset(NewPlainTableFactory());
804         constructor_.reset(new TableConstructor(
805             options_.comparator, true /* convert_to_internal_key_ */));
806         internal_comparator_.reset(
807             new InternalKeyComparator(options_.comparator));
808         break;
809       case PLAIN_TABLE_FULL_STR_PREFIX:
810         support_prev_ = false;
811         only_support_prefix_seek_ = true;
812         options_.prefix_extractor.reset(NewNoopTransform());
813         options_.table_factory.reset(NewPlainTableFactory());
814         constructor_.reset(new TableConstructor(
815             options_.comparator, true /* convert_to_internal_key_ */));
816         internal_comparator_.reset(
817             new InternalKeyComparator(options_.comparator));
818         break;
819       case PLAIN_TABLE_TOTAL_ORDER:
820         support_prev_ = false;
821         only_support_prefix_seek_ = false;
822         options_.prefix_extractor = nullptr;
823 
824         {
825           PlainTableOptions plain_table_options;
826           plain_table_options.user_key_len = kPlainTableVariableLength;
827           plain_table_options.bloom_bits_per_key = 0;
828           plain_table_options.hash_table_ratio = 0;
829 
830           options_.table_factory.reset(
831               NewPlainTableFactory(plain_table_options));
832         }
833         constructor_.reset(new TableConstructor(
834             options_.comparator, true /* convert_to_internal_key_ */));
835         internal_comparator_.reset(
836             new InternalKeyComparator(options_.comparator));
837         break;
838 #endif  // !ROCKSDB_LITE
839       case BLOCK_TEST:
840         table_options_.block_size = 256;
841         options_.table_factory.reset(
842             new BlockBasedTableFactory(table_options_));
843         constructor_.reset(new BlockConstructor(options_.comparator));
844         break;
845       case MEMTABLE_TEST:
846         table_options_.block_size = 256;
847         options_.table_factory.reset(
848             new BlockBasedTableFactory(table_options_));
849         constructor_.reset(
850             new MemTableConstructor(options_.comparator, &write_buffer_));
851         break;
852       case DB_TEST:
853         table_options_.block_size = 256;
854         options_.table_factory.reset(
855             new BlockBasedTableFactory(table_options_));
856         constructor_.reset(new DBConstructor(options_.comparator));
857         break;
858     }
859     ioptions_ = ImmutableOptions(options_);
860     moptions_ = MutableCFOptions(options_);
861   }
862 
Add(const std::string & key,const std::string & value)863   void Add(const std::string& key, const std::string& value) {
864     constructor_->Add(key, value);
865   }
866 
Test(Random * rnd)867   void Test(Random* rnd) {
868     std::vector<std::string> keys;
869     stl_wrappers::KVMap data;
870     constructor_->Finish(options_, ioptions_, moptions_, table_options_,
871                          *internal_comparator_, &keys, &data);
872 
873     TestForwardScan(keys, data);
874     if (support_prev_) {
875       TestBackwardScan(keys, data);
876     }
877     TestRandomAccess(rnd, keys, data);
878   }
879 
TestForwardScan(const std::vector<std::string> &,const stl_wrappers::KVMap & data)880   void TestForwardScan(const std::vector<std::string>& /*keys*/,
881                        const stl_wrappers::KVMap& data) {
882     InternalIterator* iter = constructor_->NewIterator();
883     ASSERT_TRUE(!iter->Valid());
884     iter->SeekToFirst();
885     ASSERT_OK(iter->status());
886     for (stl_wrappers::KVMap::const_iterator model_iter = data.begin();
887          model_iter != data.end(); ++model_iter) {
888       ASSERT_EQ(ToString(data, model_iter), ToString(iter));
889       iter->Next();
890       ASSERT_OK(iter->status());
891     }
892     ASSERT_TRUE(!iter->Valid());
893     ASSERT_OK(iter->status());
894     if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
895       iter->~InternalIterator();
896     } else {
897       delete iter;
898     }
899   }
900 
TestBackwardScan(const std::vector<std::string> &,const stl_wrappers::KVMap & data)901   void TestBackwardScan(const std::vector<std::string>& /*keys*/,
902                         const stl_wrappers::KVMap& data) {
903     InternalIterator* iter = constructor_->NewIterator();
904     ASSERT_TRUE(!iter->Valid());
905     iter->SeekToLast();
906     ASSERT_OK(iter->status());
907     for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin();
908          model_iter != data.rend(); ++model_iter) {
909       ASSERT_EQ(ToString(data, model_iter), ToString(iter));
910       iter->Prev();
911       ASSERT_OK(iter->status());
912     }
913     ASSERT_TRUE(!iter->Valid());
914     ASSERT_OK(iter->status());
915     if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
916       iter->~InternalIterator();
917     } else {
918       delete iter;
919     }
920   }
921 
TestRandomAccess(Random * rnd,const std::vector<std::string> & keys,const stl_wrappers::KVMap & data)922   void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys,
923                         const stl_wrappers::KVMap& data) {
924     static const bool kVerbose = false;
925     InternalIterator* iter = constructor_->NewIterator();
926     ASSERT_TRUE(!iter->Valid());
927     stl_wrappers::KVMap::const_iterator model_iter = data.begin();
928     if (kVerbose) fprintf(stderr, "---\n");
929     for (int i = 0; i < 200; i++) {
930       const int toss = rnd->Uniform(support_prev_ ? 5 : 3);
931       switch (toss) {
932         case 0: {
933           if (iter->Valid()) {
934             if (kVerbose) fprintf(stderr, "Next\n");
935             iter->Next();
936             ASSERT_OK(iter->status());
937             ++model_iter;
938             ASSERT_EQ(ToString(data, model_iter), ToString(iter));
939           }
940           break;
941         }
942 
943         case 1: {
944           if (kVerbose) fprintf(stderr, "SeekToFirst\n");
945           iter->SeekToFirst();
946           ASSERT_OK(iter->status());
947           model_iter = data.begin();
948           ASSERT_EQ(ToString(data, model_iter), ToString(iter));
949           break;
950         }
951 
952         case 2: {
953           std::string key = PickRandomKey(rnd, keys);
954           model_iter = data.lower_bound(key);
955           if (kVerbose) fprintf(stderr, "Seek '%s'\n",
956                                 EscapeString(key).c_str());
957           iter->Seek(Slice(key));
958           ASSERT_OK(iter->status());
959           ASSERT_EQ(ToString(data, model_iter), ToString(iter));
960           break;
961         }
962 
963         case 3: {
964           if (iter->Valid()) {
965             if (kVerbose) fprintf(stderr, "Prev\n");
966             iter->Prev();
967             ASSERT_OK(iter->status());
968             if (model_iter == data.begin()) {
969               model_iter = data.end();   // Wrap around to invalid value
970             } else {
971               --model_iter;
972             }
973             ASSERT_EQ(ToString(data, model_iter), ToString(iter));
974           }
975           break;
976         }
977 
978         case 4: {
979           if (kVerbose) fprintf(stderr, "SeekToLast\n");
980           iter->SeekToLast();
981           ASSERT_OK(iter->status());
982           if (keys.empty()) {
983             model_iter = data.end();
984           } else {
985             std::string last = data.rbegin()->first;
986             model_iter = data.lower_bound(last);
987           }
988           ASSERT_EQ(ToString(data, model_iter), ToString(iter));
989           break;
990         }
991       }
992     }
993     if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
994       iter->~InternalIterator();
995     } else {
996       delete iter;
997     }
998   }
999 
ToString(const stl_wrappers::KVMap & data,const stl_wrappers::KVMap::const_iterator & it)1000   std::string ToString(const stl_wrappers::KVMap& data,
1001                        const stl_wrappers::KVMap::const_iterator& it) {
1002     if (it == data.end()) {
1003       return "END";
1004     } else {
1005       return "'" + it->first + "->" + it->second + "'";
1006     }
1007   }
1008 
ToString(const stl_wrappers::KVMap & data,const stl_wrappers::KVMap::const_reverse_iterator & it)1009   std::string ToString(const stl_wrappers::KVMap& data,
1010                        const stl_wrappers::KVMap::const_reverse_iterator& it) {
1011     if (it == data.rend()) {
1012       return "END";
1013     } else {
1014       return "'" + it->first + "->" + it->second + "'";
1015     }
1016   }
1017 
ToString(const InternalIterator * it)1018   std::string ToString(const InternalIterator* it) {
1019     if (!it->Valid()) {
1020       return "END";
1021     } else {
1022       return "'" + it->key().ToString() + "->" + it->value().ToString() + "'";
1023     }
1024   }
1025 
PickRandomKey(Random * rnd,const std::vector<std::string> & keys)1026   std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) {
1027     if (keys.empty()) {
1028       return "foo";
1029     } else {
1030       const int index = rnd->Uniform(static_cast<int>(keys.size()));
1031       std::string result = keys[index];
1032       switch (rnd->Uniform(support_prev_ ? 3 : 1)) {
1033         case 0:
1034           // Return an existing key
1035           break;
1036         case 1: {
1037           // Attempt to return something smaller than an existing key
1038           if (result.size() > 0 && result[result.size() - 1] > '\0'
1039               && (!only_support_prefix_seek_
1040                   || options_.prefix_extractor->Transform(result).size()
1041                   < result.size())) {
1042             result[result.size() - 1]--;
1043           }
1044           break;
1045       }
1046         case 2: {
1047           // Return something larger than an existing key
1048           Increment(options_.comparator, &result);
1049           break;
1050         }
1051       }
1052       return result;
1053     }
1054   }
1055 
1056   // Returns nullptr if not running against a DB
db() const1057   DB* db() const { return constructor_->db(); }
1058 
1059  private:
1060   TestArgs args_;
1061   Options options_;
1062   ImmutableOptions ioptions_;
1063   MutableCFOptions moptions_;
1064   BlockBasedTableOptions table_options_;
1065   std::unique_ptr<Constructor> constructor_;
1066   WriteBufferManager write_buffer_;
1067   bool support_prev_;
1068   bool only_support_prefix_seek_;
1069   std::shared_ptr<InternalKeyComparator> internal_comparator_;
1070 };
1071 
1072 class ParameterizedHarnessTest : public HarnessTest,
1073                                  public testing::WithParamInterface<TestArgs> {
1074  public:
ParameterizedHarnessTest()1075   ParameterizedHarnessTest() : HarnessTest(GetParam()) {}
1076 };
1077 
1078 INSTANTIATE_TEST_CASE_P(TableTest, ParameterizedHarnessTest,
1079                         ::testing::ValuesIn(GenerateArgList()));
1080 
1081 class DBHarnessTest : public HarnessTest {
1082  public:
DBHarnessTest()1083   DBHarnessTest()
1084       : HarnessTest(TestArgs{DB_TEST, /* reverse_compare */ false,
1085                              /* restart_interval */ 16, kNoCompression,
1086                              /* compression_parallel_threads */ 1,
1087                              /* format_version */ 0, /* use_mmap */ false}) {}
1088 };
1089 
Between(uint64_t val,uint64_t low,uint64_t high)1090 static bool Between(uint64_t val, uint64_t low, uint64_t high) {
1091   bool result = (val >= low) && (val <= high);
1092   if (!result) {
1093     fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
1094             (unsigned long long)(val),
1095             (unsigned long long)(low),
1096             (unsigned long long)(high));
1097   }
1098   return result;
1099 }
1100 
1101 // Tests against all kinds of tables
1102 class TableTest : public testing::Test {
1103  public:
GetPlainInternalComparator(const Comparator * comp)1104   const InternalKeyComparator& GetPlainInternalComparator(
1105       const Comparator* comp) {
1106     if (!plain_internal_comparator) {
1107       plain_internal_comparator.reset(
1108           new test::PlainInternalKeyComparator(comp));
1109     }
1110     return *plain_internal_comparator;
1111   }
1112   void IndexTest(BlockBasedTableOptions table_options);
1113 
1114  private:
1115   std::unique_ptr<InternalKeyComparator> plain_internal_comparator;
1116 };
1117 
1118 class GeneralTableTest : public TableTest {};
1119 class BlockBasedTableTest
1120     : public TableTest,
1121       virtual public ::testing::WithParamInterface<uint32_t> {
1122  public:
BlockBasedTableTest()1123   BlockBasedTableTest() : format_(GetParam()) {
1124     env_ = ROCKSDB_NAMESPACE::Env::Default();
1125   }
1126 
GetBlockBasedTableOptions()1127   BlockBasedTableOptions GetBlockBasedTableOptions() {
1128     BlockBasedTableOptions options;
1129     options.format_version = format_;
1130     return options;
1131   }
1132 
SetupTracingTest(TableConstructor * c)1133   void SetupTracingTest(TableConstructor* c) {
1134     test_path_ = test::PerThreadDBPath("block_based_table_tracing_test");
1135     EXPECT_OK(env_->CreateDir(test_path_));
1136     trace_file_path_ = test_path_ + "/block_cache_trace_file";
1137     TraceOptions trace_opt;
1138     std::unique_ptr<TraceWriter> trace_writer;
1139     EXPECT_OK(NewFileTraceWriter(env_, EnvOptions(), trace_file_path_,
1140                                  &trace_writer));
1141     // Always return Status::OK().
1142     assert(c->block_cache_tracer_
1143                .StartTrace(env_->GetSystemClock().get(), trace_opt,
1144                            std::move(trace_writer))
1145                .ok());
1146     {
1147       std::string user_key = "k01";
1148       InternalKey internal_key(user_key, 0, kTypeValue);
1149       std::string encoded_key = internal_key.Encode().ToString();
1150       c->Add(encoded_key, kDummyValue);
1151     }
1152     {
1153       std::string user_key = "k02";
1154       InternalKey internal_key(user_key, 0, kTypeValue);
1155       std::string encoded_key = internal_key.Encode().ToString();
1156       c->Add(encoded_key, kDummyValue);
1157     }
1158   }
1159 
VerifyBlockAccessTrace(TableConstructor * c,const std::vector<BlockCacheTraceRecord> & expected_records)1160   void VerifyBlockAccessTrace(
1161       TableConstructor* c,
1162       const std::vector<BlockCacheTraceRecord>& expected_records) {
1163     c->block_cache_tracer_.EndTrace();
1164 
1165     {
1166       std::unique_ptr<TraceReader> trace_reader;
1167       Status s =
1168           NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &trace_reader);
1169       EXPECT_OK(s);
1170       BlockCacheTraceReader reader(std::move(trace_reader));
1171       BlockCacheTraceHeader header;
1172       EXPECT_OK(reader.ReadHeader(&header));
1173       uint32_t index = 0;
1174       while (s.ok()) {
1175         BlockCacheTraceRecord access;
1176         s = reader.ReadAccess(&access);
1177         if (!s.ok()) {
1178           break;
1179         }
1180         ASSERT_LT(index, expected_records.size());
1181         EXPECT_NE("", access.block_key);
1182         EXPECT_EQ(access.block_type, expected_records[index].block_type);
1183         EXPECT_GT(access.block_size, 0);
1184         EXPECT_EQ(access.caller, expected_records[index].caller);
1185         EXPECT_EQ(access.no_insert, expected_records[index].no_insert);
1186         EXPECT_EQ(access.is_cache_hit, expected_records[index].is_cache_hit);
1187         // Get
1188         if (access.caller == TableReaderCaller::kUserGet) {
1189           EXPECT_EQ(access.referenced_key,
1190                     expected_records[index].referenced_key);
1191           EXPECT_EQ(access.get_id, expected_records[index].get_id);
1192           EXPECT_EQ(access.get_from_user_specified_snapshot,
1193                     expected_records[index].get_from_user_specified_snapshot);
1194           if (access.block_type == TraceType::kBlockTraceDataBlock) {
1195             EXPECT_GT(access.referenced_data_size, 0);
1196             EXPECT_GT(access.num_keys_in_block, 0);
1197             EXPECT_EQ(access.referenced_key_exist_in_block,
1198                       expected_records[index].referenced_key_exist_in_block);
1199           }
1200         } else {
1201           EXPECT_EQ(access.referenced_key, "");
1202           EXPECT_EQ(access.get_id, 0);
1203           EXPECT_TRUE(access.get_from_user_specified_snapshot == Boolean::kFalse);
1204           EXPECT_EQ(access.referenced_data_size, 0);
1205           EXPECT_EQ(access.num_keys_in_block, 0);
1206           EXPECT_TRUE(access.referenced_key_exist_in_block == Boolean::kFalse);
1207         }
1208         index++;
1209       }
1210       EXPECT_EQ(index, expected_records.size());
1211     }
1212     EXPECT_OK(env_->DeleteFile(trace_file_path_));
1213     EXPECT_OK(env_->DeleteDir(test_path_));
1214   }
1215 
1216  protected:
1217   uint64_t IndexUncompressedHelper(bool indexCompress);
1218 
1219  private:
1220   uint32_t format_;
1221   Env* env_;
1222   std::string trace_file_path_;
1223   std::string test_path_;
1224 };
1225 class PlainTableTest : public TableTest {};
1226 class TablePropertyTest : public testing::Test {};
1227 class BBTTailPrefetchTest : public TableTest {};
1228 
1229 // The helper class to test the file checksum
1230 class FileChecksumTestHelper {
1231  public:
FileChecksumTestHelper(bool convert_to_internal_key=false)1232   FileChecksumTestHelper(bool convert_to_internal_key = false)
1233       : convert_to_internal_key_(convert_to_internal_key) {
1234   }
~FileChecksumTestHelper()1235   ~FileChecksumTestHelper() {}
1236 
CreateWriteableFile()1237   void CreateWriteableFile() {
1238     sink_ = new test::StringSink();
1239     std::unique_ptr<FSWritableFile> holder(sink_);
1240     file_writer_.reset(new WritableFileWriter(
1241         std::move(holder), "" /* don't care */, FileOptions()));
1242   }
1243 
SetFileChecksumGenerator(FileChecksumGenerator * checksum_generator)1244   void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) {
1245     if (file_writer_ != nullptr) {
1246       file_writer_->TEST_SetFileChecksumGenerator(checksum_generator);
1247     } else {
1248       delete checksum_generator;
1249     }
1250   }
1251 
GetFileWriter()1252   WritableFileWriter* GetFileWriter() { return file_writer_.get(); }
1253 
ResetTableBuilder(std::unique_ptr<TableBuilder> && builder)1254   Status ResetTableBuilder(std::unique_ptr<TableBuilder>&& builder) {
1255     assert(builder != nullptr);
1256     table_builder_ = std::move(builder);
1257     return Status::OK();
1258   }
1259 
AddKVtoKVMap(int num_entries)1260   void AddKVtoKVMap(int num_entries) {
1261     Random rnd(test::RandomSeed());
1262     for (int i = 0; i < num_entries; i++) {
1263       std::string v = rnd.RandomString(100);
1264       kv_map_[test::RandomKey(&rnd, 20)] = v;
1265     }
1266   }
1267 
WriteKVAndFlushTable()1268   Status WriteKVAndFlushTable() {
1269     for (const auto& kv : kv_map_) {
1270       if (convert_to_internal_key_) {
1271         ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
1272         std::string encoded;
1273         AppendInternalKey(&encoded, ikey);
1274         table_builder_->Add(encoded, kv.second);
1275       } else {
1276         table_builder_->Add(kv.first, kv.second);
1277       }
1278       EXPECT_TRUE(table_builder_->status().ok());
1279     }
1280     Status s = table_builder_->Finish();
1281     EXPECT_OK(file_writer_->Flush());
1282     EXPECT_OK(s);
1283 
1284     EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize());
1285     return s;
1286   }
1287 
GetFileChecksum()1288   std::string GetFileChecksum() {
1289     EXPECT_OK(file_writer_->Close());
1290     return table_builder_->GetFileChecksum();
1291   }
1292 
GetFileChecksumFuncName()1293   const char* GetFileChecksumFuncName() {
1294     return table_builder_->GetFileChecksumFuncName();
1295   }
1296 
CalculateFileChecksum(FileChecksumGenerator * file_checksum_generator,std::string * checksum)1297   Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator,
1298                                std::string* checksum) {
1299     assert(file_checksum_generator != nullptr);
1300     cur_uniq_id_ = checksum_uniq_id_++;
1301     test::StringSink* ss_rw =
1302         static_cast<test::StringSink*>(file_writer_->writable_file());
1303     std::unique_ptr<FSRandomAccessFile> source(
1304         new test::StringSource(ss_rw->contents()));
1305     file_reader_.reset(new RandomAccessFileReader(std::move(source), "test"));
1306 
1307     std::unique_ptr<char[]> scratch(new char[2048]);
1308     Slice result;
1309     uint64_t offset = 0;
1310     Status s;
1311     s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(),
1312                            nullptr, false);
1313     if (!s.ok()) {
1314       return s;
1315     }
1316     while (result.size() != 0) {
1317       file_checksum_generator->Update(scratch.get(), result.size());
1318       offset += static_cast<uint64_t>(result.size());
1319       s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(),
1320                              nullptr, false);
1321       if (!s.ok()) {
1322         return s;
1323       }
1324     }
1325     EXPECT_EQ(offset, static_cast<uint64_t>(table_builder_->FileSize()));
1326     file_checksum_generator->Finalize();
1327     *checksum = file_checksum_generator->GetChecksum();
1328     return Status::OK();
1329   }
1330 
1331  private:
1332   bool convert_to_internal_key_;
1333   uint64_t cur_uniq_id_;
1334   std::unique_ptr<WritableFileWriter> file_writer_;
1335   std::unique_ptr<RandomAccessFileReader> file_reader_;
1336   std::unique_ptr<TableBuilder> table_builder_;
1337   stl_wrappers::KVMap kv_map_;
1338   test::StringSink* sink_ = nullptr;
1339 
1340   static uint64_t checksum_uniq_id_;
1341 };
1342 
1343 uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1;
1344 
1345 INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest,
1346                         testing::Values(test::kDefaultFormatVersion));
1347 INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
1348                         testing::Values(test::kLatestFormatVersion));
1349 
1350 // This test serves as the living tutorial for the prefix scan of user collected
1351 // properties.
TEST_F(TablePropertyTest,PrefixScanTest)1352 TEST_F(TablePropertyTest, PrefixScanTest) {
1353   UserCollectedProperties props{{"num.111.1", "1"},
1354                                 {"num.111.2", "2"},
1355                                 {"num.111.3", "3"},
1356                                 {"num.333.1", "1"},
1357                                 {"num.333.2", "2"},
1358                                 {"num.333.3", "3"},
1359                                 {"num.555.1", "1"},
1360                                 {"num.555.2", "2"},
1361                                 {"num.555.3", "3"}, };
1362 
1363   // prefixes that exist
1364   for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
1365     int num = 0;
1366     for (auto pos = props.lower_bound(prefix);
1367          pos != props.end() &&
1368              pos->first.compare(0, prefix.size(), prefix) == 0;
1369          ++pos) {
1370       ++num;
1371       auto key = prefix + "." + ToString(num);
1372       ASSERT_EQ(key, pos->first);
1373       ASSERT_EQ(ToString(num), pos->second);
1374     }
1375     ASSERT_EQ(3, num);
1376   }
1377 
1378   // prefixes that don't exist
1379   for (const std::string& prefix :
1380        {"num.000", "num.222", "num.444", "num.666"}) {
1381     auto pos = props.lower_bound(prefix);
1382     ASSERT_TRUE(pos == props.end() ||
1383                 pos->first.compare(0, prefix.size(), prefix) != 0);
1384   }
1385 }
1386 
1387 // This test include all the basic checks except those for index size and block
1388 // size, which will be conducted in separated unit tests.
TEST_P(BlockBasedTableTest,BasicBlockBasedTableProperties)1389 TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) {
1390   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1391 
1392   c.Add("a1", "val1");
1393   c.Add("b2", "val2");
1394   c.Add("c3", "val3");
1395   c.Add("d4", "val4");
1396   c.Add("e5", "val5");
1397   c.Add("f6", "val6");
1398   c.Add("g7", "val7");
1399   c.Add("h8", "val8");
1400   c.Add("j9", "val9");
1401   uint64_t diff_internal_user_bytes = 9 * 8;  // 8 is seq size, 9 k-v totally
1402 
1403   std::vector<std::string> keys;
1404   stl_wrappers::KVMap kvmap;
1405   Options options;
1406   options.compression = kNoCompression;
1407   options.statistics = CreateDBStatistics();
1408   options.statistics->set_stats_level(StatsLevel::kAll);
1409   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1410   table_options.block_restart_interval = 1;
1411   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1412 
1413   ImmutableOptions ioptions(options);
1414   MutableCFOptions moptions(options);
1415   c.Finish(options, ioptions, moptions, table_options,
1416            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1417   ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0);
1418 
1419   auto& props = *c.GetTableReader()->GetTableProperties();
1420   ASSERT_EQ(kvmap.size(), props.num_entries);
1421 
1422   auto raw_key_size = kvmap.size() * 2ul;
1423   auto raw_value_size = kvmap.size() * 4ul;
1424 
1425   ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size);
1426   ASSERT_EQ(raw_value_size, props.raw_value_size);
1427   ASSERT_EQ(1ul, props.num_data_blocks);
1428   ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used
1429 
1430   // Verify data size.
1431   BlockBuilder block_builder(1);
1432   for (const auto& item : kvmap) {
1433     block_builder.Add(item.first, item.second);
1434   }
1435   Slice content = block_builder.Finish();
1436   ASSERT_EQ(content.size() + kBlockTrailerSize + diff_internal_user_bytes,
1437             props.data_size);
1438   c.ResetTableReader();
1439 }
1440 
1441 #ifdef SNAPPY
IndexUncompressedHelper(bool compressed)1442 uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) {
1443   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1444   constexpr size_t kNumKeys = 10000;
1445 
1446   for (size_t k = 0; k < kNumKeys; ++k) {
1447     c.Add("key" + ToString(k), "val" + ToString(k));
1448   }
1449 
1450   std::vector<std::string> keys;
1451   stl_wrappers::KVMap kvmap;
1452   Options options;
1453   options.compression = kSnappyCompression;
1454   options.statistics = CreateDBStatistics();
1455   options.statistics->set_stats_level(StatsLevel::kAll);
1456   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1457   table_options.block_restart_interval = 1;
1458   table_options.enable_index_compression = compressed;
1459   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1460 
1461   ImmutableOptions ioptions(options);
1462   MutableCFOptions moptions(options);
1463   c.Finish(options, ioptions, moptions, table_options,
1464            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1465   c.ResetTableReader();
1466   return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
1467 }
TEST_P(BlockBasedTableTest,IndexUncompressed)1468 TEST_P(BlockBasedTableTest, IndexUncompressed) {
1469   uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true);
1470   uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false);
1471   // tbl1_compressed_cnt should include 1 index block
1472   EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt);
1473 }
1474 #endif  // SNAPPY
1475 
TEST_P(BlockBasedTableTest,BlockBasedTableProperties2)1476 TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) {
1477   TableConstructor c(&reverse_key_comparator);
1478   std::vector<std::string> keys;
1479   stl_wrappers::KVMap kvmap;
1480 
1481   {
1482     Options options;
1483     options.compression = CompressionType::kNoCompression;
1484     BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1485     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1486 
1487     const ImmutableOptions ioptions(options);
1488     const MutableCFOptions moptions(options);
1489     c.Finish(options, ioptions, moptions, table_options,
1490              GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1491 
1492     auto& props = *c.GetTableReader()->GetTableProperties();
1493 
1494     // Default comparator
1495     ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name);
1496     // No merge operator
1497     ASSERT_EQ("nullptr", props.merge_operator_name);
1498     // No prefix extractor
1499     ASSERT_EQ("nullptr", props.prefix_extractor_name);
1500     // No property collectors
1501     ASSERT_EQ("[]", props.property_collectors_names);
1502     // No filter policy is used
1503     ASSERT_EQ("", props.filter_policy_name);
1504     // Compression type == that set:
1505     ASSERT_EQ("NoCompression", props.compression_name);
1506     c.ResetTableReader();
1507   }
1508 
1509   {
1510     Options options;
1511     BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1512     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1513     options.comparator = &reverse_key_comparator;
1514     options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1515     options.prefix_extractor.reset(NewNoopTransform());
1516     options.table_properties_collector_factories.emplace_back(
1517         new DummyPropertiesCollectorFactory1());
1518     options.table_properties_collector_factories.emplace_back(
1519         new DummyPropertiesCollectorFactory2());
1520 
1521     const ImmutableOptions ioptions(options);
1522     const MutableCFOptions moptions(options);
1523     c.Finish(options, ioptions, moptions, table_options,
1524              GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1525 
1526     auto& props = *c.GetTableReader()->GetTableProperties();
1527 
1528     ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name);
1529     ASSERT_EQ("UInt64AddOperator", props.merge_operator_name);
1530     ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name);
1531     ASSERT_EQ(
1532         "[DummyPropertiesCollectorFactory1,DummyPropertiesCollectorFactory2]",
1533         props.property_collectors_names);
1534     ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used
1535     c.ResetTableReader();
1536   }
1537 }
1538 
TEST_P(BlockBasedTableTest,RangeDelBlock)1539 TEST_P(BlockBasedTableTest, RangeDelBlock) {
1540   TableConstructor c(BytewiseComparator());
1541   std::vector<std::string> keys = {"1pika", "2chu"};
1542   std::vector<std::string> vals = {"p", "c"};
1543 
1544   std::vector<RangeTombstone> expected_tombstones = {
1545       {"1pika", "2chu", 0},
1546       {"2chu", "c", 1},
1547       {"2chu", "c", 0},
1548       {"c", "p", 0},
1549   };
1550 
1551   for (int i = 0; i < 2; i++) {
1552     RangeTombstone t(keys[i], vals[i], i);
1553     std::pair<InternalKey, Slice> p = t.Serialize();
1554     c.Add(p.first.Encode().ToString(), p.second);
1555   }
1556 
1557   std::vector<std::string> sorted_keys;
1558   stl_wrappers::KVMap kvmap;
1559   Options options;
1560   options.compression = kNoCompression;
1561   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1562   table_options.block_restart_interval = 1;
1563   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1564 
1565   const ImmutableOptions ioptions(options);
1566   const MutableCFOptions moptions(options);
1567   std::unique_ptr<InternalKeyComparator> internal_cmp(
1568       new InternalKeyComparator(options.comparator));
1569   c.Finish(options, ioptions, moptions, table_options, *internal_cmp,
1570            &sorted_keys, &kvmap);
1571 
1572   for (int j = 0; j < 2; ++j) {
1573     std::unique_ptr<InternalIterator> iter(
1574         c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions()));
1575     if (j > 0) {
1576       // For second iteration, delete the table reader object and verify the
1577       // iterator can still access its metablock's range tombstones.
1578       c.ResetTableReader();
1579     }
1580     ASSERT_FALSE(iter->Valid());
1581     iter->SeekToFirst();
1582     ASSERT_TRUE(iter->Valid());
1583     for (size_t i = 0; i < expected_tombstones.size(); i++) {
1584       ASSERT_TRUE(iter->Valid());
1585       ParsedInternalKey parsed_key;
1586       ASSERT_OK(
1587           ParseInternalKey(iter->key(), &parsed_key, true /* log_err_key */));
1588       RangeTombstone t(parsed_key, iter->value());
1589       const auto& expected_t = expected_tombstones[i];
1590       ASSERT_EQ(t.start_key_, expected_t.start_key_);
1591       ASSERT_EQ(t.end_key_, expected_t.end_key_);
1592       ASSERT_EQ(t.seq_, expected_t.seq_);
1593       iter->Next();
1594     }
1595     ASSERT_TRUE(!iter->Valid());
1596   }
1597 }
1598 
TEST_P(BlockBasedTableTest,FilterPolicyNameProperties)1599 TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) {
1600   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1601   c.Add("a1", "val1");
1602   std::vector<std::string> keys;
1603   stl_wrappers::KVMap kvmap;
1604   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1605   table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1606   Options options;
1607   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1608 
1609   const ImmutableOptions ioptions(options);
1610   const MutableCFOptions moptions(options);
1611   c.Finish(options, ioptions, moptions, table_options,
1612            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1613   auto& props = *c.GetTableReader()->GetTableProperties();
1614   ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
1615   c.ResetTableReader();
1616 }
1617 
1618 //
1619 // BlockBasedTableTest::PrefetchTest
1620 //
AssertKeysInCache(BlockBasedTable * table_reader,const std::vector<std::string> & keys_in_cache,const std::vector<std::string> & keys_not_in_cache,bool convert=false)1621 void AssertKeysInCache(BlockBasedTable* table_reader,
1622                        const std::vector<std::string>& keys_in_cache,
1623                        const std::vector<std::string>& keys_not_in_cache,
1624                        bool convert = false) {
1625   if (convert) {
1626     for (auto key : keys_in_cache) {
1627       InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1628       ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1629     }
1630     for (auto key : keys_not_in_cache) {
1631       InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1632       ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1633     }
1634   } else {
1635     for (auto key : keys_in_cache) {
1636       ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
1637     }
1638     for (auto key : keys_not_in_cache) {
1639       ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
1640     }
1641   }
1642 }
1643 
PrefetchRange(TableConstructor * c,Options * opt,BlockBasedTableOptions * table_options,const char * key_begin,const char * key_end,const std::vector<std::string> & keys_in_cache,const std::vector<std::string> & keys_not_in_cache,const Status expected_status=Status::OK ())1644 void PrefetchRange(TableConstructor* c, Options* opt,
1645                    BlockBasedTableOptions* table_options, const char* key_begin,
1646                    const char* key_end,
1647                    const std::vector<std::string>& keys_in_cache,
1648                    const std::vector<std::string>& keys_not_in_cache,
1649                    const Status expected_status = Status::OK()) {
1650   // reset the cache and reopen the table
1651   table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1652   opt->table_factory.reset(NewBlockBasedTableFactory(*table_options));
1653   const ImmutableOptions ioptions2(*opt);
1654   const MutableCFOptions moptions(*opt);
1655   ASSERT_OK(c->Reopen(ioptions2, moptions));
1656 
1657   // prefetch
1658   auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader());
1659   Status s;
1660   std::unique_ptr<Slice> begin, end;
1661   std::unique_ptr<InternalKey> i_begin, i_end;
1662   if (key_begin != nullptr) {
1663     if (c->ConvertToInternalKey()) {
1664       i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue));
1665       begin.reset(new Slice(i_begin->Encode()));
1666     } else {
1667       begin.reset(new Slice(key_begin));
1668     }
1669   }
1670   if (key_end != nullptr) {
1671     if (c->ConvertToInternalKey()) {
1672       i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue));
1673       end.reset(new Slice(i_end->Encode()));
1674     } else {
1675       end.reset(new Slice(key_end));
1676     }
1677   }
1678   s = table_reader->Prefetch(begin.get(), end.get());
1679 
1680   ASSERT_TRUE(s.code() == expected_status.code());
1681 
1682   // assert our expectation in cache warmup
1683   AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache,
1684                     c->ConvertToInternalKey());
1685   c->ResetTableReader();
1686 }
1687 
TEST_P(BlockBasedTableTest,PrefetchTest)1688 TEST_P(BlockBasedTableTest, PrefetchTest) {
1689   // The purpose of this test is to test the prefetching operation built into
1690   // BlockBasedTable.
1691   Options opt;
1692   std::unique_ptr<InternalKeyComparator> ikc;
1693   ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
1694   opt.compression = kNoCompression;
1695   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1696   table_options.block_size = 1024;
1697   // big enough so we don't ever lose cached values.
1698   table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1699   opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
1700 
1701   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1702   c.Add("k01", "hello");
1703   c.Add("k02", "hello2");
1704   c.Add("k03", std::string(10000, 'x'));
1705   c.Add("k04", std::string(200000, 'x'));
1706   c.Add("k05", std::string(300000, 'x'));
1707   c.Add("k06", "hello3");
1708   c.Add("k07", std::string(100000, 'x'));
1709   std::vector<std::string> keys;
1710   stl_wrappers::KVMap kvmap;
1711   const ImmutableOptions ioptions(opt);
1712   const MutableCFOptions moptions(opt);
1713   c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
1714   c.ResetTableReader();
1715 
1716   // We get the following data spread :
1717   //
1718   // Data block         Index
1719   // ========================
1720   // [ k01 k02 k03 ]    k03
1721   // [ k04         ]    k04
1722   // [ k05         ]    k05
1723   // [ k06 k07     ]    k07
1724 
1725 
1726   // Simple
1727   PrefetchRange(&c, &opt, &table_options,
1728                 /*key_range=*/"k01", "k05",
1729                 /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"},
1730                 /*keys_not_in_cache=*/{"k06", "k07"});
1731   PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"},
1732                 {"k04", "k05", "k06", "k07"});
1733   // odd
1734   PrefetchRange(&c, &opt, &table_options, "a", "z",
1735                 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1736   PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"},
1737                 {"k04", "k05", "k06", "k07"});
1738   // Edge cases
1739   PrefetchRange(&c, &opt, &table_options, "k00", "k06",
1740                 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1741   PrefetchRange(&c, &opt, &table_options, "k00", "zzz",
1742                 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1743   // null keys
1744   PrefetchRange(&c, &opt, &table_options, nullptr, nullptr,
1745                 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1746   PrefetchRange(&c, &opt, &table_options, "k04", nullptr,
1747                 {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"});
1748   PrefetchRange(&c, &opt, &table_options, nullptr, "k05",
1749                 {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"});
1750   // invalid
1751   PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {},
1752                 Status::InvalidArgument(Slice("k06 "), Slice("k07")));
1753   c.ResetTableReader();
1754 }
1755 
TEST_P(BlockBasedTableTest,TotalOrderSeekOnHashIndex)1756 TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
1757   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1758   for (int i = 0; i <= 5; ++i) {
1759     Options options;
1760     // Make each key/value an individual block
1761     table_options.block_size = 64;
1762     switch (i) {
1763     case 0:
1764       // Binary search index
1765       table_options.index_type = BlockBasedTableOptions::kBinarySearch;
1766       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1767       break;
1768     case 1:
1769       // Hash search index
1770       table_options.index_type = BlockBasedTableOptions::kHashSearch;
1771       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1772       options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1773       break;
1774     case 2:
1775       // Hash search index with hash_index_allow_collision
1776       table_options.index_type = BlockBasedTableOptions::kHashSearch;
1777       table_options.hash_index_allow_collision = true;
1778       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1779       options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1780       break;
1781     case 3:
1782       // Hash search index with filter policy
1783       table_options.index_type = BlockBasedTableOptions::kHashSearch;
1784       table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1785       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1786       options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1787       break;
1788     case 4:
1789       // Two-level index
1790       table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
1791       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1792       break;
1793     case 5:
1794       // Binary search with first key
1795       table_options.index_type =
1796           BlockBasedTableOptions::kBinarySearchWithFirstKey;
1797       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1798       break;
1799     }
1800 
1801     TableConstructor c(BytewiseComparator(),
1802                        true /* convert_to_internal_key_ */);
1803     c.Add("aaaa1", std::string('a', 56));
1804     c.Add("bbaa1", std::string('a', 56));
1805     c.Add("cccc1", std::string('a', 56));
1806     c.Add("bbbb1", std::string('a', 56));
1807     c.Add("baaa1", std::string('a', 56));
1808     c.Add("abbb1", std::string('a', 56));
1809     c.Add("cccc2", std::string('a', 56));
1810     std::vector<std::string> keys;
1811     stl_wrappers::KVMap kvmap;
1812     const ImmutableOptions ioptions(options);
1813     const MutableCFOptions moptions(options);
1814     c.Finish(options, ioptions, moptions, table_options,
1815              GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1816     auto props = c.GetTableReader()->GetTableProperties();
1817     ASSERT_EQ(7u, props->num_data_blocks);
1818     auto* reader = c.GetTableReader();
1819     ReadOptions ro;
1820     ro.total_order_seek = true;
1821     std::unique_ptr<InternalIterator> iter(reader->NewIterator(
1822         ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
1823         /*skip_filters=*/false, TableReaderCaller::kUncategorized));
1824 
1825     iter->Seek(InternalKey("b", 0, kTypeValue).Encode());
1826     ASSERT_OK(iter->status());
1827     ASSERT_TRUE(iter->Valid());
1828     ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString());
1829     iter->Next();
1830     ASSERT_OK(iter->status());
1831     ASSERT_TRUE(iter->Valid());
1832     ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
1833 
1834     iter->Seek(InternalKey("bb", 0, kTypeValue).Encode());
1835     ASSERT_OK(iter->status());
1836     ASSERT_TRUE(iter->Valid());
1837     ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
1838     iter->Next();
1839     ASSERT_OK(iter->status());
1840     ASSERT_TRUE(iter->Valid());
1841     ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
1842 
1843     iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode());
1844     ASSERT_OK(iter->status());
1845     ASSERT_TRUE(iter->Valid());
1846     ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
1847     iter->Next();
1848     ASSERT_OK(iter->status());
1849     ASSERT_TRUE(iter->Valid());
1850     ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString());
1851   }
1852 }
1853 
TEST_P(BlockBasedTableTest,NoopTransformSeek)1854 TEST_P(BlockBasedTableTest, NoopTransformSeek) {
1855   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1856   table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1857 
1858   Options options;
1859   options.comparator = BytewiseComparator();
1860   options.table_factory.reset(new BlockBasedTableFactory(table_options));
1861   options.prefix_extractor.reset(NewNoopTransform());
1862 
1863   TableConstructor c(options.comparator);
1864   // To tickle the PrefixMayMatch bug it is important that the
1865   // user-key is a single byte so that the index key exactly matches
1866   // the user-key.
1867   InternalKey key("a", 1, kTypeValue);
1868   c.Add(key.Encode().ToString(), "b");
1869   std::vector<std::string> keys;
1870   stl_wrappers::KVMap kvmap;
1871   const ImmutableOptions ioptions(options);
1872   const MutableCFOptions moptions(options);
1873   const InternalKeyComparator internal_comparator(options.comparator);
1874   c.Finish(options, ioptions, moptions, table_options, internal_comparator,
1875            &keys, &kvmap);
1876 
1877   auto* reader = c.GetTableReader();
1878   for (int i = 0; i < 2; ++i) {
1879     ReadOptions ro;
1880     ro.total_order_seek = (i == 0);
1881     std::unique_ptr<InternalIterator> iter(reader->NewIterator(
1882         ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
1883         /*skip_filters=*/false, TableReaderCaller::kUncategorized));
1884 
1885     iter->Seek(key.Encode());
1886     ASSERT_OK(iter->status());
1887     ASSERT_TRUE(iter->Valid());
1888     ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString());
1889   }
1890 }
1891 
TEST_P(BlockBasedTableTest,SkipPrefixBloomFilter)1892 TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) {
1893   // if DB is opened with a prefix extractor of a different name,
1894   // prefix bloom is skipped when read the file
1895   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1896   table_options.filter_policy.reset(NewBloomFilterPolicy(2));
1897   table_options.whole_key_filtering = false;
1898 
1899   Options options;
1900   options.comparator = BytewiseComparator();
1901   options.table_factory.reset(new BlockBasedTableFactory(table_options));
1902   options.prefix_extractor.reset(NewFixedPrefixTransform(1));
1903 
1904   TableConstructor c(options.comparator);
1905   InternalKey key("abcdefghijk", 1, kTypeValue);
1906   c.Add(key.Encode().ToString(), "test");
1907   std::vector<std::string> keys;
1908   stl_wrappers::KVMap kvmap;
1909   const ImmutableOptions ioptions(options);
1910   const MutableCFOptions moptions(options);
1911   const InternalKeyComparator internal_comparator(options.comparator);
1912   c.Finish(options, ioptions, moptions, table_options, internal_comparator,
1913            &keys, &kvmap);
1914   // TODO(Zhongyi): update test to use MutableCFOptions
1915   options.prefix_extractor.reset(NewFixedPrefixTransform(9));
1916   const ImmutableOptions new_ioptions(options);
1917   const MutableCFOptions new_moptions(options);
1918   ASSERT_OK(c.Reopen(new_ioptions, new_moptions));
1919   auto reader = c.GetTableReader();
1920   ReadOptions read_options;
1921   std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
1922       read_options, new_moptions.prefix_extractor.get(), /*arena=*/nullptr,
1923       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
1924 
1925   // Test point lookup
1926   // only one kv
1927   for (auto& kv : kvmap) {
1928     db_iter->Seek(kv.first);
1929     ASSERT_TRUE(db_iter->Valid());
1930     ASSERT_OK(db_iter->status());
1931     ASSERT_EQ(db_iter->key(), kv.first);
1932     ASSERT_EQ(db_iter->value(), kv.second);
1933   }
1934 }
1935 
AddInternalKey(TableConstructor * c,const std::string & prefix,std::string value="v",int=800)1936 void AddInternalKey(TableConstructor* c, const std::string& prefix,
1937                     std::string value = "v", int /*suffix_len*/ = 800) {
1938   static Random rnd(1023);
1939   InternalKey k(prefix + rnd.RandomString(800), 0, kTypeValue);
1940   c->Add(k.Encode().ToString(), value);
1941 }
1942 
IndexTest(BlockBasedTableOptions table_options)1943 void TableTest::IndexTest(BlockBasedTableOptions table_options) {
1944   TableConstructor c(BytewiseComparator());
1945 
1946   // keys with prefix length 3, make sure the key/value is big enough to fill
1947   // one block
1948   AddInternalKey(&c, "0015");
1949   AddInternalKey(&c, "0035");
1950 
1951   AddInternalKey(&c, "0054");
1952   AddInternalKey(&c, "0055");
1953 
1954   AddInternalKey(&c, "0056");
1955   AddInternalKey(&c, "0057");
1956 
1957   AddInternalKey(&c, "0058");
1958   AddInternalKey(&c, "0075");
1959 
1960   AddInternalKey(&c, "0076");
1961   AddInternalKey(&c, "0095");
1962 
1963   std::vector<std::string> keys;
1964   stl_wrappers::KVMap kvmap;
1965   Options options;
1966   options.prefix_extractor.reset(NewFixedPrefixTransform(3));
1967   table_options.block_size = 1700;
1968   table_options.block_cache = NewLRUCache(1024, 4);
1969   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1970 
1971   std::unique_ptr<InternalKeyComparator> comparator(
1972       new InternalKeyComparator(BytewiseComparator()));
1973   const ImmutableOptions ioptions(options);
1974   const MutableCFOptions moptions(options);
1975   c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
1976            &kvmap);
1977   auto reader = c.GetTableReader();
1978 
1979   auto props = reader->GetTableProperties();
1980   ASSERT_EQ(5u, props->num_data_blocks);
1981 
1982   // TODO(Zhongyi): update test to use MutableCFOptions
1983   ReadOptions read_options;
1984   std::unique_ptr<InternalIterator> index_iter(reader->NewIterator(
1985       read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
1986       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
1987 
1988   // -- Find keys do not exist, but have common prefix.
1989   std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
1990   std::vector<std::string> lower_bound = {
1991       keys[0], keys[1], keys[2], keys[7], keys[9],
1992   };
1993 
1994   // find the lower bound of the prefix
1995   for (size_t i = 0; i < prefixes.size(); ++i) {
1996     index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
1997     ASSERT_OK(index_iter->status());
1998     ASSERT_TRUE(index_iter->Valid());
1999 
2000     // seek the first element in the block
2001     ASSERT_EQ(lower_bound[i], index_iter->key().ToString());
2002     ASSERT_EQ("v", index_iter->value().ToString());
2003   }
2004 
2005   // find the upper bound of prefixes
2006   std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };
2007 
2008   // find existing keys
2009   for (const auto& item : kvmap) {
2010     auto ukey = ExtractUserKey(item.first).ToString();
2011     index_iter->Seek(ukey);
2012 
2013     // ASSERT_OK(regular_iter->status());
2014     ASSERT_OK(index_iter->status());
2015 
2016     // ASSERT_TRUE(regular_iter->Valid());
2017     ASSERT_TRUE(index_iter->Valid());
2018 
2019     ASSERT_EQ(item.first, index_iter->key().ToString());
2020     ASSERT_EQ(item.second, index_iter->value().ToString());
2021   }
2022 
2023   for (size_t i = 0; i < prefixes.size(); ++i) {
2024     // the key is greater than any existing keys.
2025     auto key = prefixes[i] + "9";
2026     index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());
2027 
2028     ASSERT_TRUE(index_iter->status().ok() || index_iter->status().IsNotFound());
2029     ASSERT_TRUE(!index_iter->status().IsNotFound() || !index_iter->Valid());
2030     if (i == prefixes.size() - 1) {
2031       // last key
2032       ASSERT_TRUE(!index_iter->Valid());
2033     } else {
2034       ASSERT_TRUE(index_iter->Valid());
2035       // seek the first element in the block
2036       ASSERT_EQ(upper_bound[i], index_iter->key().ToString());
2037       ASSERT_EQ("v", index_iter->value().ToString());
2038     }
2039   }
2040 
2041   // find keys with prefix that don't match any of the existing prefixes.
2042   std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
2043   for (const auto& prefix : non_exist_prefixes) {
2044     index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());
2045     // regular_iter->Seek(prefix);
2046 
2047     ASSERT_OK(index_iter->status());
2048     // Seek to non-existing prefixes should yield either invalid, or a
2049     // key with prefix greater than the target.
2050     if (index_iter->Valid()) {
2051       Slice ukey = ExtractUserKey(index_iter->key());
2052       Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
2053       ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0);
2054     }
2055   }
2056   for (const auto& prefix : non_exist_prefixes) {
2057     index_iter->SeekForPrev(InternalKey(prefix, 0, kTypeValue).Encode());
2058     // regular_iter->Seek(prefix);
2059 
2060     ASSERT_OK(index_iter->status());
2061     // Seek to non-existing prefixes should yield either invalid, or a
2062     // key with prefix greater than the target.
2063     if (index_iter->Valid()) {
2064       Slice ukey = ExtractUserKey(index_iter->key());
2065       Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
2066       ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) > 0);
2067     }
2068   }
2069 
2070   {
2071     // Test reseek case. It should impact partitioned index more.
2072     ReadOptions ro;
2073     ro.total_order_seek = true;
2074     std::unique_ptr<InternalIterator> index_iter2(reader->NewIterator(
2075         ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
2076         /*skip_filters=*/false, TableReaderCaller::kUncategorized));
2077 
2078     // Things to cover in partitioned index:
2079     // 1. Both of Seek() and SeekToLast() has optimization to prevent
2080     //    rereek leaf index block if it remains to the same one, and
2081     //    they reuse the same variable.
2082     // 2. When Next() or Prev() is called, the block moves, so the
2083     //    optimization should kick in only with the current one.
2084     index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode());
2085     ASSERT_TRUE(index_iter2->Valid());
2086     ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4));
2087 
2088     index_iter2->SeekToLast();
2089     ASSERT_TRUE(index_iter2->Valid());
2090     ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
2091 
2092     index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode());
2093     ASSERT_TRUE(index_iter2->Valid());
2094     ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4));
2095 
2096     index_iter2->SeekToLast();
2097     ASSERT_TRUE(index_iter2->Valid());
2098     ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
2099     index_iter2->Prev();
2100     ASSERT_TRUE(index_iter2->Valid());
2101     index_iter2->Prev();
2102     ASSERT_TRUE(index_iter2->Valid());
2103     ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));
2104 
2105     index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode());
2106     ASSERT_TRUE(index_iter2->Valid());
2107     ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
2108     index_iter2->Prev();
2109     ASSERT_TRUE(index_iter2->Valid());
2110     index_iter2->Prev();
2111     ASSERT_TRUE(index_iter2->Valid());
2112     ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));
2113 
2114     index_iter2->SeekToLast();
2115     ASSERT_TRUE(index_iter2->Valid());
2116     ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
2117 
2118     index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode());
2119     ASSERT_TRUE(index_iter2->Valid());
2120     ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
2121 
2122     index_iter2->Prev();
2123     ASSERT_TRUE(index_iter2->Valid());
2124     index_iter2->Prev();
2125     ASSERT_TRUE(index_iter2->Valid());
2126     ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));
2127 
2128     index_iter2->Seek(InternalKey("0075", 0, kTypeValue).Encode());
2129     ASSERT_TRUE(index_iter2->Valid());
2130     ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));
2131 
2132     index_iter2->Next();
2133     ASSERT_TRUE(index_iter2->Valid());
2134     index_iter2->Next();
2135     ASSERT_TRUE(index_iter2->Valid());
2136     ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
2137 
2138     index_iter2->SeekToLast();
2139     ASSERT_TRUE(index_iter2->Valid());
2140     ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
2141   }
2142 
2143   c.ResetTableReader();
2144 }
2145 
TEST_P(BlockBasedTableTest,BinaryIndexTest)2146 TEST_P(BlockBasedTableTest, BinaryIndexTest) {
2147   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2148   table_options.index_type = BlockBasedTableOptions::kBinarySearch;
2149   IndexTest(table_options);
2150 }
2151 
TEST_P(BlockBasedTableTest,HashIndexTest)2152 TEST_P(BlockBasedTableTest, HashIndexTest) {
2153   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2154   table_options.index_type = BlockBasedTableOptions::kHashSearch;
2155   IndexTest(table_options);
2156 }
2157 
TEST_P(BlockBasedTableTest,PartitionIndexTest)2158 TEST_P(BlockBasedTableTest, PartitionIndexTest) {
2159   const int max_index_keys = 5;
2160   const int est_max_index_key_value_size = 32;
2161   const int est_max_index_size = max_index_keys * est_max_index_key_value_size;
2162   for (int i = 1; i <= est_max_index_size + 1; i++) {
2163     BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2164     table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
2165     table_options.metadata_block_size = i;
2166     IndexTest(table_options);
2167   }
2168 }
2169 
TEST_P(BlockBasedTableTest,IndexSeekOptimizationIncomplete)2170 TEST_P(BlockBasedTableTest, IndexSeekOptimizationIncomplete) {
2171   std::unique_ptr<InternalKeyComparator> comparator(
2172       new InternalKeyComparator(BytewiseComparator()));
2173   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2174   Options options;
2175   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2176   const ImmutableOptions ioptions(options);
2177   const MutableCFOptions moptions(options);
2178 
2179   TableConstructor c(BytewiseComparator());
2180   AddInternalKey(&c, "pika");
2181 
2182   std::vector<std::string> keys;
2183   stl_wrappers::KVMap kvmap;
2184   c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
2185            &kvmap);
2186   ASSERT_EQ(1, keys.size());
2187 
2188   auto reader = c.GetTableReader();
2189   ReadOptions ropt;
2190   ropt.read_tier = ReadTier::kBlockCacheTier;
2191   std::unique_ptr<InternalIterator> iter(reader->NewIterator(
2192       ropt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
2193       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
2194 
2195   auto ikey = [](Slice user_key) {
2196     return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
2197   };
2198 
2199   iter->Seek(ikey("pika"));
2200   ASSERT_FALSE(iter->Valid());
2201   ASSERT_TRUE(iter->status().IsIncomplete());
2202 
2203   // This used to crash at some point.
2204   iter->Seek(ikey("pika"));
2205   ASSERT_FALSE(iter->Valid());
2206   ASSERT_TRUE(iter->status().IsIncomplete());
2207 }
2208 
TEST_P(BlockBasedTableTest,BinaryIndexWithFirstKey1)2209 TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey1) {
2210   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2211   table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey;
2212   IndexTest(table_options);
2213 }
2214 
2215 class CustomFlushBlockPolicy : public FlushBlockPolicyFactory,
2216                                public FlushBlockPolicy {
2217  public:
CustomFlushBlockPolicy(std::vector<int> keys_per_block)2218   explicit CustomFlushBlockPolicy(std::vector<int> keys_per_block)
2219       : keys_per_block_(keys_per_block) {}
2220 
Name() const2221   const char* Name() const override { return "CustomFlushBlockPolicy"; }
2222 
NewFlushBlockPolicy(const BlockBasedTableOptions &,const BlockBuilder &) const2223   FlushBlockPolicy* NewFlushBlockPolicy(const BlockBasedTableOptions&,
2224                                         const BlockBuilder&) const override {
2225     return new CustomFlushBlockPolicy(keys_per_block_);
2226   }
2227 
Update(const Slice &,const Slice &)2228   bool Update(const Slice&, const Slice&) override {
2229     if (keys_in_current_block_ >= keys_per_block_.at(current_block_idx_)) {
2230       ++current_block_idx_;
2231       keys_in_current_block_ = 1;
2232       return true;
2233     }
2234 
2235     ++keys_in_current_block_;
2236     return false;
2237   }
2238 
2239   std::vector<int> keys_per_block_;
2240 
2241   int current_block_idx_ = 0;
2242   int keys_in_current_block_ = 0;
2243 };
2244 
TEST_P(BlockBasedTableTest,BinaryIndexWithFirstKey2)2245 TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey2) {
2246   for (int use_first_key = 0; use_first_key < 2; ++use_first_key) {
2247     SCOPED_TRACE("use_first_key = " + std::to_string(use_first_key));
2248     BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2249     table_options.index_type =
2250         use_first_key ? BlockBasedTableOptions::kBinarySearchWithFirstKey
2251                       : BlockBasedTableOptions::kBinarySearch;
2252     table_options.block_cache = NewLRUCache(10000);  // fits all blocks
2253     table_options.index_shortening =
2254         BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
2255     table_options.flush_block_policy_factory =
2256         std::make_shared<CustomFlushBlockPolicy>(std::vector<int>{2, 1, 3, 2});
2257     Options options;
2258     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2259     options.statistics = CreateDBStatistics();
2260     Statistics* stats = options.statistics.get();
2261     std::unique_ptr<InternalKeyComparator> comparator(
2262         new InternalKeyComparator(BytewiseComparator()));
2263     const ImmutableOptions ioptions(options);
2264     const MutableCFOptions moptions(options);
2265 
2266     TableConstructor c(BytewiseComparator());
2267 
2268     // Block 0.
2269     AddInternalKey(&c, "aaaa", "v0");
2270     AddInternalKey(&c, "aaac", "v1");
2271 
2272     // Block 1.
2273     AddInternalKey(&c, "aaca", "v2");
2274 
2275     // Block 2.
2276     AddInternalKey(&c, "caaa", "v3");
2277     AddInternalKey(&c, "caac", "v4");
2278     AddInternalKey(&c, "caae", "v5");
2279 
2280     // Block 3.
2281     AddInternalKey(&c, "ccaa", "v6");
2282     AddInternalKey(&c, "ccac", "v7");
2283 
2284     // Write the file.
2285     std::vector<std::string> keys;
2286     stl_wrappers::KVMap kvmap;
2287     c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
2288              &kvmap);
2289     ASSERT_EQ(8, keys.size());
2290 
2291     auto reader = c.GetTableReader();
2292     auto props = reader->GetTableProperties();
2293     ASSERT_EQ(4u, props->num_data_blocks);
2294     ReadOptions read_options;
2295     std::unique_ptr<InternalIterator> iter(reader->NewIterator(
2296         read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
2297         /*skip_filters=*/false, TableReaderCaller::kUncategorized,
2298         /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true));
2299 
2300     // Shouldn't have read data blocks before iterator is seeked.
2301     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2302     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2303 
2304     auto ikey = [](Slice user_key) {
2305       return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
2306     };
2307 
2308     // Seek to a key between blocks. If index contains first key, we shouldn't
2309     // read any data blocks until value is requested.
2310     iter->Seek(ikey("aaba"));
2311     ASSERT_TRUE(iter->Valid());
2312     EXPECT_EQ(keys[2], iter->key().ToString());
2313     EXPECT_EQ(use_first_key ? 0 : 1,
2314               stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2315     ASSERT_TRUE(iter->PrepareValue());
2316     EXPECT_EQ("v2", iter->value().ToString());
2317     EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2318     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2319 
2320     // Seek to the middle of a block. The block should be read right away.
2321     iter->Seek(ikey("caab"));
2322     ASSERT_TRUE(iter->Valid());
2323     EXPECT_EQ(keys[4], iter->key().ToString());
2324     EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2325     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2326     ASSERT_TRUE(iter->PrepareValue());
2327     EXPECT_EQ("v4", iter->value().ToString());
2328     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2329 
2330     // Seek to just before the same block and don't access value.
2331     // The iterator should keep pinning the block contents.
2332     iter->Seek(ikey("baaa"));
2333     ASSERT_TRUE(iter->Valid());
2334     EXPECT_EQ(keys[3], iter->key().ToString());
2335     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2336 
2337     // Seek to the same block again to check that the block is still pinned.
2338     iter->Seek(ikey("caae"));
2339     ASSERT_TRUE(iter->Valid());
2340     EXPECT_EQ(keys[5], iter->key().ToString());
2341     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2342     ASSERT_TRUE(iter->PrepareValue());
2343     EXPECT_EQ("v5", iter->value().ToString());
2344     EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2345     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2346 
2347     // Step forward and fall through to the next block. Don't access value.
2348     iter->Next();
2349     ASSERT_TRUE(iter->Valid());
2350     EXPECT_EQ(keys[6], iter->key().ToString());
2351     EXPECT_EQ(use_first_key ? 2 : 3,
2352               stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2353     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2354 
2355     // Step forward again. Block should be read.
2356     iter->Next();
2357     ASSERT_TRUE(iter->Valid());
2358     EXPECT_EQ(keys[7], iter->key().ToString());
2359     EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2360     ASSERT_TRUE(iter->PrepareValue());
2361     EXPECT_EQ("v7", iter->value().ToString());
2362     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2363 
2364     // Step forward and reach the end.
2365     iter->Next();
2366     EXPECT_FALSE(iter->Valid());
2367     EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2368     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2369 
2370     // Seek to a single-key block and step forward without accessing value.
2371     iter->Seek(ikey("aaca"));
2372     ASSERT_TRUE(iter->Valid());
2373     EXPECT_EQ(keys[2], iter->key().ToString());
2374     EXPECT_EQ(use_first_key ? 0 : 1,
2375               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2376 
2377     iter->Next();
2378     ASSERT_TRUE(iter->Valid());
2379     EXPECT_EQ(keys[3], iter->key().ToString());
2380     EXPECT_EQ(use_first_key ? 1 : 2,
2381               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2382     ASSERT_TRUE(iter->PrepareValue());
2383     EXPECT_EQ("v3", iter->value().ToString());
2384     EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2385     EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2386 
2387     // Seek between blocks and step back without accessing value.
2388     iter->Seek(ikey("aaca"));
2389     ASSERT_TRUE(iter->Valid());
2390     EXPECT_EQ(keys[2], iter->key().ToString());
2391     EXPECT_EQ(use_first_key ? 2 : 3,
2392               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2393     EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2394 
2395     iter->Prev();
2396     ASSERT_TRUE(iter->Valid());
2397     EXPECT_EQ(keys[1], iter->key().ToString());
2398     EXPECT_EQ(use_first_key ? 2 : 3,
2399               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2400     // All blocks are in cache now, there'll be no more misses ever.
2401     EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2402     ASSERT_TRUE(iter->PrepareValue());
2403     EXPECT_EQ("v1", iter->value().ToString());
2404 
2405     // Next into the next block again.
2406     iter->Next();
2407     ASSERT_TRUE(iter->Valid());
2408     EXPECT_EQ(keys[2], iter->key().ToString());
2409     EXPECT_EQ(use_first_key ? 2 : 4,
2410               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2411 
2412     // Seek to first and step back without accessing value.
2413     iter->SeekToFirst();
2414     ASSERT_TRUE(iter->Valid());
2415     EXPECT_EQ(keys[0], iter->key().ToString());
2416     EXPECT_EQ(use_first_key ? 2 : 5,
2417               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2418 
2419     iter->Prev();
2420     EXPECT_FALSE(iter->Valid());
2421     EXPECT_EQ(use_first_key ? 2 : 5,
2422               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2423 
2424     // Do some SeekForPrev() and SeekToLast() just to cover all methods.
2425     iter->SeekForPrev(ikey("caad"));
2426     ASSERT_TRUE(iter->Valid());
2427     EXPECT_EQ(keys[4], iter->key().ToString());
2428     EXPECT_EQ(use_first_key ? 3 : 6,
2429               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2430     ASSERT_TRUE(iter->PrepareValue());
2431     EXPECT_EQ("v4", iter->value().ToString());
2432     EXPECT_EQ(use_first_key ? 3 : 6,
2433               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2434 
2435     iter->SeekToLast();
2436     ASSERT_TRUE(iter->Valid());
2437     EXPECT_EQ(keys[7], iter->key().ToString());
2438     EXPECT_EQ(use_first_key ? 4 : 7,
2439               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2440     ASSERT_TRUE(iter->PrepareValue());
2441     EXPECT_EQ("v7", iter->value().ToString());
2442     EXPECT_EQ(use_first_key ? 4 : 7,
2443               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2444 
2445     EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2446 
2447     c.ResetTableReader();
2448   }
2449 }
2450 
TEST_P(BlockBasedTableTest,BinaryIndexWithFirstKeyGlobalSeqno)2451 TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKeyGlobalSeqno) {
2452   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2453   table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey;
2454   table_options.block_cache = NewLRUCache(10000);
2455   Options options;
2456   options.statistics = CreateDBStatistics();
2457   Statistics* stats = options.statistics.get();
2458   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2459   std::unique_ptr<InternalKeyComparator> comparator(
2460       new InternalKeyComparator(BytewiseComparator()));
2461   const ImmutableOptions ioptions(options);
2462   const MutableCFOptions moptions(options);
2463 
2464   TableConstructor c(BytewiseComparator(), /* convert_to_internal_key */ false,
2465                      /* level */ -1, /* largest_seqno */ 42);
2466 
2467   c.Add(InternalKey("b", 0, kTypeValue).Encode().ToString(), "x");
2468   c.Add(InternalKey("c", 0, kTypeValue).Encode().ToString(), "y");
2469 
2470   std::vector<std::string> keys;
2471   stl_wrappers::KVMap kvmap;
2472   c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
2473            &kvmap);
2474   ASSERT_EQ(2, keys.size());
2475 
2476   auto reader = c.GetTableReader();
2477   auto props = reader->GetTableProperties();
2478   ASSERT_EQ(1u, props->num_data_blocks);
2479   ReadOptions read_options;
2480   std::unique_ptr<InternalIterator> iter(reader->NewIterator(
2481       read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
2482       /*skip_filters=*/false, TableReaderCaller::kUncategorized,
2483       /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true));
2484 
2485   iter->Seek(InternalKey("a", 0, kTypeValue).Encode().ToString());
2486   ASSERT_TRUE(iter->Valid());
2487   EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(),
2488             iter->key().ToString());
2489   EXPECT_NE(keys[0], iter->key().ToString());
2490   // Key should have been served from index, without reading data blocks.
2491   EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2492 
2493   ASSERT_TRUE(iter->PrepareValue());
2494   EXPECT_EQ("x", iter->value().ToString());
2495   EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2496   EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2497   EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(),
2498             iter->key().ToString());
2499 
2500   c.ResetTableReader();
2501 }
2502 
2503 // It's very hard to figure out the index block size of a block accurately.
2504 // To make sure we get the index size, we just make sure as key number
2505 // grows, the filter block size also grows.
TEST_P(BlockBasedTableTest,IndexSizeStat)2506 TEST_P(BlockBasedTableTest, IndexSizeStat) {
2507   uint64_t last_index_size = 0;
2508 
2509   // we need to use random keys since the pure human readable texts
2510   // may be well compressed, resulting insignifcant change of index
2511   // block size.
2512   Random rnd(test::RandomSeed());
2513   std::vector<std::string> keys;
2514 
2515   for (int i = 0; i < 100; ++i) {
2516     keys.push_back(rnd.RandomString(10000));
2517   }
2518 
2519   // Each time we load one more key to the table. the table index block
2520   // size is expected to be larger than last time's.
2521   for (size_t i = 1; i < keys.size(); ++i) {
2522     TableConstructor c(BytewiseComparator(),
2523                        true /* convert_to_internal_key_ */);
2524     for (size_t j = 0; j < i; ++j) {
2525       c.Add(keys[j], "val");
2526     }
2527 
2528     std::vector<std::string> ks;
2529     stl_wrappers::KVMap kvmap;
2530     Options options;
2531     options.compression = kNoCompression;
2532     BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2533     table_options.block_restart_interval = 1;
2534     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2535 
2536     const ImmutableOptions ioptions(options);
2537     const MutableCFOptions moptions(options);
2538     c.Finish(options, ioptions, moptions, table_options,
2539              GetPlainInternalComparator(options.comparator), &ks, &kvmap);
2540     auto index_size = c.GetTableReader()->GetTableProperties()->index_size;
2541     ASSERT_GT(index_size, last_index_size);
2542     last_index_size = index_size;
2543     c.ResetTableReader();
2544   }
2545 }
2546 
TEST_P(BlockBasedTableTest,NumBlockStat)2547 TEST_P(BlockBasedTableTest, NumBlockStat) {
2548   Random rnd(test::RandomSeed());
2549   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2550   Options options;
2551   options.compression = kNoCompression;
2552   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2553   table_options.block_restart_interval = 1;
2554   table_options.block_size = 1000;
2555   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2556 
2557   for (int i = 0; i < 10; ++i) {
2558     // the key/val are slightly smaller than block size, so that each block
2559     // holds roughly one key/value pair.
2560     c.Add(rnd.RandomString(900), "val");
2561   }
2562 
2563   std::vector<std::string> ks;
2564   stl_wrappers::KVMap kvmap;
2565   const ImmutableOptions ioptions(options);
2566   const MutableCFOptions moptions(options);
2567   c.Finish(options, ioptions, moptions, table_options,
2568            GetPlainInternalComparator(options.comparator), &ks, &kvmap);
2569   ASSERT_EQ(kvmap.size(),
2570             c.GetTableReader()->GetTableProperties()->num_data_blocks);
2571   c.ResetTableReader();
2572 }
2573 
TEST_P(BlockBasedTableTest,TracingGetTest)2574 TEST_P(BlockBasedTableTest, TracingGetTest) {
2575   TableConstructor c(BytewiseComparator());
2576   Options options;
2577   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2578   options.create_if_missing = true;
2579   table_options.block_cache = NewLRUCache(1024 * 1024, 0);
2580   table_options.cache_index_and_filter_blocks = true;
2581   table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
2582   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2583   SetupTracingTest(&c);
2584   std::vector<std::string> keys;
2585   stl_wrappers::KVMap kvmap;
2586   ImmutableOptions ioptions(options);
2587   MutableCFOptions moptions(options);
2588   c.Finish(options, ioptions, moptions, table_options,
2589            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2590   std::string user_key = "k01";
2591   InternalKey internal_key(user_key, 0, kTypeValue);
2592   std::string encoded_key = internal_key.Encode().ToString();
2593   for (uint32_t i = 1; i <= 2; i++) {
2594     PinnableSlice value;
2595     GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2596                            GetContext::kNotFound, user_key, &value, nullptr,
2597                            nullptr, true, nullptr, nullptr, nullptr, nullptr,
2598                            nullptr, nullptr, /*tracing_get_id=*/i);
2599     get_perf_context()->Reset();
2600     ASSERT_OK(c.GetTableReader()->Get(ReadOptions(), encoded_key, &get_context,
2601                                       moptions.prefix_extractor.get()));
2602     ASSERT_EQ(get_context.State(), GetContext::kFound);
2603     ASSERT_EQ(value.ToString(), kDummyValue);
2604   }
2605 
2606   // Verify traces.
2607   std::vector<BlockCacheTraceRecord> expected_records;
2608   // The first two records should be prefetching index and filter blocks.
2609   BlockCacheTraceRecord record;
2610   record.block_type = TraceType::kBlockTraceIndexBlock;
2611   record.caller = TableReaderCaller::kPrefetch;
2612   record.is_cache_hit = Boolean::kFalse;
2613   record.no_insert = Boolean::kFalse;
2614   expected_records.push_back(record);
2615   record.block_type = TraceType::kBlockTraceFilterBlock;
2616   expected_records.push_back(record);
2617   // Then we should have three records for one index, one filter, and one data
2618   // block access.
2619   record.get_id = 1;
2620   record.block_type = TraceType::kBlockTraceIndexBlock;
2621   record.caller = TableReaderCaller::kUserGet;
2622   record.get_from_user_specified_snapshot = Boolean::kFalse;
2623   record.referenced_key = encoded_key;
2624   record.referenced_key_exist_in_block = Boolean::kTrue;
2625   record.is_cache_hit = Boolean::kTrue;
2626   expected_records.push_back(record);
2627   record.block_type = TraceType::kBlockTraceFilterBlock;
2628   expected_records.push_back(record);
2629   record.is_cache_hit = Boolean::kFalse;
2630   record.block_type = TraceType::kBlockTraceDataBlock;
2631   expected_records.push_back(record);
2632   // The second get should all observe cache hits.
2633   record.is_cache_hit = Boolean::kTrue;
2634   record.get_id = 2;
2635   record.block_type = TraceType::kBlockTraceIndexBlock;
2636   record.caller = TableReaderCaller::kUserGet;
2637   record.get_from_user_specified_snapshot = Boolean::kFalse;
2638   record.referenced_key = encoded_key;
2639   expected_records.push_back(record);
2640   record.block_type = TraceType::kBlockTraceFilterBlock;
2641   expected_records.push_back(record);
2642   record.block_type = TraceType::kBlockTraceDataBlock;
2643   expected_records.push_back(record);
2644   VerifyBlockAccessTrace(&c, expected_records);
2645   c.ResetTableReader();
2646 }
2647 
TEST_P(BlockBasedTableTest,TracingApproximateOffsetOfTest)2648 TEST_P(BlockBasedTableTest, TracingApproximateOffsetOfTest) {
2649   TableConstructor c(BytewiseComparator());
2650   Options options;
2651   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2652   options.create_if_missing = true;
2653   table_options.block_cache = NewLRUCache(1024 * 1024, 0);
2654   table_options.cache_index_and_filter_blocks = true;
2655   table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
2656   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2657   SetupTracingTest(&c);
2658   std::vector<std::string> keys;
2659   stl_wrappers::KVMap kvmap;
2660   ImmutableOptions ioptions(options);
2661   MutableCFOptions moptions(options);
2662   c.Finish(options, ioptions, moptions, table_options,
2663            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2664   for (uint32_t i = 1; i <= 2; i++) {
2665     std::string user_key = "k01";
2666     InternalKey internal_key(user_key, 0, kTypeValue);
2667     std::string encoded_key = internal_key.Encode().ToString();
2668     c.GetTableReader()->ApproximateOffsetOf(
2669         encoded_key, TableReaderCaller::kUserApproximateSize);
2670   }
2671   // Verify traces.
2672   std::vector<BlockCacheTraceRecord> expected_records;
2673   // The first two records should be prefetching index and filter blocks.
2674   BlockCacheTraceRecord record;
2675   record.block_type = TraceType::kBlockTraceIndexBlock;
2676   record.caller = TableReaderCaller::kPrefetch;
2677   record.is_cache_hit = Boolean::kFalse;
2678   record.no_insert = Boolean::kFalse;
2679   expected_records.push_back(record);
2680   record.block_type = TraceType::kBlockTraceFilterBlock;
2681   expected_records.push_back(record);
2682   // Then we should have two records for only index blocks.
2683   record.block_type = TraceType::kBlockTraceIndexBlock;
2684   record.caller = TableReaderCaller::kUserApproximateSize;
2685   record.is_cache_hit = Boolean::kTrue;
2686   expected_records.push_back(record);
2687   expected_records.push_back(record);
2688   VerifyBlockAccessTrace(&c, expected_records);
2689   c.ResetTableReader();
2690 }
2691 
TEST_P(BlockBasedTableTest,TracingIterator)2692 TEST_P(BlockBasedTableTest, TracingIterator) {
2693   TableConstructor c(BytewiseComparator());
2694   Options options;
2695   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2696   options.create_if_missing = true;
2697   table_options.block_cache = NewLRUCache(1024 * 1024, 0);
2698   table_options.cache_index_and_filter_blocks = true;
2699   table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
2700   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2701   SetupTracingTest(&c);
2702   std::vector<std::string> keys;
2703   stl_wrappers::KVMap kvmap;
2704   ImmutableOptions ioptions(options);
2705   MutableCFOptions moptions(options);
2706   c.Finish(options, ioptions, moptions, table_options,
2707            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2708 
2709   for (uint32_t i = 1; i <= 2; i++) {
2710     ReadOptions read_options;
2711     std::unique_ptr<InternalIterator> iter(c.GetTableReader()->NewIterator(
2712         read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
2713         /*skip_filters=*/false, TableReaderCaller::kUserIterator));
2714     iter->SeekToFirst();
2715     while (iter->Valid()) {
2716       iter->key();
2717       iter->value();
2718       iter->Next();
2719     }
2720     ASSERT_OK(iter->status());
2721     iter.reset();
2722   }
2723 
2724   // Verify traces.
2725   std::vector<BlockCacheTraceRecord> expected_records;
2726   // The first two records should be prefetching index and filter blocks.
2727   BlockCacheTraceRecord record;
2728   record.block_type = TraceType::kBlockTraceIndexBlock;
2729   record.caller = TableReaderCaller::kPrefetch;
2730   record.is_cache_hit = Boolean::kFalse;
2731   record.no_insert = Boolean::kFalse;
2732   expected_records.push_back(record);
2733   record.block_type = TraceType::kBlockTraceFilterBlock;
2734   expected_records.push_back(record);
2735   // Then we should have three records for index and two data block access.
2736   record.block_type = TraceType::kBlockTraceIndexBlock;
2737   record.caller = TableReaderCaller::kUserIterator;
2738   record.is_cache_hit = Boolean::kTrue;
2739   expected_records.push_back(record);
2740   record.block_type = TraceType::kBlockTraceDataBlock;
2741   record.is_cache_hit = Boolean::kFalse;
2742   expected_records.push_back(record);
2743   expected_records.push_back(record);
2744   // When we iterate this file for the second time, we should observe all cache
2745   // hits.
2746   record.block_type = TraceType::kBlockTraceIndexBlock;
2747   record.is_cache_hit = Boolean::kTrue;
2748   expected_records.push_back(record);
2749   record.block_type = TraceType::kBlockTraceDataBlock;
2750   expected_records.push_back(record);
2751   expected_records.push_back(record);
2752   VerifyBlockAccessTrace(&c, expected_records);
2753   c.ResetTableReader();
2754 }
2755 
2756 // A simple tool that takes the snapshot of block cache statistics.
2757 class BlockCachePropertiesSnapshot {
2758  public:
BlockCachePropertiesSnapshot(Statistics * statistics)2759   explicit BlockCachePropertiesSnapshot(Statistics* statistics) {
2760     block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
2761     block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
2762     index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
2763     index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
2764     data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
2765     data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
2766     filter_block_cache_miss =
2767         statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS);
2768     filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT);
2769     block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ);
2770     block_cache_bytes_write =
2771         statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE);
2772   }
2773 
AssertIndexBlockStat(int64_t expected_index_block_cache_miss,int64_t expected_index_block_cache_hit)2774   void AssertIndexBlockStat(int64_t expected_index_block_cache_miss,
2775                             int64_t expected_index_block_cache_hit) {
2776     ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
2777     ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
2778   }
2779 
AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,int64_t expected_filter_block_cache_hit)2780   void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,
2781                              int64_t expected_filter_block_cache_hit) {
2782     ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss);
2783     ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit);
2784   }
2785 
2786   // Check if the fetched props matches the expected ones.
2787   // TODO(kailiu) Use this only when you disabled filter policy!
AssertEqual(int64_t expected_index_block_cache_miss,int64_t expected_index_block_cache_hit,int64_t expected_data_block_cache_miss,int64_t expected_data_block_cache_hit) const2788   void AssertEqual(int64_t expected_index_block_cache_miss,
2789                    int64_t expected_index_block_cache_hit,
2790                    int64_t expected_data_block_cache_miss,
2791                    int64_t expected_data_block_cache_hit) const {
2792     ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
2793     ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
2794     ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss);
2795     ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit);
2796     ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss,
2797               block_cache_miss);
2798     ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit,
2799               block_cache_hit);
2800   }
2801 
GetCacheBytesRead()2802   int64_t GetCacheBytesRead() { return block_cache_bytes_read; }
2803 
GetCacheBytesWrite()2804   int64_t GetCacheBytesWrite() { return block_cache_bytes_write; }
2805 
2806  private:
2807   int64_t block_cache_miss = 0;
2808   int64_t block_cache_hit = 0;
2809   int64_t index_block_cache_miss = 0;
2810   int64_t index_block_cache_hit = 0;
2811   int64_t data_block_cache_miss = 0;
2812   int64_t data_block_cache_hit = 0;
2813   int64_t filter_block_cache_miss = 0;
2814   int64_t filter_block_cache_hit = 0;
2815   int64_t block_cache_bytes_read = 0;
2816   int64_t block_cache_bytes_write = 0;
2817 };
2818 
2819 // Make sure, by default, index/filter blocks were pre-loaded (meaning we won't
2820 // use block cache to store them).
TEST_P(BlockBasedTableTest,BlockCacheDisabledTest)2821 TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) {
2822   Options options;
2823   options.create_if_missing = true;
2824   options.statistics = CreateDBStatistics();
2825   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2826   table_options.block_cache = NewLRUCache(1024, 4);
2827   table_options.filter_policy.reset(NewBloomFilterPolicy(10));
2828   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2829   std::vector<std::string> keys;
2830   stl_wrappers::KVMap kvmap;
2831 
2832   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2833   c.Add("key", "value");
2834   const ImmutableOptions ioptions(options);
2835   const MutableCFOptions moptions(options);
2836   c.Finish(options, ioptions, moptions, table_options,
2837            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2838 
2839   // preloading filter/index blocks is enabled.
2840   auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2841   ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2842   ASSERT_FALSE(reader->TEST_IndexBlockInCache());
2843 
2844   {
2845     // nothing happens in the beginning
2846     BlockCachePropertiesSnapshot props(options.statistics.get());
2847     props.AssertIndexBlockStat(0, 0);
2848     props.AssertFilterBlockStat(0, 0);
2849   }
2850 
2851   {
2852     GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2853                            GetContext::kNotFound, Slice(), nullptr, nullptr,
2854                            nullptr, true, nullptr, nullptr);
2855     // a hack that just to trigger BlockBasedTable::GetFilter.
2856     ASSERT_OK(reader->Get(ReadOptions(), "non-exist-key", &get_context,
2857                           moptions.prefix_extractor.get()));
2858     BlockCachePropertiesSnapshot props(options.statistics.get());
2859     props.AssertIndexBlockStat(0, 0);
2860     props.AssertFilterBlockStat(0, 0);
2861   }
2862 }
2863 
2864 // Due to the difficulities of the intersaction between statistics, this test
2865 // only tests the case when "index block is put to block cache"
TEST_P(BlockBasedTableTest,FilterBlockInBlockCache)2866 TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
2867   // -- Table construction
2868   Options options;
2869   options.create_if_missing = true;
2870   options.statistics = CreateDBStatistics();
2871 
2872   // Enable the cache for index/filter blocks
2873   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2874   LRUCacheOptions co;
2875   co.capacity = 2048;
2876   co.num_shard_bits = 2;
2877   co.metadata_charge_policy = kDontChargeCacheMetadata;
2878   table_options.block_cache = NewLRUCache(co);
2879   table_options.cache_index_and_filter_blocks = true;
2880   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2881   std::vector<std::string> keys;
2882   stl_wrappers::KVMap kvmap;
2883 
2884   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2885   c.Add("key", "value");
2886   const ImmutableOptions ioptions(options);
2887   const MutableCFOptions moptions(options);
2888   c.Finish(options, ioptions, moptions, table_options,
2889            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2890   // preloading filter/index blocks is prohibited.
2891   auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2892   ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2893   ASSERT_TRUE(reader->TEST_IndexBlockInCache());
2894 
2895   // -- PART 1: Open with regular block cache.
2896   // Since block_cache is disabled, no cache activities will be involved.
2897   std::unique_ptr<InternalIterator> iter;
2898 
2899   int64_t last_cache_bytes_read = 0;
2900   // At first, no block will be accessed.
2901   {
2902     BlockCachePropertiesSnapshot props(options.statistics.get());
2903     // index will be added to block cache.
2904     props.AssertEqual(1,  // index block miss
2905                       0, 0, 0);
2906     ASSERT_EQ(props.GetCacheBytesRead(), 0);
2907     ASSERT_EQ(props.GetCacheBytesWrite(),
2908               static_cast<int64_t>(table_options.block_cache->GetUsage()));
2909     last_cache_bytes_read = props.GetCacheBytesRead();
2910   }
2911 
2912   // Only index block will be accessed
2913   {
2914     iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2915     BlockCachePropertiesSnapshot props(options.statistics.get());
2916     // NOTE: to help better highlight the "detla" of each ticker, I use
2917     // <last_value> + <added_value> to indicate the increment of changed
2918     // value; other numbers remain the same.
2919     props.AssertEqual(1, 0 + 1,  // index block hit
2920                       0, 0);
2921     // Cache hit, bytes read from cache should increase
2922     ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2923     ASSERT_EQ(props.GetCacheBytesWrite(),
2924               static_cast<int64_t>(table_options.block_cache->GetUsage()));
2925     last_cache_bytes_read = props.GetCacheBytesRead();
2926   }
2927 
2928   // Only data block will be accessed
2929   {
2930     iter->SeekToFirst();
2931     ASSERT_OK(iter->status());
2932     BlockCachePropertiesSnapshot props(options.statistics.get());
2933     props.AssertEqual(1, 1, 0 + 1,  // data block miss
2934                       0);
2935     // Cache miss, Bytes read from cache should not change
2936     ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read);
2937     ASSERT_EQ(props.GetCacheBytesWrite(),
2938               static_cast<int64_t>(table_options.block_cache->GetUsage()));
2939     last_cache_bytes_read = props.GetCacheBytesRead();
2940   }
2941 
2942   // Data block will be in cache
2943   {
2944     iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2945     iter->SeekToFirst();
2946     ASSERT_OK(iter->status());
2947     BlockCachePropertiesSnapshot props(options.statistics.get());
2948     props.AssertEqual(1, 1 + 1, /* index block hit */
2949                       1, 0 + 1 /* data block hit */);
2950     // Cache hit, bytes read from cache should increase
2951     ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2952     ASSERT_EQ(props.GetCacheBytesWrite(),
2953               static_cast<int64_t>(table_options.block_cache->GetUsage()));
2954   }
2955   // release the iterator so that the block cache can reset correctly.
2956   iter.reset();
2957 
2958   c.ResetTableReader();
2959 
2960   // -- PART 2: Open with very small block cache
2961   // In this test, no block will ever get hit since the block cache is
2962   // too small to fit even one entry.
2963   table_options.block_cache = NewLRUCache(1, 4);
2964   options.statistics = CreateDBStatistics();
2965   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2966   const ImmutableOptions ioptions2(options);
2967   const MutableCFOptions moptions2(options);
2968   ASSERT_OK(c.Reopen(ioptions2, moptions2));
2969   {
2970     BlockCachePropertiesSnapshot props(options.statistics.get());
2971     props.AssertEqual(1,  // index block miss
2972                       0, 0, 0);
2973     // Cache miss, Bytes read from cache should not change
2974     ASSERT_EQ(props.GetCacheBytesRead(), 0);
2975   }
2976 
2977   {
2978     // Both index and data block get accessed.
2979     // It first cache index block then data block. But since the cache size
2980     // is only 1, index block will be purged after data block is inserted.
2981     iter.reset(c.NewIterator(moptions2.prefix_extractor.get()));
2982     BlockCachePropertiesSnapshot props(options.statistics.get());
2983     props.AssertEqual(1 + 1,  // index block miss
2984                       0, 0,   // data block miss
2985                       0);
2986     // Cache hit, bytes read from cache should increase
2987     ASSERT_EQ(props.GetCacheBytesRead(), 0);
2988   }
2989 
2990   {
2991     // SeekToFirst() accesses data block. With similar reason, we expect data
2992     // block's cache miss.
2993     iter->SeekToFirst();
2994     ASSERT_OK(iter->status());
2995     BlockCachePropertiesSnapshot props(options.statistics.get());
2996     props.AssertEqual(2, 0, 0 + 1,  // data block miss
2997                       0);
2998     // Cache miss, Bytes read from cache should not change
2999     ASSERT_EQ(props.GetCacheBytesRead(), 0);
3000   }
3001   iter.reset();
3002   c.ResetTableReader();
3003 
3004   // -- PART 3: Open table with bloom filter enabled but not in SST file
3005   table_options.block_cache = NewLRUCache(4096, 4);
3006   table_options.cache_index_and_filter_blocks = false;
3007   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3008 
3009   TableConstructor c3(BytewiseComparator());
3010   std::string user_key = "k01";
3011   InternalKey internal_key(user_key, 0, kTypeValue);
3012   c3.Add(internal_key.Encode().ToString(), "hello");
3013   ImmutableOptions ioptions3(options);
3014   MutableCFOptions moptions3(options);
3015   // Generate table without filter policy
3016   c3.Finish(options, ioptions3, moptions3, table_options,
3017             GetPlainInternalComparator(options.comparator), &keys, &kvmap);
3018   c3.ResetTableReader();
3019 
3020   // Open table with filter policy
3021   table_options.filter_policy.reset(NewBloomFilterPolicy(1));
3022   options.table_factory.reset(new BlockBasedTableFactory(table_options));
3023   options.statistics = CreateDBStatistics();
3024   ImmutableOptions ioptions4(options);
3025   MutableCFOptions moptions4(options);
3026   ASSERT_OK(c3.Reopen(ioptions4, moptions4));
3027   reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());
3028   ASSERT_FALSE(reader->TEST_FilterBlockInCache());
3029   PinnableSlice value;
3030   GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3031                          GetContext::kNotFound, user_key, &value, nullptr,
3032                          nullptr, true, nullptr, nullptr);
3033   ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context,
3034                         moptions4.prefix_extractor.get()));
3035   ASSERT_STREQ(value.data(), "hello");
3036   BlockCachePropertiesSnapshot props(options.statistics.get());
3037   props.AssertFilterBlockStat(0, 0);
3038   c3.ResetTableReader();
3039 }
3040 
ValidateBlockSizeDeviation(int value,int expected)3041 void ValidateBlockSizeDeviation(int value, int expected) {
3042   BlockBasedTableOptions table_options;
3043   table_options.block_size_deviation = value;
3044   BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
3045 
3046   const BlockBasedTableOptions* normalized_table_options =
3047       factory->GetOptions<BlockBasedTableOptions>();
3048   ASSERT_EQ(normalized_table_options->block_size_deviation, expected);
3049 
3050   delete factory;
3051 }
3052 
ValidateBlockRestartInterval(int value,int expected)3053 void ValidateBlockRestartInterval(int value, int expected) {
3054   BlockBasedTableOptions table_options;
3055   table_options.block_restart_interval = value;
3056   BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
3057 
3058   const BlockBasedTableOptions* normalized_table_options =
3059       factory->GetOptions<BlockBasedTableOptions>();
3060   ASSERT_EQ(normalized_table_options->block_restart_interval, expected);
3061 
3062   delete factory;
3063 }
3064 
TEST_P(BlockBasedTableTest,InvalidOptions)3065 TEST_P(BlockBasedTableTest, InvalidOptions) {
3066   // invalid values for block_size_deviation (<0 or >100) are silently set to 0
3067   ValidateBlockSizeDeviation(-10, 0);
3068   ValidateBlockSizeDeviation(-1, 0);
3069   ValidateBlockSizeDeviation(0, 0);
3070   ValidateBlockSizeDeviation(1, 1);
3071   ValidateBlockSizeDeviation(99, 99);
3072   ValidateBlockSizeDeviation(100, 100);
3073   ValidateBlockSizeDeviation(101, 0);
3074   ValidateBlockSizeDeviation(1000, 0);
3075 
3076   // invalid values for block_restart_interval (<1) are silently set to 1
3077   ValidateBlockRestartInterval(-10, 1);
3078   ValidateBlockRestartInterval(-1, 1);
3079   ValidateBlockRestartInterval(0, 1);
3080   ValidateBlockRestartInterval(1, 1);
3081   ValidateBlockRestartInterval(2, 2);
3082   ValidateBlockRestartInterval(1000, 1000);
3083 }
3084 
TEST_P(BlockBasedTableTest,BlockReadCountTest)3085 TEST_P(BlockBasedTableTest, BlockReadCountTest) {
3086   // bloom_filter_type = 0 -- block-based filter
3087   // bloom_filter_type = 0 -- full filter
3088   for (int bloom_filter_type = 0; bloom_filter_type < 2; ++bloom_filter_type) {
3089     for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2;
3090          ++index_and_filter_in_cache) {
3091       Options options;
3092       options.create_if_missing = true;
3093 
3094       BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3095       table_options.block_cache = NewLRUCache(1, 0);
3096       table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
3097       table_options.filter_policy.reset(
3098           NewBloomFilterPolicy(10, bloom_filter_type == 0));
3099       options.table_factory.reset(new BlockBasedTableFactory(table_options));
3100       std::vector<std::string> keys;
3101       stl_wrappers::KVMap kvmap;
3102 
3103       TableConstructor c(BytewiseComparator());
3104       std::string user_key = "k04";
3105       InternalKey internal_key(user_key, 0, kTypeValue);
3106       std::string encoded_key = internal_key.Encode().ToString();
3107       c.Add(encoded_key, "hello");
3108       ImmutableOptions ioptions(options);
3109       MutableCFOptions moptions(options);
3110       // Generate table with filter policy
3111       c.Finish(options, ioptions, moptions, table_options,
3112                GetPlainInternalComparator(options.comparator), &keys, &kvmap);
3113       auto reader = c.GetTableReader();
3114       PinnableSlice value;
3115       {
3116         GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3117                                GetContext::kNotFound, user_key, &value, nullptr,
3118                                nullptr, true, nullptr, nullptr);
3119         get_perf_context()->Reset();
3120         ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
3121                               moptions.prefix_extractor.get()));
3122         if (index_and_filter_in_cache) {
3123           // data, index and filter block
3124           ASSERT_EQ(get_perf_context()->block_read_count, 3);
3125           ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
3126           ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
3127         } else {
3128           // just the data block
3129           ASSERT_EQ(get_perf_context()->block_read_count, 1);
3130         }
3131         ASSERT_EQ(get_context.State(), GetContext::kFound);
3132         ASSERT_STREQ(value.data(), "hello");
3133       }
3134 
3135       // Get non-existing key
3136       user_key = "does-not-exist";
3137       internal_key = InternalKey(user_key, 0, kTypeValue);
3138       encoded_key = internal_key.Encode().ToString();
3139 
3140       value.Reset();
3141       {
3142         GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3143                                GetContext::kNotFound, user_key, &value, nullptr,
3144                                nullptr, true, nullptr, nullptr);
3145         get_perf_context()->Reset();
3146         ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
3147                               moptions.prefix_extractor.get()));
3148         ASSERT_EQ(get_context.State(), GetContext::kNotFound);
3149       }
3150 
3151       if (index_and_filter_in_cache) {
3152         if (bloom_filter_type == 0) {
3153           // with block-based, we read index and then the filter
3154           ASSERT_EQ(get_perf_context()->block_read_count, 2);
3155           ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
3156           ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
3157         } else {
3158           // with full-filter, we read filter first and then we stop
3159           ASSERT_EQ(get_perf_context()->block_read_count, 1);
3160           ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
3161         }
3162       } else {
3163         // filter is already in memory and it figures out that the key doesn't
3164         // exist
3165         ASSERT_EQ(get_perf_context()->block_read_count, 0);
3166       }
3167     }
3168   }
3169 }
3170 
TEST_P(BlockBasedTableTest,BlockCacheLeak)3171 TEST_P(BlockBasedTableTest, BlockCacheLeak) {
3172   // Check that when we reopen a table we don't lose access to blocks already
3173   // in the cache. This test checks whether the Table actually makes use of the
3174   // unique ID from the file.
3175 
3176   Options opt;
3177   std::unique_ptr<InternalKeyComparator> ikc;
3178   ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
3179   opt.compression = kNoCompression;
3180   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3181   table_options.block_size = 1024;
3182   // big enough so we don't ever lose cached values.
3183   table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
3184   opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
3185 
3186   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
3187   c.Add("k01", "hello");
3188   c.Add("k02", "hello2");
3189   c.Add("k03", std::string(10000, 'x'));
3190   c.Add("k04", std::string(200000, 'x'));
3191   c.Add("k05", std::string(300000, 'x'));
3192   c.Add("k06", "hello3");
3193   c.Add("k07", std::string(100000, 'x'));
3194   std::vector<std::string> keys;
3195   stl_wrappers::KVMap kvmap;
3196   const ImmutableOptions ioptions(opt);
3197   const MutableCFOptions moptions(opt);
3198   c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
3199 
3200   std::unique_ptr<InternalIterator> iter(
3201       c.NewIterator(moptions.prefix_extractor.get()));
3202   iter->SeekToFirst();
3203   while (iter->Valid()) {
3204     iter->key();
3205     iter->value();
3206     iter->Next();
3207   }
3208   ASSERT_OK(iter->status());
3209   iter.reset();
3210 
3211   const ImmutableOptions ioptions1(opt);
3212   const MutableCFOptions moptions1(opt);
3213   ASSERT_OK(c.Reopen(ioptions1, moptions1));
3214   auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
3215   for (const std::string& key : keys) {
3216     InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
3217     ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
3218   }
3219   c.ResetTableReader();
3220 
3221   // rerun with different block cache
3222   table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
3223   opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
3224   const ImmutableOptions ioptions2(opt);
3225   const MutableCFOptions moptions2(opt);
3226   ASSERT_OK(c.Reopen(ioptions2, moptions2));
3227   table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
3228   for (const std::string& key : keys) {
3229     InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
3230     ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
3231   }
3232   c.ResetTableReader();
3233 }
3234 
3235 namespace {
3236 class CustomMemoryAllocator : public MemoryAllocator {
3237  public:
Name() const3238   const char* Name() const override { return "CustomMemoryAllocator"; }
3239 
Allocate(size_t size)3240   void* Allocate(size_t size) override {
3241     ++numAllocations;
3242     auto ptr = new char[size + 16];
3243     memcpy(ptr, "memory_allocator_", 16);  // mangle first 16 bytes
3244     return reinterpret_cast<void*>(ptr + 16);
3245   }
Deallocate(void * p)3246   void Deallocate(void* p) override {
3247     ++numDeallocations;
3248     char* ptr = reinterpret_cast<char*>(p) - 16;
3249     delete[] ptr;
3250   }
3251 
3252   std::atomic<int> numAllocations;
3253   std::atomic<int> numDeallocations;
3254 };
3255 }  // namespace
3256 
TEST_P(BlockBasedTableTest,MemoryAllocator)3257 TEST_P(BlockBasedTableTest, MemoryAllocator) {
3258   auto custom_memory_allocator = std::make_shared<CustomMemoryAllocator>();
3259   {
3260     Options opt;
3261     std::unique_ptr<InternalKeyComparator> ikc;
3262     ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
3263     opt.compression = kNoCompression;
3264     BlockBasedTableOptions table_options;
3265     table_options.block_size = 1024;
3266     LRUCacheOptions lruOptions;
3267     lruOptions.memory_allocator = custom_memory_allocator;
3268     lruOptions.capacity = 16 * 1024 * 1024;
3269     lruOptions.num_shard_bits = 4;
3270     table_options.block_cache = NewLRUCache(std::move(lruOptions));
3271     opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
3272 
3273     TableConstructor c(BytewiseComparator(),
3274                        true /* convert_to_internal_key_ */);
3275     c.Add("k01", "hello");
3276     c.Add("k02", "hello2");
3277     c.Add("k03", std::string(10000, 'x'));
3278     c.Add("k04", std::string(200000, 'x'));
3279     c.Add("k05", std::string(300000, 'x'));
3280     c.Add("k06", "hello3");
3281     c.Add("k07", std::string(100000, 'x'));
3282     std::vector<std::string> keys;
3283     stl_wrappers::KVMap kvmap;
3284     const ImmutableOptions ioptions(opt);
3285     const MutableCFOptions moptions(opt);
3286     c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
3287 
3288     std::unique_ptr<InternalIterator> iter(
3289         c.NewIterator(moptions.prefix_extractor.get()));
3290     iter->SeekToFirst();
3291     while (iter->Valid()) {
3292       iter->key();
3293       iter->value();
3294       iter->Next();
3295     }
3296     ASSERT_OK(iter->status());
3297   }
3298 
3299   // out of scope, block cache should have been deleted, all allocations
3300   // deallocated
3301   EXPECT_EQ(custom_memory_allocator->numAllocations.load(),
3302             custom_memory_allocator->numDeallocations.load());
3303   // make sure that allocations actually happened through the cache allocator
3304   EXPECT_GT(custom_memory_allocator->numAllocations.load(), 0);
3305 }
3306 
3307 // Test the file checksum of block based table
TEST_P(BlockBasedTableTest,NoFileChecksum)3308 TEST_P(BlockBasedTableTest, NoFileChecksum) {
3309   Options options;
3310   ImmutableOptions ioptions(options);
3311   MutableCFOptions moptions(options);
3312   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3313   std::unique_ptr<InternalKeyComparator> comparator(
3314       new InternalKeyComparator(BytewiseComparator()));
3315   int level = 0;
3316   IntTblPropCollectorFactories int_tbl_prop_collector_factories;
3317   std::string column_family_name;
3318 
3319   FileChecksumTestHelper f(true);
3320   f.CreateWriteableFile();
3321   std::unique_ptr<TableBuilder> builder;
3322   builder.reset(ioptions.table_factory->NewTableBuilder(
3323       TableBuilderOptions(ioptions, moptions, *comparator,
3324                           &int_tbl_prop_collector_factories,
3325                           options.compression, options.compression_opts,
3326                           kUnknownColumnFamily, column_family_name, level),
3327       f.GetFileWriter()));
3328   ASSERT_OK(f.ResetTableBuilder(std::move(builder)));
3329   f.AddKVtoKVMap(1000);
3330   ASSERT_OK(f.WriteKVAndFlushTable());
3331   ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName);
3332   ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum);
3333 }
3334 
TEST_P(BlockBasedTableTest,Crc32cFileChecksum)3335 TEST_P(BlockBasedTableTest, Crc32cFileChecksum) {
3336   FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
3337       new FileChecksumGenCrc32cFactory();
3338   Options options;
3339   options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
3340   ImmutableOptions ioptions(options);
3341   MutableCFOptions moptions(options);
3342   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3343   std::unique_ptr<InternalKeyComparator> comparator(
3344       new InternalKeyComparator(BytewiseComparator()));
3345   int level = 0;
3346   IntTblPropCollectorFactories int_tbl_prop_collector_factories;
3347   std::string column_family_name;
3348 
3349   FileChecksumGenContext gen_context;
3350   gen_context.file_name = "db/tmp";
3351   std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 =
3352       options.file_checksum_gen_factory->CreateFileChecksumGenerator(
3353           gen_context);
3354   FileChecksumTestHelper f(true);
3355   f.CreateWriteableFile();
3356   f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
3357   std::unique_ptr<TableBuilder> builder;
3358   builder.reset(ioptions.table_factory->NewTableBuilder(
3359       TableBuilderOptions(ioptions, moptions, *comparator,
3360                           &int_tbl_prop_collector_factories,
3361                           options.compression, options.compression_opts,
3362                           kUnknownColumnFamily, column_family_name, level),
3363       f.GetFileWriter()));
3364   ASSERT_OK(f.ResetTableBuilder(std::move(builder)));
3365   f.AddKVtoKVMap(1000);
3366   ASSERT_OK(f.WriteKVAndFlushTable());
3367   ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
3368 
3369   std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 =
3370       options.file_checksum_gen_factory->CreateFileChecksumGenerator(
3371           gen_context);
3372   std::string checksum;
3373   ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum));
3374   ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
3375 
3376   // Unit test the generator itself for schema stability
3377   std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen3 =
3378       options.file_checksum_gen_factory->CreateFileChecksumGenerator(
3379           gen_context);
3380   const char data[] = "here is some data";
3381   checksum_crc32c_gen3->Update(data, sizeof(data));
3382   checksum_crc32c_gen3->Finalize();
3383   checksum = checksum_crc32c_gen3->GetChecksum();
3384   ASSERT_STREQ(checksum.c_str(), "\345\245\277\110");
3385 }
3386 
3387 // Plain table is not supported in ROCKSDB_LITE
3388 #ifndef ROCKSDB_LITE
TEST_F(PlainTableTest,BasicPlainTableProperties)3389 TEST_F(PlainTableTest, BasicPlainTableProperties) {
3390   PlainTableOptions plain_table_options;
3391   plain_table_options.user_key_len = 8;
3392   plain_table_options.bloom_bits_per_key = 8;
3393   plain_table_options.hash_table_ratio = 0;
3394 
3395   PlainTableFactory factory(plain_table_options);
3396   std::unique_ptr<FSWritableFile> sink(new test::StringSink());
3397   std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
3398       std::move(sink), "" /* don't care */, FileOptions()));
3399   Options options;
3400   const ImmutableOptions ioptions(options);
3401   const MutableCFOptions moptions(options);
3402   InternalKeyComparator ikc(options.comparator);
3403   IntTblPropCollectorFactories int_tbl_prop_collector_factories;
3404   std::string column_family_name;
3405   int unknown_level = -1;
3406   std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
3407       TableBuilderOptions(ioptions, moptions, ikc,
3408                           &int_tbl_prop_collector_factories, kNoCompression,
3409                           CompressionOptions(), kUnknownColumnFamily,
3410                           column_family_name, unknown_level),
3411       file_writer.get()));
3412 
3413   for (char c = 'a'; c <= 'z'; ++c) {
3414     std::string key(8, c);
3415     key.append("\1       ");  // PlainTable expects internal key structure
3416     std::string value(28, c + 42);
3417     builder->Add(key, value);
3418   }
3419   ASSERT_OK(builder->Finish());
3420   ASSERT_OK(file_writer->Flush());
3421 
3422   test::StringSink* ss =
3423       static_cast<test::StringSink*>(file_writer->writable_file());
3424   std::unique_ptr<FSRandomAccessFile> source(
3425       new test::StringSource(ss->contents(), 72242, true));
3426   std::unique_ptr<RandomAccessFileReader> file_reader(
3427       new RandomAccessFileReader(std::move(source), "test"));
3428 
3429   TableProperties* props = nullptr;
3430   auto s = ReadTableProperties(file_reader.get(), ss->contents().size(),
3431                                kPlainTableMagicNumber, ioptions,
3432                                &props, true /* compression_type_missing */);
3433   std::unique_ptr<TableProperties> props_guard(props);
3434   ASSERT_OK(s);
3435 
3436   ASSERT_EQ(0ul, props->index_size);
3437   ASSERT_EQ(0ul, props->filter_size);
3438   ASSERT_EQ(16ul * 26, props->raw_key_size);
3439   ASSERT_EQ(28ul * 26, props->raw_value_size);
3440   ASSERT_EQ(26ul, props->num_entries);
3441   ASSERT_EQ(1ul, props->num_data_blocks);
3442 }
3443 
TEST_F(PlainTableTest,NoFileChecksum)3444 TEST_F(PlainTableTest, NoFileChecksum) {
3445   PlainTableOptions plain_table_options;
3446   plain_table_options.user_key_len = 20;
3447   plain_table_options.bloom_bits_per_key = 8;
3448   plain_table_options.hash_table_ratio = 0;
3449   PlainTableFactory factory(plain_table_options);
3450 
3451   Options options;
3452   const ImmutableOptions ioptions(options);
3453   const MutableCFOptions moptions(options);
3454   InternalKeyComparator ikc(options.comparator);
3455   IntTblPropCollectorFactories int_tbl_prop_collector_factories;
3456   std::string column_family_name;
3457   int unknown_level = -1;
3458   FileChecksumTestHelper f(true);
3459   f.CreateWriteableFile();
3460 
3461   std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
3462       TableBuilderOptions(ioptions, moptions, ikc,
3463                           &int_tbl_prop_collector_factories, kNoCompression,
3464                           CompressionOptions(), kUnknownColumnFamily,
3465                           column_family_name, unknown_level),
3466       f.GetFileWriter()));
3467   ASSERT_OK(f.ResetTableBuilder(std::move(builder)));
3468   f.AddKVtoKVMap(1000);
3469   ASSERT_OK(f.WriteKVAndFlushTable());
3470   ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName);
3471   EXPECT_EQ(f.GetFileChecksum(), kUnknownFileChecksum);
3472 }
3473 
TEST_F(PlainTableTest,Crc32cFileChecksum)3474 TEST_F(PlainTableTest, Crc32cFileChecksum) {
3475   PlainTableOptions plain_table_options;
3476   plain_table_options.user_key_len = 20;
3477   plain_table_options.bloom_bits_per_key = 8;
3478   plain_table_options.hash_table_ratio = 0;
3479   PlainTableFactory factory(plain_table_options);
3480 
3481   FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
3482       new FileChecksumGenCrc32cFactory();
3483   Options options;
3484   options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
3485   const ImmutableOptions ioptions(options);
3486   const MutableCFOptions moptions(options);
3487   InternalKeyComparator ikc(options.comparator);
3488   IntTblPropCollectorFactories int_tbl_prop_collector_factories;
3489   std::string column_family_name;
3490   int unknown_level = -1;
3491 
3492   FileChecksumGenContext gen_context;
3493   gen_context.file_name = "db/tmp";
3494   std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 =
3495       options.file_checksum_gen_factory->CreateFileChecksumGenerator(
3496           gen_context);
3497   FileChecksumTestHelper f(true);
3498   f.CreateWriteableFile();
3499   f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
3500 
3501   std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
3502       TableBuilderOptions(ioptions, moptions, ikc,
3503                           &int_tbl_prop_collector_factories, kNoCompression,
3504                           CompressionOptions(), kUnknownColumnFamily,
3505                           column_family_name, unknown_level),
3506       f.GetFileWriter()));
3507   ASSERT_OK(f.ResetTableBuilder(std::move(builder)));
3508   f.AddKVtoKVMap(1000);
3509   ASSERT_OK(f.WriteKVAndFlushTable());
3510   ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
3511 
3512   std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 =
3513       options.file_checksum_gen_factory->CreateFileChecksumGenerator(
3514           gen_context);
3515   std::string checksum;
3516   ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum));
3517   EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
3518 }
3519 
3520 #endif  // !ROCKSDB_LITE
3521 
TEST_F(GeneralTableTest,ApproximateOffsetOfPlain)3522 TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) {
3523   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
3524   c.Add("k01", "hello");
3525   c.Add("k02", "hello2");
3526   c.Add("k03", std::string(10000, 'x'));
3527   c.Add("k04", std::string(200000, 'x'));
3528   c.Add("k05", std::string(300000, 'x'));
3529   c.Add("k06", "hello3");
3530   c.Add("k07", std::string(100000, 'x'));
3531   std::vector<std::string> keys;
3532   stl_wrappers::KVMap kvmap;
3533   Options options;
3534   options.db_host_id = "";
3535   test::PlainInternalKeyComparator internal_comparator(options.comparator);
3536   options.compression = kNoCompression;
3537   BlockBasedTableOptions table_options;
3538   table_options.block_size = 1024;
3539   const ImmutableOptions ioptions(options);
3540   const MutableCFOptions moptions(options);
3541   c.Finish(options, ioptions, moptions, table_options, internal_comparator,
3542            &keys, &kvmap);
3543 
3544   ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
3545   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),       0,      0));
3546   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"),      0,      0));
3547   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),       0,      0));
3548   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"),       0,      0));
3549   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"),   10000,  11000));
3550   // k04 and k05 will be in two consecutive blocks, the index is
3551   // an arbitrary slice between k04 and k05, either before or after k04a
3552   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000));
3553   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"),  210000, 211000));
3554   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"),  510000, 511000));
3555   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"),  510000, 511000));
3556   ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),  610000, 612000));
3557   c.ResetTableReader();
3558 }
3559 
DoCompressionTest(CompressionType comp)3560 static void DoCompressionTest(CompressionType comp) {
3561   Random rnd(301);
3562   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
3563   std::string tmp;
3564   c.Add("k01", "hello");
3565   c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
3566   c.Add("k03", "hello3");
3567   c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
3568   std::vector<std::string> keys;
3569   stl_wrappers::KVMap kvmap;
3570   Options options;
3571   test::PlainInternalKeyComparator ikc(options.comparator);
3572   options.compression = comp;
3573   BlockBasedTableOptions table_options;
3574   table_options.block_size = 1024;
3575   const ImmutableOptions ioptions(options);
3576   const MutableCFOptions moptions(options);
3577   c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);
3578 
3579   ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
3580   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
3581   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
3582   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3500));
3583   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3500));
3584   ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7000));
3585   c.ResetTableReader();
3586 }
3587 
TEST_F(GeneralTableTest,ApproximateOffsetOfCompressed)3588 TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
3589   std::vector<CompressionType> compression_state;
3590   if (!Snappy_Supported()) {
3591     fprintf(stderr, "skipping snappy compression tests\n");
3592   } else {
3593     compression_state.push_back(kSnappyCompression);
3594   }
3595 
3596   if (!Zlib_Supported()) {
3597     fprintf(stderr, "skipping zlib compression tests\n");
3598   } else {
3599     compression_state.push_back(kZlibCompression);
3600   }
3601 
3602   // TODO(kailiu) DoCompressionTest() doesn't work with BZip2.
3603   /*
3604   if (!BZip2_Supported()) {
3605     fprintf(stderr, "skipping bzip2 compression tests\n");
3606   } else {
3607     compression_state.push_back(kBZip2Compression);
3608   }
3609   */
3610 
3611   if (!LZ4_Supported()) {
3612     fprintf(stderr, "skipping lz4 and lz4hc compression tests\n");
3613   } else {
3614     compression_state.push_back(kLZ4Compression);
3615     compression_state.push_back(kLZ4HCCompression);
3616   }
3617 
3618   if (!XPRESS_Supported()) {
3619     fprintf(stderr, "skipping xpress and xpress compression tests\n");
3620   }
3621   else {
3622     compression_state.push_back(kXpressCompression);
3623   }
3624 
3625   for (auto state : compression_state) {
3626     DoCompressionTest(state);
3627   }
3628 }
3629 
3630 #ifndef ROCKSDB_VALGRIND_RUN
TEST_P(ParameterizedHarnessTest,RandomizedHarnessTest)3631 TEST_P(ParameterizedHarnessTest, RandomizedHarnessTest) {
3632   Random rnd(test::RandomSeed() + 5);
3633   for (int num_entries = 0; num_entries < 2000;
3634        num_entries += (num_entries < 50 ? 1 : 200)) {
3635     for (int e = 0; e < num_entries; e++) {
3636       Add(test::RandomKey(&rnd, rnd.Skewed(4)),
3637           rnd.RandomString(rnd.Skewed(5)));
3638     }
3639     Test(&rnd);
3640   }
3641 }
3642 
3643 #ifndef ROCKSDB_LITE
TEST_F(DBHarnessTest,RandomizedLongDB)3644 TEST_F(DBHarnessTest, RandomizedLongDB) {
3645   Random rnd(test::RandomSeed());
3646   int num_entries = 100000;
3647   for (int e = 0; e < num_entries; e++) {
3648     std::string v;
3649     Add(test::RandomKey(&rnd, rnd.Skewed(4)), rnd.RandomString(rnd.Skewed(5)));
3650   }
3651   Test(&rnd);
3652 
3653   // We must have created enough data to force merging
3654   int files = 0;
3655   for (int level = 0; level < db()->NumberLevels(); level++) {
3656     std::string value;
3657     char name[100];
3658     snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level);
3659     ASSERT_TRUE(db()->GetProperty(name, &value));
3660     files += atoi(value.c_str());
3661   }
3662   ASSERT_GT(files, 0);
3663 }
3664 #endif  // ROCKSDB_LITE
3665 #endif  // ROCKSDB_VALGRIND_RUN
3666 
3667 class MemTableTest : public testing::Test {
3668  public:
MemTableTest()3669   MemTableTest() {
3670     InternalKeyComparator cmp(BytewiseComparator());
3671     auto table_factory = std::make_shared<SkipListFactory>();
3672     options_.memtable_factory = table_factory;
3673     ImmutableOptions ioptions(options_);
3674     wb_ = new WriteBufferManager(options_.db_write_buffer_size);
3675     memtable_ = new MemTable(cmp, ioptions, MutableCFOptions(options_), wb_,
3676                              kMaxSequenceNumber, 0 /* column_family_id */);
3677     memtable_->Ref();
3678   }
3679 
~MemTableTest()3680   ~MemTableTest() {
3681     delete memtable_->Unref();
3682     delete wb_;
3683   }
3684 
GetMemTable()3685   MemTable* GetMemTable() { return memtable_; }
3686 
3687  private:
3688   MemTable* memtable_;
3689   Options options_;
3690   WriteBufferManager* wb_;
3691 };
3692 
TEST_F(MemTableTest,Simple)3693 TEST_F(MemTableTest, Simple) {
3694   WriteBatch batch;
3695   WriteBatchInternal::SetSequence(&batch, 100);
3696   ASSERT_OK(batch.Put(std::string("k1"), std::string("v1")));
3697   ASSERT_OK(batch.Put(std::string("k2"), std::string("v2")));
3698   ASSERT_OK(batch.Put(std::string("k3"), std::string("v3")));
3699   ASSERT_OK(batch.Put(std::string("largekey"), std::string("vlarge")));
3700   ASSERT_OK(batch.DeleteRange(std::string("chi"), std::string("xigua")));
3701   ASSERT_OK(batch.DeleteRange(std::string("begin"), std::string("end")));
3702   ColumnFamilyMemTablesDefault cf_mems_default(GetMemTable());
3703   ASSERT_TRUE(
3704       WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr, nullptr)
3705           .ok());
3706 
3707   for (int i = 0; i < 2; ++i) {
3708     Arena arena;
3709     ScopedArenaIterator arena_iter_guard;
3710     std::unique_ptr<InternalIterator> iter_guard;
3711     InternalIterator* iter;
3712     if (i == 0) {
3713       iter = GetMemTable()->NewIterator(ReadOptions(), &arena);
3714       arena_iter_guard.set(iter);
3715     } else {
3716       iter = GetMemTable()->NewRangeTombstoneIterator(
3717           ReadOptions(), kMaxSequenceNumber /* read_seq */);
3718       iter_guard.reset(iter);
3719     }
3720     if (iter == nullptr) {
3721       continue;
3722     }
3723     iter->SeekToFirst();
3724     while (iter->Valid()) {
3725       fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(),
3726               iter->value().ToString().c_str());
3727       iter->Next();
3728     }
3729   }
3730 }
3731 
3732 // Test the empty key
TEST_P(ParameterizedHarnessTest,SimpleEmptyKey)3733 TEST_P(ParameterizedHarnessTest, SimpleEmptyKey) {
3734   Random rnd(test::RandomSeed() + 1);
3735   Add("", "v");
3736   Test(&rnd);
3737 }
3738 
TEST_P(ParameterizedHarnessTest,SimpleSingle)3739 TEST_P(ParameterizedHarnessTest, SimpleSingle) {
3740   Random rnd(test::RandomSeed() + 2);
3741   Add("abc", "v");
3742   Test(&rnd);
3743 }
3744 
TEST_P(ParameterizedHarnessTest,SimpleMulti)3745 TEST_P(ParameterizedHarnessTest, SimpleMulti) {
3746   Random rnd(test::RandomSeed() + 3);
3747   Add("abc", "v");
3748   Add("abcd", "v");
3749   Add("ac", "v2");
3750   Test(&rnd);
3751 }
3752 
TEST_P(ParameterizedHarnessTest,SimpleSpecialKey)3753 TEST_P(ParameterizedHarnessTest, SimpleSpecialKey) {
3754   Random rnd(test::RandomSeed() + 4);
3755   Add("\xff\xff", "v3");
3756   Test(&rnd);
3757 }
3758 
TEST(TableTest,FooterTests)3759 TEST(TableTest, FooterTests) {
3760   {
3761     // upconvert legacy block based
3762     std::string encoded;
3763     Footer footer(kLegacyBlockBasedTableMagicNumber, 0);
3764     BlockHandle meta_index(10, 5), index(20, 15);
3765     footer.set_metaindex_handle(meta_index);
3766     footer.set_index_handle(index);
3767     footer.EncodeTo(&encoded);
3768     Footer decoded_footer;
3769     Slice encoded_slice(encoded);
3770     ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
3771     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3772     ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
3773     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3774     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3775     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3776     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3777     ASSERT_EQ(decoded_footer.version(), 0U);
3778   }
3779   {
3780     // xxhash block based
3781     std::string encoded;
3782     Footer footer(kBlockBasedTableMagicNumber, 1);
3783     BlockHandle meta_index(10, 5), index(20, 15);
3784     footer.set_metaindex_handle(meta_index);
3785     footer.set_index_handle(index);
3786     footer.set_checksum(kxxHash);
3787     footer.EncodeTo(&encoded);
3788     Footer decoded_footer;
3789     Slice encoded_slice(encoded);
3790     ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
3791     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3792     ASSERT_EQ(decoded_footer.checksum(), kxxHash);
3793     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3794     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3795     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3796     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3797     ASSERT_EQ(decoded_footer.version(), 1U);
3798   }
3799   {
3800     // xxhash64 block based
3801     std::string encoded;
3802     Footer footer(kBlockBasedTableMagicNumber, 1);
3803     BlockHandle meta_index(10, 5), index(20, 15);
3804     footer.set_metaindex_handle(meta_index);
3805     footer.set_index_handle(index);
3806     footer.set_checksum(kxxHash64);
3807     footer.EncodeTo(&encoded);
3808     Footer decoded_footer;
3809     Slice encoded_slice(encoded);
3810     ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
3811     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3812     ASSERT_EQ(decoded_footer.checksum(), kxxHash64);
3813     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3814     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3815     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3816     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3817     ASSERT_EQ(decoded_footer.version(), 1U);
3818   }
3819 // Plain table is not supported in ROCKSDB_LITE
3820 #ifndef ROCKSDB_LITE
3821   {
3822     // upconvert legacy plain table
3823     std::string encoded;
3824     Footer footer(kLegacyPlainTableMagicNumber, 0);
3825     BlockHandle meta_index(10, 5), index(20, 15);
3826     footer.set_metaindex_handle(meta_index);
3827     footer.set_index_handle(index);
3828     footer.EncodeTo(&encoded);
3829     Footer decoded_footer;
3830     Slice encoded_slice(encoded);
3831     ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
3832     ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
3833     ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
3834     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3835     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3836     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3837     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3838     ASSERT_EQ(decoded_footer.version(), 0U);
3839   }
3840   {
3841     // xxhash block based
3842     std::string encoded;
3843     Footer footer(kPlainTableMagicNumber, 1);
3844     BlockHandle meta_index(10, 5), index(20, 15);
3845     footer.set_metaindex_handle(meta_index);
3846     footer.set_index_handle(index);
3847     footer.set_checksum(kxxHash);
3848     footer.EncodeTo(&encoded);
3849     Footer decoded_footer;
3850     Slice encoded_slice(encoded);
3851     ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
3852     ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
3853     ASSERT_EQ(decoded_footer.checksum(), kxxHash);
3854     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3855     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3856     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3857     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3858     ASSERT_EQ(decoded_footer.version(), 1U);
3859   }
3860 #endif  // !ROCKSDB_LITE
3861   {
3862     // version == 2
3863     std::string encoded;
3864     Footer footer(kBlockBasedTableMagicNumber, 2);
3865     BlockHandle meta_index(10, 5), index(20, 15);
3866     footer.set_metaindex_handle(meta_index);
3867     footer.set_index_handle(index);
3868     footer.EncodeTo(&encoded);
3869     Footer decoded_footer;
3870     Slice encoded_slice(encoded);
3871     ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
3872     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3873     ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
3874     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3875     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3876     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3877     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3878     ASSERT_EQ(decoded_footer.version(), 2U);
3879   }
3880 }
3881 
3882 class IndexBlockRestartIntervalTest
3883     : public TableTest,
3884       public ::testing::WithParamInterface<std::pair<int, bool>> {
3885  public:
GetRestartValues()3886   static std::vector<std::pair<int, bool>> GetRestartValues() {
3887     return {{-1, false}, {0, false},  {1, false}, {8, false},
3888             {16, false}, {32, false}, {-1, true}, {0, true},
3889             {1, true},   {8, true},   {16, true}, {32, true}};
3890   }
3891 };
3892 
3893 INSTANTIATE_TEST_CASE_P(
3894     IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
3895     ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));
3896 
TEST_P(IndexBlockRestartIntervalTest,IndexBlockRestartInterval)3897 TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
3898   const int kKeysInTable = 10000;
3899   const int kKeySize = 100;
3900   const int kValSize = 500;
3901 
3902   const int index_block_restart_interval = std::get<0>(GetParam());
3903   const bool value_delta_encoding = std::get<1>(GetParam());
3904 
3905   Options options;
3906   BlockBasedTableOptions table_options;
3907   table_options.block_size = 64;  // small block size to get big index block
3908   table_options.index_block_restart_interval = index_block_restart_interval;
3909   if (value_delta_encoding) {
3910     table_options.format_version = 4;
3911   } else {
3912     table_options.format_version = 3;
3913   }
3914   options.table_factory.reset(new BlockBasedTableFactory(table_options));
3915 
3916   TableConstructor c(BytewiseComparator());
3917   static Random rnd(301);
3918   for (int i = 0; i < kKeysInTable; i++) {
3919     InternalKey k(rnd.RandomString(kKeySize), 0, kTypeValue);
3920     c.Add(k.Encode().ToString(), rnd.RandomString(kValSize));
3921   }
3922 
3923   std::vector<std::string> keys;
3924   stl_wrappers::KVMap kvmap;
3925   std::unique_ptr<InternalKeyComparator> comparator(
3926       new InternalKeyComparator(BytewiseComparator()));
3927   const ImmutableOptions ioptions(options);
3928   const MutableCFOptions moptions(options);
3929   c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
3930            &kvmap);
3931   auto reader = c.GetTableReader();
3932 
3933   ReadOptions read_options;
3934   std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
3935       read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
3936       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
3937 
3938   // Test point lookup
3939   for (auto& kv : kvmap) {
3940     db_iter->Seek(kv.first);
3941 
3942     ASSERT_TRUE(db_iter->Valid());
3943     ASSERT_OK(db_iter->status());
3944     ASSERT_EQ(db_iter->key(), kv.first);
3945     ASSERT_EQ(db_iter->value(), kv.second);
3946   }
3947 
3948   // Test iterating
3949   auto kv_iter = kvmap.begin();
3950   for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
3951     ASSERT_EQ(db_iter->key(), kv_iter->first);
3952     ASSERT_EQ(db_iter->value(), kv_iter->second);
3953     kv_iter++;
3954   }
3955   ASSERT_EQ(kv_iter, kvmap.end());
3956   c.ResetTableReader();
3957 }
3958 
3959 class PrefixTest : public testing::Test {
3960  public:
PrefixTest()3961   PrefixTest() : testing::Test() {}
~PrefixTest()3962   ~PrefixTest() override {}
3963 };
3964 
3965 namespace {
3966 // A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest
3967 class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform {
3968  public:
~TestPrefixExtractor()3969   ~TestPrefixExtractor() override{};
Name() const3970   const char* Name() const override { return "TestPrefixExtractor"; }
3971 
Transform(const ROCKSDB_NAMESPACE::Slice & src) const3972   ROCKSDB_NAMESPACE::Slice Transform(
3973       const ROCKSDB_NAMESPACE::Slice& src) const override {
3974     assert(IsValid(src));
3975     return ROCKSDB_NAMESPACE::Slice(src.data(), 3);
3976   }
3977 
InDomain(const ROCKSDB_NAMESPACE::Slice & src) const3978   bool InDomain(const ROCKSDB_NAMESPACE::Slice& src) const override {
3979     return IsValid(src);
3980   }
3981 
InRange(const ROCKSDB_NAMESPACE::Slice &) const3982   bool InRange(const ROCKSDB_NAMESPACE::Slice& /*dst*/) const override {
3983     return true;
3984   }
3985 
IsValid(const ROCKSDB_NAMESPACE::Slice & src) const3986   bool IsValid(const ROCKSDB_NAMESPACE::Slice& src) const {
3987     if (src.size() != 4) {
3988       return false;
3989     }
3990     if (src[0] != '[') {
3991       return false;
3992     }
3993     if (src[1] < '0' || src[1] > '9') {
3994       return false;
3995     }
3996     if (src[2] != ']') {
3997       return false;
3998     }
3999     if (src[3] < '0' || src[3] > '9') {
4000       return false;
4001     }
4002     return true;
4003   }
4004 };
4005 }  // namespace
4006 
TEST_F(PrefixTest,PrefixAndWholeKeyTest)4007 TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
4008   ROCKSDB_NAMESPACE::Options options;
4009   options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleUniversal;
4010   options.num_levels = 20;
4011   options.create_if_missing = true;
4012   options.optimize_filters_for_hits = false;
4013   options.target_file_size_base = 268435456;
4014   options.prefix_extractor = std::make_shared<TestPrefixExtractor>();
4015   ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto;
4016   bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
4017   bbto.block_size = 262144;
4018   bbto.whole_key_filtering = true;
4019 
4020   const std::string kDBPath = test::PerThreadDBPath("table_prefix_test");
4021   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4022   ASSERT_OK(DestroyDB(kDBPath, options));
4023   ROCKSDB_NAMESPACE::DB* db;
4024   ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
4025 
4026   // Create a bunch of keys with 10 filters.
4027   for (int i = 0; i < 10; i++) {
4028     std::string prefix = "[" + std::to_string(i) + "]";
4029     for (int j = 0; j < 10; j++) {
4030       std::string key = prefix + std::to_string(j);
4031       ASSERT_OK(db->Put(ROCKSDB_NAMESPACE::WriteOptions(), key, "1"));
4032     }
4033   }
4034 
4035   // Trigger compaction.
4036   ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
4037   delete db;
4038   // In the second round, turn whole_key_filtering off and expect
4039   // rocksdb still works.
4040 }
4041 
4042 /*
4043  * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in
4044  * the SST file any more. Instead, RocksDB deduces global_seqno from the
4045  * MANIFEST while reading from an SST. Therefore, it's not possible to test the
4046  * functionality of global_seqno in a single, isolated unit test without the
4047  * involvement of Version, VersionSet, etc.
4048  */
TEST_P(BlockBasedTableTest,DISABLED_TableWithGlobalSeqno)4049 TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
4050   BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4051   test::StringSink* sink = new test::StringSink();
4052   std::unique_ptr<FSWritableFile> holder(sink);
4053   std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
4054       std::move(holder), "" /* don't care */, FileOptions()));
4055   Options options;
4056   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4057   const ImmutableOptions ioptions(options);
4058   const MutableCFOptions moptions(options);
4059   InternalKeyComparator ikc(options.comparator);
4060   IntTblPropCollectorFactories int_tbl_prop_collector_factories;
4061   int_tbl_prop_collector_factories.emplace_back(
4062       new SstFileWriterPropertiesCollectorFactory(2 /* version */,
4063                                                   0 /* global_seqno*/));
4064   std::string column_family_name;
4065   std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
4066       TableBuilderOptions(ioptions, moptions, ikc,
4067                           &int_tbl_prop_collector_factories, kNoCompression,
4068                           CompressionOptions(), kUnknownColumnFamily,
4069                           column_family_name, -1),
4070       file_writer.get()));
4071 
4072   for (char c = 'a'; c <= 'z'; ++c) {
4073     std::string key(8, c);
4074     std::string value = key;
4075     InternalKey ik(key, 0, kTypeValue);
4076 
4077     builder->Add(ik.Encode(), value);
4078   }
4079   ASSERT_OK(builder->Finish());
4080   ASSERT_OK(file_writer->Flush());
4081 
4082   test::RandomRWStringSink ss_rw(sink);
4083   uint32_t version;
4084   uint64_t global_seqno;
4085   uint64_t global_seqno_offset;
4086 
4087   // Helper function to get version, global_seqno, global_seqno_offset
4088   std::function<void()> GetVersionAndGlobalSeqno = [&]() {
4089     std::unique_ptr<FSRandomAccessFile> source(
4090         new test::StringSource(ss_rw.contents(), 73342, true));
4091     std::unique_ptr<RandomAccessFileReader> file_reader(
4092         new RandomAccessFileReader(std::move(source), ""));
4093 
4094     TableProperties* props = nullptr;
4095     ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
4096                                   kBlockBasedTableMagicNumber, ioptions,
4097                                   &props, true /* compression_type_missing */));
4098 
4099     UserCollectedProperties user_props = props->user_collected_properties;
4100     version = DecodeFixed32(
4101         user_props[ExternalSstFilePropertyNames::kVersion].c_str());
4102     global_seqno = DecodeFixed64(
4103         user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str());
4104     global_seqno_offset =
4105         props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno];
4106 
4107     delete props;
4108   };
4109 
4110   // Helper function to update the value of the global seqno in the file
4111   std::function<void(uint64_t)> SetGlobalSeqno = [&](uint64_t val) {
4112     std::string new_global_seqno;
4113     PutFixed64(&new_global_seqno, val);
4114 
4115     ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno, IOOptions(),
4116                           nullptr));
4117   };
4118 
4119   // Helper function to get the contents of the table InternalIterator
4120   std::unique_ptr<TableReader> table_reader;
4121   const ReadOptions read_options;
4122   std::function<InternalIterator*()> GetTableInternalIter = [&]() {
4123     std::unique_ptr<FSRandomAccessFile> source(
4124         new test::StringSource(ss_rw.contents(), 73342, true));
4125     std::unique_ptr<RandomAccessFileReader> file_reader(
4126         new RandomAccessFileReader(std::move(source), ""));
4127 
4128     options.table_factory->NewTableReader(
4129         TableReaderOptions(ioptions, moptions.prefix_extractor.get(),
4130                            EnvOptions(), ikc),
4131         std::move(file_reader), ss_rw.contents().size(), &table_reader);
4132 
4133     return table_reader->NewIterator(
4134         read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
4135         /*skip_filters=*/false, TableReaderCaller::kUncategorized);
4136   };
4137 
4138   GetVersionAndGlobalSeqno();
4139   ASSERT_EQ(2u, version);
4140   ASSERT_EQ(0u, global_seqno);
4141 
4142   InternalIterator* iter = GetTableInternalIter();
4143   char current_c = 'a';
4144   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
4145     ParsedInternalKey pik;
4146     ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));
4147 
4148     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4149     ASSERT_EQ(pik.sequence, 0);
4150     ASSERT_EQ(pik.user_key, iter->value());
4151     ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
4152     current_c++;
4153   }
4154   ASSERT_EQ(current_c, 'z' + 1);
4155   delete iter;
4156 
4157   // Update global sequence number to 10
4158   SetGlobalSeqno(10);
4159   GetVersionAndGlobalSeqno();
4160   ASSERT_EQ(2u, version);
4161   ASSERT_EQ(10u, global_seqno);
4162 
4163   iter = GetTableInternalIter();
4164   current_c = 'a';
4165   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
4166     ParsedInternalKey pik;
4167     ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));
4168 
4169     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4170     ASSERT_EQ(pik.sequence, 10);
4171     ASSERT_EQ(pik.user_key, iter->value());
4172     ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
4173     current_c++;
4174   }
4175   ASSERT_EQ(current_c, 'z' + 1);
4176 
4177   // Verify Seek
4178   for (char c = 'a'; c <= 'z'; c++) {
4179     std::string k = std::string(8, c);
4180     InternalKey ik(k, 10, kValueTypeForSeek);
4181     iter->Seek(ik.Encode());
4182     ASSERT_TRUE(iter->Valid());
4183 
4184     ParsedInternalKey pik;
4185     ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));
4186 
4187     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4188     ASSERT_EQ(pik.sequence, 10);
4189     ASSERT_EQ(pik.user_key.ToString(), k);
4190     ASSERT_EQ(iter->value().ToString(), k);
4191   }
4192   delete iter;
4193 
4194   // Update global sequence number to 3
4195   SetGlobalSeqno(3);
4196   GetVersionAndGlobalSeqno();
4197   ASSERT_EQ(2u, version);
4198   ASSERT_EQ(3u, global_seqno);
4199 
4200   iter = GetTableInternalIter();
4201   current_c = 'a';
4202   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
4203     ParsedInternalKey pik;
4204     ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));
4205 
4206     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4207     ASSERT_EQ(pik.sequence, 3);
4208     ASSERT_EQ(pik.user_key, iter->value());
4209     ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
4210     current_c++;
4211   }
4212   ASSERT_EQ(current_c, 'z' + 1);
4213 
4214   // Verify Seek
4215   for (char c = 'a'; c <= 'z'; c++) {
4216     std::string k = std::string(8, c);
4217     // seqno=4 is less than 3 so we still should get our key
4218     InternalKey ik(k, 4, kValueTypeForSeek);
4219     iter->Seek(ik.Encode());
4220     ASSERT_TRUE(iter->Valid());
4221 
4222     ParsedInternalKey pik;
4223     ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));
4224 
4225     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4226     ASSERT_EQ(pik.sequence, 3);
4227     ASSERT_EQ(pik.user_key.ToString(), k);
4228     ASSERT_EQ(iter->value().ToString(), k);
4229   }
4230 
4231   delete iter;
4232 }
4233 
TEST_P(BlockBasedTableTest,BlockAlignTest)4234 TEST_P(BlockBasedTableTest, BlockAlignTest) {
4235   BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4236   bbto.block_align = true;
4237   test::StringSink* sink = new test::StringSink();
4238   std::unique_ptr<FSWritableFile> holder(sink);
4239   std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
4240       std::move(holder), "" /* don't care */, FileOptions()));
4241   Options options;
4242   options.compression = kNoCompression;
4243   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4244   const ImmutableOptions ioptions(options);
4245   const MutableCFOptions moptions(options);
4246   InternalKeyComparator ikc(options.comparator);
4247   IntTblPropCollectorFactories int_tbl_prop_collector_factories;
4248   std::string column_family_name;
4249   std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
4250       TableBuilderOptions(ioptions, moptions, ikc,
4251                           &int_tbl_prop_collector_factories, kNoCompression,
4252                           CompressionOptions(), kUnknownColumnFamily,
4253                           column_family_name, -1),
4254       file_writer.get()));
4255 
4256   for (int i = 1; i <= 10000; ++i) {
4257     std::ostringstream ostr;
4258     ostr << std::setfill('0') << std::setw(5) << i;
4259     std::string key = ostr.str();
4260     std::string value = "val";
4261     InternalKey ik(key, 0, kTypeValue);
4262 
4263     builder->Add(ik.Encode(), value);
4264   }
4265   ASSERT_OK(builder->Finish());
4266   ASSERT_OK(file_writer->Flush());
4267 
4268   std::unique_ptr<FSRandomAccessFile> source(
4269       new test::StringSource(sink->contents(), 73342, false));
4270   std::unique_ptr<RandomAccessFileReader> file_reader(
4271       new RandomAccessFileReader(std::move(source), "test"));
4272   // Helper function to get version, global_seqno, global_seqno_offset
4273   std::function<void()> VerifyBlockAlignment = [&]() {
4274     TableProperties* props = nullptr;
4275     ASSERT_OK(ReadTableProperties(file_reader.get(), sink->contents().size(),
4276                                   kBlockBasedTableMagicNumber, ioptions, &props,
4277                                   true /* compression_type_missing */));
4278 
4279     uint64_t data_block_size = props->data_size / props->num_data_blocks;
4280     ASSERT_EQ(data_block_size, 4096);
4281     ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks);
4282     delete props;
4283   };
4284 
4285   VerifyBlockAlignment();
4286 
4287   // The below block of code verifies that we can read back the keys. Set
4288   // block_align to false when creating the reader to ensure we can flip between
4289   // the two modes without any issues
4290   std::unique_ptr<TableReader> table_reader;
4291   bbto.block_align = false;
4292   Options options2;
4293   options2.table_factory.reset(NewBlockBasedTableFactory(bbto));
4294   ImmutableOptions ioptions2(options2);
4295   const MutableCFOptions moptions2(options2);
4296 
4297   ASSERT_OK(ioptions.table_factory->NewTableReader(
4298       TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(),
4299                          EnvOptions(),
4300                          GetPlainInternalComparator(options2.comparator)),
4301       std::move(file_reader), sink->contents().size(), &table_reader));
4302 
4303   ReadOptions read_options;
4304   std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator(
4305       read_options, moptions2.prefix_extractor.get(), /*arena=*/nullptr,
4306       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
4307 
4308   int expected_key = 1;
4309   for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
4310     std::ostringstream ostr;
4311     ostr << std::setfill('0') << std::setw(5) << expected_key++;
4312     std::string key = ostr.str();
4313     std::string value = "val";
4314 
4315     ASSERT_OK(db_iter->status());
4316     ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key);
4317     ASSERT_EQ(db_iter->value().ToString(), value);
4318   }
4319   expected_key--;
4320   ASSERT_EQ(expected_key, 10000);
4321   table_reader.reset();
4322 }
4323 
TEST_P(BlockBasedTableTest,PropertiesBlockRestartPointTest)4324 TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
4325   BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4326   bbto.block_align = true;
4327   test::StringSink* sink = new test::StringSink();
4328   std::unique_ptr<FSWritableFile> holder(sink);
4329   std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
4330       std::move(holder), "" /* don't care */, FileOptions()));
4331 
4332   Options options;
4333   options.compression = kNoCompression;
4334   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4335 
4336   const ImmutableOptions ioptions(options);
4337   const MutableCFOptions moptions(options);
4338   InternalKeyComparator ikc(options.comparator);
4339   IntTblPropCollectorFactories int_tbl_prop_collector_factories;
4340   std::string column_family_name;
4341 
4342   std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
4343       TableBuilderOptions(ioptions, moptions, ikc,
4344                           &int_tbl_prop_collector_factories, kNoCompression,
4345                           CompressionOptions(), kUnknownColumnFamily,
4346                           column_family_name, -1),
4347       file_writer.get()));
4348 
4349   for (int i = 1; i <= 10000; ++i) {
4350     std::ostringstream ostr;
4351     ostr << std::setfill('0') << std::setw(5) << i;
4352     std::string key = ostr.str();
4353     std::string value = "val";
4354     InternalKey ik(key, 0, kTypeValue);
4355 
4356     builder->Add(ik.Encode(), value);
4357   }
4358   ASSERT_OK(builder->Finish());
4359   ASSERT_OK(file_writer->Flush());
4360 
4361   std::unique_ptr<FSRandomAccessFile> source(
4362       new test::StringSource(sink->contents(), 73342, true));
4363   std::unique_ptr<RandomAccessFileReader> file_reader(
4364       new RandomAccessFileReader(std::move(source), "test"));
4365 
4366   {
4367     RandomAccessFileReader* file = file_reader.get();
4368     uint64_t file_size = sink->contents().size();
4369 
4370     Footer footer;
4371     IOOptions opts;
4372     ASSERT_OK(ReadFooterFromFile(opts, file, nullptr /* prefetch_buffer */,
4373                                  file_size, &footer,
4374                                  kBlockBasedTableMagicNumber));
4375 
4376     auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type,
4377                                 BlockContents* contents) {
4378       ReadOptions read_options;
4379       read_options.verify_checksums = false;
4380       PersistentCacheOptions cache_options;
4381 
4382       BlockFetcher block_fetcher(
4383           file, nullptr /* prefetch_buffer */, footer, read_options, handle,
4384           contents, ioptions, false /* decompress */,
4385           false /*maybe_compressed*/, block_type,
4386           UncompressionDict::GetEmptyDict(), cache_options);
4387 
4388       ASSERT_OK(block_fetcher.ReadBlockContents());
4389     };
4390 
4391     // -- Read metaindex block
4392     auto metaindex_handle = footer.metaindex_handle();
4393     BlockContents metaindex_contents;
4394 
4395     BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex,
4396                      &metaindex_contents);
4397     Block metaindex_block(std::move(metaindex_contents));
4398 
4399     std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
4400         BytewiseComparator(), kDisableGlobalSequenceNumber));
4401     bool found_properties_block = true;
4402     ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
4403     ASSERT_TRUE(found_properties_block);
4404 
4405     // -- Read properties block
4406     Slice v = meta_iter->value();
4407     BlockHandle properties_handle;
4408     ASSERT_OK(properties_handle.DecodeFrom(&v));
4409     BlockContents properties_contents;
4410 
4411     BlockFetchHelper(properties_handle, BlockType::kProperties,
4412                      &properties_contents);
4413     Block properties_block(std::move(properties_contents));
4414 
4415     ASSERT_EQ(properties_block.NumRestarts(), 1u);
4416   }
4417 }
4418 
TEST_P(BlockBasedTableTest,PropertiesMetaBlockLast)4419 TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
4420   // The properties meta-block should come at the end since we always need to
4421   // read it when opening a file, unlike index/filter/other meta-blocks, which
4422   // are sometimes read depending on the user's configuration. This ordering
4423   // allows us to do a small readahead on the end of the file to read properties
4424   // and meta-index blocks with one I/O.
4425   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
4426   c.Add("a1", "val1");
4427   c.Add("b2", "val2");
4428   c.Add("c3", "val3");
4429   c.Add("d4", "val4");
4430   c.Add("e5", "val5");
4431   c.Add("f6", "val6");
4432   c.Add("g7", "val7");
4433   c.Add("h8", "val8");
4434   c.Add("j9", "val9");
4435 
4436   // write an SST file
4437   Options options;
4438   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
4439   table_options.filter_policy.reset(NewBloomFilterPolicy(
4440       8 /* bits_per_key */, false /* use_block_based_filter */));
4441   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
4442   ImmutableOptions ioptions(options);
4443   MutableCFOptions moptions(options);
4444   std::vector<std::string> keys;
4445   stl_wrappers::KVMap kvmap;
4446   c.Finish(options, ioptions, moptions, table_options,
4447            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
4448 
4449   // get file reader
4450   test::StringSink* table_sink = c.TEST_GetSink();
4451   std::unique_ptr<FSRandomAccessFile> source(new test::StringSource(
4452       table_sink->contents(), 0 /* unique_id */, false /* allow_mmap_reads */));
4453 
4454   std::unique_ptr<RandomAccessFileReader> table_reader(
4455       new RandomAccessFileReader(std::move(source), "test"));
4456   size_t table_size = table_sink->contents().size();
4457 
4458   // read footer
4459   Footer footer;
4460   IOOptions opts;
4461   ASSERT_OK(ReadFooterFromFile(opts, table_reader.get(),
4462                                nullptr /* prefetch_buffer */, table_size,
4463                                &footer, kBlockBasedTableMagicNumber));
4464 
4465   // read metaindex
4466   auto metaindex_handle = footer.metaindex_handle();
4467   BlockContents metaindex_contents;
4468   PersistentCacheOptions pcache_opts;
4469   BlockFetcher block_fetcher(
4470       table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
4471       metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
4472       false /*maybe_compressed*/, BlockType::kMetaIndex,
4473       UncompressionDict::GetEmptyDict(), pcache_opts,
4474       nullptr /*memory_allocator*/);
4475   ASSERT_OK(block_fetcher.ReadBlockContents());
4476   Block metaindex_block(std::move(metaindex_contents));
4477 
4478   // verify properties block comes last
4479   std::unique_ptr<InternalIterator> metaindex_iter{
4480       metaindex_block.NewDataIterator(options.comparator,
4481                                       kDisableGlobalSequenceNumber)};
4482   uint64_t max_offset = 0;
4483   std::string key_at_max_offset;
4484   for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
4485        metaindex_iter->Next()) {
4486     BlockHandle handle;
4487     Slice value = metaindex_iter->value();
4488     ASSERT_OK(handle.DecodeFrom(&value));
4489     if (handle.offset() > max_offset) {
4490       max_offset = handle.offset();
4491       key_at_max_offset = metaindex_iter->key().ToString();
4492     }
4493   }
4494   ASSERT_EQ(kPropertiesBlock, key_at_max_offset);
4495   // index handle is stored in footer rather than metaindex block, so need
4496   // separate logic to verify it comes before properties block.
4497   ASSERT_GT(max_offset, footer.index_handle().offset());
4498   c.ResetTableReader();
4499 }
4500 
TEST_P(BlockBasedTableTest,BadOptions)4501 TEST_P(BlockBasedTableTest, BadOptions) {
4502   ROCKSDB_NAMESPACE::Options options;
4503   options.compression = kNoCompression;
4504   BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4505   bbto.block_size = 4000;
4506   bbto.block_align = true;
4507 
4508   const std::string kDBPath =
4509       test::PerThreadDBPath("block_based_table_bad_options_test");
4510   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4511   ASSERT_OK(DestroyDB(kDBPath, options));
4512   ROCKSDB_NAMESPACE::DB* db;
4513   ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
4514 
4515   bbto.block_size = 4096;
4516   options.compression = kSnappyCompression;
4517   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4518   ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
4519 }
4520 
TEST_F(BBTTailPrefetchTest,TestTailPrefetchStats)4521 TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) {
4522   TailPrefetchStats tpstats;
4523   ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize());
4524   tpstats.RecordEffectiveSize(size_t{1000});
4525   tpstats.RecordEffectiveSize(size_t{1005});
4526   tpstats.RecordEffectiveSize(size_t{1002});
4527   ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize());
4528 
4529   // One single super large value shouldn't influence much
4530   tpstats.RecordEffectiveSize(size_t{1002000});
4531   tpstats.RecordEffectiveSize(size_t{999});
4532   ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize());
4533   ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize());
4534 
4535   // Only history of 32 is kept
4536   for (int i = 0; i < 32; i++) {
4537     tpstats.RecordEffectiveSize(size_t{100});
4538   }
4539   ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize());
4540 
4541   // 16 large values and 16 small values. The result should be closer
4542   // to the small value as the algorithm.
4543   for (int i = 0; i < 16; i++) {
4544     tpstats.RecordEffectiveSize(size_t{1000});
4545   }
4546   tpstats.RecordEffectiveSize(size_t{10});
4547   tpstats.RecordEffectiveSize(size_t{20});
4548   for (int i = 0; i < 6; i++) {
4549     tpstats.RecordEffectiveSize(size_t{100});
4550   }
4551   ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize());
4552   ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize());
4553 }
4554 
TEST_F(BBTTailPrefetchTest,FilePrefetchBufferMinOffset)4555 TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) {
4556   TailPrefetchStats tpstats;
4557   FilePrefetchBuffer buffer(nullptr, 0, 0, false, true);
4558   IOOptions opts;
4559   buffer.TryReadFromCache(opts, 500, 10, nullptr, nullptr);
4560   buffer.TryReadFromCache(opts, 480, 10, nullptr, nullptr);
4561   buffer.TryReadFromCache(opts, 490, 10, nullptr, nullptr);
4562   ASSERT_EQ(480, buffer.min_offset_read());
4563 }
4564 
TEST_P(BlockBasedTableTest,DataBlockHashIndex)4565 TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
4566   const int kNumKeys = 500;
4567   const int kKeySize = 8;
4568   const int kValSize = 40;
4569 
4570   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
4571   table_options.data_block_index_type =
4572       BlockBasedTableOptions::kDataBlockBinaryAndHash;
4573 
4574   Options options;
4575   options.comparator = BytewiseComparator();
4576 
4577   options.table_factory.reset(new BlockBasedTableFactory(table_options));
4578 
4579   TableConstructor c(options.comparator);
4580 
4581   static Random rnd(1048);
4582   for (int i = 0; i < kNumKeys; i++) {
4583     // padding one "0" to mark existent keys.
4584     std::string random_key(rnd.RandomString(kKeySize - 1) + "1");
4585     InternalKey k(random_key, 0, kTypeValue);
4586     c.Add(k.Encode().ToString(), rnd.RandomString(kValSize));
4587   }
4588 
4589   std::vector<std::string> keys;
4590   stl_wrappers::KVMap kvmap;
4591   const ImmutableOptions ioptions(options);
4592   const MutableCFOptions moptions(options);
4593   const InternalKeyComparator internal_comparator(options.comparator);
4594   c.Finish(options, ioptions, moptions, table_options, internal_comparator,
4595            &keys, &kvmap);
4596 
4597   auto reader = c.GetTableReader();
4598 
4599   std::unique_ptr<InternalIterator> seek_iter;
4600   ReadOptions read_options;
4601   seek_iter.reset(reader->NewIterator(
4602       read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
4603       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
4604   for (int i = 0; i < 2; ++i) {
4605     ReadOptions ro;
4606     // for every kv, we seek using two method: Get() and Seek()
4607     // Get() will use the SuffixIndexHash in Block. For non-existent key it
4608     //      will invalidate the iterator
4609     // Seek() will use the default BinarySeek() in Block. So for non-existent
4610     //      key it will land at the closest key that is large than target.
4611 
4612     // Search for existent keys
4613     for (auto& kv : kvmap) {
4614       if (i == 0) {
4615         // Search using Seek()
4616         seek_iter->Seek(kv.first);
4617         ASSERT_OK(seek_iter->status());
4618         ASSERT_TRUE(seek_iter->Valid());
4619         ASSERT_EQ(seek_iter->key(), kv.first);
4620         ASSERT_EQ(seek_iter->value(), kv.second);
4621       } else {
4622         // Search using Get()
4623         PinnableSlice value;
4624         std::string user_key = ExtractUserKey(kv.first).ToString();
4625         GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
4626                                GetContext::kNotFound, user_key, &value, nullptr,
4627                                nullptr, true, nullptr, nullptr);
4628         ASSERT_OK(reader->Get(ro, kv.first, &get_context,
4629                               moptions.prefix_extractor.get()));
4630         ASSERT_EQ(get_context.State(), GetContext::kFound);
4631         ASSERT_EQ(value, Slice(kv.second));
4632         value.Reset();
4633       }
4634     }
4635 
4636     // Search for non-existent keys
4637     for (auto& kv : kvmap) {
4638       std::string user_key = ExtractUserKey(kv.first).ToString();
4639       user_key.back() = '0';  // make it non-existent key
4640       InternalKey internal_key(user_key, 0, kTypeValue);
4641       std::string encoded_key = internal_key.Encode().ToString();
4642       if (i == 0) {  // Search using Seek()
4643         seek_iter->Seek(encoded_key);
4644         ASSERT_OK(seek_iter->status());
4645         if (seek_iter->Valid()) {
4646           ASSERT_TRUE(BytewiseComparator()->Compare(
4647                           user_key, ExtractUserKey(seek_iter->key())) < 0);
4648         }
4649       } else {  // Search using Get()
4650         PinnableSlice value;
4651         GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
4652                                GetContext::kNotFound, user_key, &value, nullptr,
4653                                nullptr, true, nullptr, nullptr);
4654         ASSERT_OK(reader->Get(ro, encoded_key, &get_context,
4655                               moptions.prefix_extractor.get()));
4656         ASSERT_EQ(get_context.State(), GetContext::kNotFound);
4657         value.Reset();
4658       }
4659     }
4660   }
4661 }
4662 
4663 // BlockBasedTableIterator should invalidate itself and return
4664 // OutOfBound()=true immediately after Seek(), to allow LevelIterator
4665 // filter out corresponding level.
TEST_P(BlockBasedTableTest,OutOfBoundOnSeek)4666 TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) {
4667   TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
4668   c.Add("foo", "v1");
4669   std::vector<std::string> keys;
4670   stl_wrappers::KVMap kvmap;
4671   Options options;
4672   BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
4673   options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
4674   const ImmutableOptions ioptions(options);
4675   const MutableCFOptions moptions(options);
4676   c.Finish(options, ioptions, moptions, table_opt,
4677            GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
4678   auto* reader = c.GetTableReader();
4679   ReadOptions read_opt;
4680   std::string upper_bound = "bar";
4681   Slice upper_bound_slice(upper_bound);
4682   read_opt.iterate_upper_bound = &upper_bound_slice;
4683   std::unique_ptr<InternalIterator> iter;
4684   iter.reset(new KeyConvertingIterator(reader->NewIterator(
4685       read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
4686       /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
4687   iter->SeekToFirst();
4688   ASSERT_FALSE(iter->Valid());
4689   ASSERT_OK(iter->status());
4690   ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);
4691   iter.reset(new KeyConvertingIterator(reader->NewIterator(
4692       read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
4693       /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
4694   iter->Seek("foo");
4695   ASSERT_FALSE(iter->Valid());
4696   ASSERT_OK(iter->status());
4697   ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);
4698 }
4699 
4700 // BlockBasedTableIterator should invalidate itself and return
4701 // OutOfBound()=true after Next(), if it finds current index key is no smaller
4702 // than upper bound, unless it is pointing to the last data block.
TEST_P(BlockBasedTableTest,OutOfBoundOnNext)4703 TEST_P(BlockBasedTableTest, OutOfBoundOnNext) {
4704   TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
4705   c.Add("bar", "v");
4706   c.Add("foo", "v");
4707   std::vector<std::string> keys;
4708   stl_wrappers::KVMap kvmap;
4709   Options options;
4710   BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
4711   table_opt.flush_block_policy_factory =
4712       std::make_shared<FlushBlockEveryKeyPolicyFactory>();
4713   options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
4714   const ImmutableOptions ioptions(options);
4715   const MutableCFOptions moptions(options);
4716   c.Finish(options, ioptions, moptions, table_opt,
4717            GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
4718   auto* reader = c.GetTableReader();
4719   ReadOptions read_opt;
4720   std::string ub1 = "bar_after";
4721   Slice ub_slice1(ub1);
4722   read_opt.iterate_upper_bound = &ub_slice1;
4723   std::unique_ptr<InternalIterator> iter;
4724   iter.reset(new KeyConvertingIterator(reader->NewIterator(
4725       read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
4726       /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
4727   iter->Seek("bar");
4728   ASSERT_TRUE(iter->Valid());
4729   ASSERT_EQ("bar", iter->key());
4730   iter->Next();
4731   ASSERT_FALSE(iter->Valid());
4732   ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);
4733   std::string ub2 = "foo_after";
4734   Slice ub_slice2(ub2);
4735   read_opt.iterate_upper_bound = &ub_slice2;
4736   iter.reset(new KeyConvertingIterator(reader->NewIterator(
4737       read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
4738       /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
4739   iter->Seek("foo");
4740   ASSERT_TRUE(iter->Valid());
4741   ASSERT_EQ("foo", iter->key());
4742   iter->Next();
4743   ASSERT_FALSE(iter->Valid());
4744   ASSERT_FALSE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);
4745 }
4746 
4747 }  // namespace ROCKSDB_NAMESPACE
4748 
main(int argc,char ** argv)4749 int main(int argc, char** argv) {
4750   ::testing::InitGoogleTest(&argc, argv);
4751   return RUN_ALL_TESTS();
4752 }
4753