1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include <stdio.h>
11 #include <algorithm>
12 #include <iostream>
13 #include <map>
14 #include <memory>
15 #include <string>
16 #include <vector>
17
18 #include "block_fetcher.h"
19 #include "cache/lru_cache.h"
20 #include "db/dbformat.h"
21 #include "db/memtable.h"
22 #include "db/write_batch_internal.h"
23 #include "memtable/stl_wrappers.h"
24 #include "meta_blocks.h"
25 #include "monitoring/statistics.h"
26 #include "port/port.h"
27 #include "rocksdb/cache.h"
28 #include "rocksdb/db.h"
29 #include "rocksdb/env.h"
30 #include "rocksdb/file_checksum.h"
31 #include "rocksdb/file_system.h"
32 #include "rocksdb/iterator.h"
33 #include "rocksdb/memtablerep.h"
34 #include "rocksdb/perf_context.h"
35 #include "rocksdb/slice_transform.h"
36 #include "rocksdb/statistics.h"
37 #include "rocksdb/write_buffer_manager.h"
38 #include "table/block_based/block.h"
39 #include "table/block_based/block_based_table_builder.h"
40 #include "table/block_based/block_based_table_factory.h"
41 #include "table/block_based/block_based_table_reader.h"
42 #include "table/block_based/block_builder.h"
43 #include "table/block_based/flush_block_policy.h"
44 #include "table/format.h"
45 #include "table/get_context.h"
46 #include "table/internal_iterator.h"
47 #include "table/plain/plain_table_factory.h"
48 #include "table/scoped_arena_iterator.h"
49 #include "table/sst_file_writer_collectors.h"
50 #include "test_util/sync_point.h"
51 #include "test_util/testharness.h"
52 #include "test_util/testutil.h"
53 #include "util/compression.h"
54 #include "util/file_checksum_helper.h"
55 #include "util/random.h"
56 #include "util/string_util.h"
57 #include "utilities/merge_operators.h"
58
59 namespace ROCKSDB_NAMESPACE {
60
61 extern const uint64_t kLegacyBlockBasedTableMagicNumber;
62 extern const uint64_t kLegacyPlainTableMagicNumber;
63 extern const uint64_t kBlockBasedTableMagicNumber;
64 extern const uint64_t kPlainTableMagicNumber;
65
66 namespace {
67
// 10KB filler value, used by tests that need entries large enough to span
// block boundaries or inflate table properties.
const std::string kDummyValue(10000, 'o');
69
70 // DummyPropertiesCollector used to test BlockBasedTableProperties
71 class DummyPropertiesCollector : public TablePropertiesCollector {
72 public:
Name() const73 const char* Name() const override { return "DummyPropertiesCollector"; }
74
Finish(UserCollectedProperties *)75 Status Finish(UserCollectedProperties* /*properties*/) override {
76 return Status::OK();
77 }
78
Add(const Slice &,const Slice &)79 Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override {
80 return Status::OK();
81 }
82
GetReadableProperties() const83 UserCollectedProperties GetReadableProperties() const override {
84 return UserCollectedProperties{};
85 }
86 };
87
88 class DummyPropertiesCollectorFactory1
89 : public TablePropertiesCollectorFactory {
90 public:
CreateTablePropertiesCollector(TablePropertiesCollectorFactory::Context)91 TablePropertiesCollector* CreateTablePropertiesCollector(
92 TablePropertiesCollectorFactory::Context /*context*/) override {
93 return new DummyPropertiesCollector();
94 }
Name() const95 const char* Name() const override {
96 return "DummyPropertiesCollectorFactory1";
97 }
98 };
99
100 class DummyPropertiesCollectorFactory2
101 : public TablePropertiesCollectorFactory {
102 public:
CreateTablePropertiesCollector(TablePropertiesCollectorFactory::Context)103 TablePropertiesCollector* CreateTablePropertiesCollector(
104 TablePropertiesCollectorFactory::Context /*context*/) override {
105 return new DummyPropertiesCollector();
106 }
Name() const107 const char* Name() const override {
108 return "DummyPropertiesCollectorFactory2";
109 }
110 };
111
112 // Return reverse of "key".
113 // Used to test non-lexicographic comparators.
Reverse(const Slice & key)114 std::string Reverse(const Slice& key) {
115 auto rev = key.ToString();
116 std::reverse(rev.begin(), rev.end());
117 return rev;
118 }
119
120 class ReverseKeyComparator : public Comparator {
121 public:
Name() const122 const char* Name() const override {
123 return "rocksdb.ReverseBytewiseComparator";
124 }
125
Compare(const Slice & a,const Slice & b) const126 int Compare(const Slice& a, const Slice& b) const override {
127 return BytewiseComparator()->Compare(Reverse(a), Reverse(b));
128 }
129
FindShortestSeparator(std::string * start,const Slice & limit) const130 void FindShortestSeparator(std::string* start,
131 const Slice& limit) const override {
132 std::string s = Reverse(*start);
133 std::string l = Reverse(limit);
134 BytewiseComparator()->FindShortestSeparator(&s, l);
135 *start = Reverse(s);
136 }
137
FindShortSuccessor(std::string * key) const138 void FindShortSuccessor(std::string* key) const override {
139 std::string s = Reverse(*key);
140 BytewiseComparator()->FindShortSuccessor(&s);
141 *key = Reverse(s);
142 }
143 };
144
// Shared singleton; Increment() below relies on comparing against its address.
ReverseKeyComparator reverse_key_comparator;
146
Increment(const Comparator * cmp,std::string * key)147 void Increment(const Comparator* cmp, std::string* key) {
148 if (cmp == BytewiseComparator()) {
149 key->push_back('\0');
150 } else {
151 assert(cmp == &reverse_key_comparator);
152 std::string rev = Reverse(*key);
153 rev.push_back('\0');
154 *key = Reverse(rev);
155 }
156 }
157
// Shorthand for the sentinel column-family id used when no CF is associated.
const auto kUnknownColumnFamily =
    TablePropertiesCollectorFactory::Context::kUnknownColumnFamily;
160
161 } // namespace
162
163 // Helper class for tests to unify the interface between
164 // BlockBuilder/TableBuilder and Block/Table.
165 class Constructor {
166 public:
Constructor(const Comparator * cmp)167 explicit Constructor(const Comparator* cmp)
168 : data_(stl_wrappers::LessOfComparator(cmp)) {}
~Constructor()169 virtual ~Constructor() { }
170
Add(const std::string & key,const Slice & value)171 void Add(const std::string& key, const Slice& value) {
172 data_[key] = value.ToString();
173 }
174
175 // Finish constructing the data structure with all the keys that have
176 // been added so far. Returns the keys in sorted order in "*keys"
177 // and stores the key/value pairs in "*kvmap"
Finish(const Options & options,const ImmutableOptions & ioptions,const MutableCFOptions & moptions,const BlockBasedTableOptions & table_options,const InternalKeyComparator & internal_comparator,std::vector<std::string> * keys,stl_wrappers::KVMap * kvmap)178 void Finish(const Options& options, const ImmutableOptions& ioptions,
179 const MutableCFOptions& moptions,
180 const BlockBasedTableOptions& table_options,
181 const InternalKeyComparator& internal_comparator,
182 std::vector<std::string>* keys, stl_wrappers::KVMap* kvmap) {
183 last_internal_key_ = &internal_comparator;
184 *kvmap = data_;
185 keys->clear();
186 for (const auto& kv : data_) {
187 keys->push_back(kv.first);
188 }
189 data_.clear();
190 Status s = FinishImpl(options, ioptions, moptions, table_options,
191 internal_comparator, *kvmap);
192 ASSERT_TRUE(s.ok()) << s.ToString();
193 }
194
195 // Construct the data structure from the data in "data"
196 virtual Status FinishImpl(const Options& options,
197 const ImmutableOptions& ioptions,
198 const MutableCFOptions& moptions,
199 const BlockBasedTableOptions& table_options,
200 const InternalKeyComparator& internal_comparator,
201 const stl_wrappers::KVMap& data) = 0;
202
203 virtual InternalIterator* NewIterator(
204 const SliceTransform* prefix_extractor = nullptr) const = 0;
205
data()206 virtual const stl_wrappers::KVMap& data() { return data_; }
207
IsArenaMode() const208 virtual bool IsArenaMode() const { return false; }
209
db() const210 virtual DB* db() const { return nullptr; } // Overridden in DBConstructor
211
AnywayDeleteIterator() const212 virtual bool AnywayDeleteIterator() const { return false; }
213
214 protected:
215 const InternalKeyComparator* last_internal_key_;
216
217 private:
218 stl_wrappers::KVMap data_;
219 };
220
// A helper class that converts internal format keys into user keys
class KeyConvertingIterator : public InternalIterator {
 public:
  // Takes ownership of "iter". If arena_mode is true the wrapped iterator was
  // placement-constructed in an arena, so it must be destroyed in place
  // rather than deleted.
  explicit KeyConvertingIterator(InternalIterator* iter,
                                 bool arena_mode = false)
      : iter_(iter), arena_mode_(arena_mode) {}
  ~KeyConvertingIterator() override {
    if (arena_mode_) {
      iter_->~InternalIterator();
    } else {
      delete iter_;
    }
  }
  // Becomes invalid as soon as a key fails to parse (status_ non-OK).
  bool Valid() const override { return iter_->Valid() && status_.ok(); }
  // Seek targets are user keys; encode them as internal keys with
  // kMaxSequenceNumber before forwarding to the wrapped iterator.
  void Seek(const Slice& target) override {
    ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
    std::string encoded;
    AppendInternalKey(&encoded, ikey);
    iter_->Seek(encoded);
  }
  void SeekForPrev(const Slice& target) override {
    ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
    std::string encoded;
    AppendInternalKey(&encoded, ikey);
    iter_->SeekForPrev(encoded);
  }
  void SeekToFirst() override { iter_->SeekToFirst(); }
  void SeekToLast() override { iter_->SeekToLast(); }
  void Next() override { iter_->Next(); }
  void Prev() override { iter_->Prev(); }
  IterBoundCheck UpperBoundCheckResult() override {
    return iter_->UpperBoundCheckResult();
  }

  // Returns the user-key portion of the wrapped iterator's internal key.
  // On a parse failure, records the error in status_ and returns the status
  // message bytes as the key.
  Slice key() const override {
    assert(Valid());
    ParsedInternalKey parsed_key;
    Status pik_status =
        ParseInternalKey(iter_->key(), &parsed_key, true /* log_err_key */);
    if (!pik_status.ok()) {
      status_ = pik_status;
      return Slice(status_.getState());
    }
    return parsed_key.user_key;
  }

  Slice value() const override { return iter_->value(); }
  // A local parse error takes precedence over the wrapped iterator's status.
  Status status() const override {
    return status_.ok() ? iter_->status() : status_;
  }

 private:
  mutable Status status_;  // set by key() on parse failure
  InternalIterator* iter_;
  bool arena_mode_;

  // No copying allowed
  KeyConvertingIterator(const KeyConvertingIterator&);
  void operator=(const KeyConvertingIterator&);
};
281
// `BlockConstructor` APIs always accept/return user keys.
class BlockConstructor : public Constructor {
 public:
  explicit BlockConstructor(const Comparator* cmp)
      : Constructor(cmp), comparator_(cmp), block_(nullptr) {}
  ~BlockConstructor() override { delete block_; }
  // Builds a single data block from the sorted kv_map and opens it for reads,
  // replacing any previously built block.
  Status FinishImpl(const Options& /*options*/,
                    const ImmutableOptions& /*ioptions*/,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& table_options,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
    delete block_;
    block_ = nullptr;
    BlockBuilder builder(table_options.block_restart_interval);

    for (const auto& kv : kv_map) {
      // `DataBlockIter` assumes it reads only internal keys. `BlockConstructor`
      // clients provide user keys, so we need to convert to internal key format
      // before writing the data block.
      ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
      std::string encoded;
      AppendInternalKey(&encoded, ikey);
      builder.Add(encoded, kv.second);
    }
    // Open the block
    data_ = builder.Finish().ToString();
    BlockContents contents;
    contents.data = data_;
    block_ = new Block(std::move(contents));
    return Status::OK();
  }
  InternalIterator* NewIterator(
      const SliceTransform* /*prefix_extractor*/) const override {
    // `DataBlockIter` returns the internal keys it reads.
    // `KeyConvertingIterator` converts them to user keys before they are
    // exposed to the `BlockConstructor` clients.
    return new KeyConvertingIterator(
        block_->NewDataIterator(comparator_, kDisableGlobalSequenceNumber));
  }

 private:
  const Comparator* comparator_;
  std::string data_;  // backing bytes for block_; must outlive block_
  Block* block_;

  BlockConstructor();
};
330
// Writes the kv_map into an in-memory SST file using the configured
// TableFactory, then reads it back through a TableReader.
class TableConstructor : public Constructor {
 public:
  // convert_to_internal_key: if true, user keys added via Add() are encoded
  // into internal-key format before being written to the table.
  // level / largest_seqno are forwarded to the builder/reader options.
  explicit TableConstructor(const Comparator* cmp,
                            bool convert_to_internal_key = false,
                            int level = -1, SequenceNumber largest_seqno = 0)
      : Constructor(cmp),
        largest_seqno_(largest_seqno),
        convert_to_internal_key_(convert_to_internal_key),
        level_(level) {
    env_ = ROCKSDB_NAMESPACE::Env::Default();
  }
  ~TableConstructor() override { Reset(); }

  // Builds the table into a StringSink, verifies the builder's reported file
  // size, then opens a TableReader over the written bytes.
  Status FinishImpl(const Options& options, const ImmutableOptions& ioptions,
                    const MutableCFOptions& moptions,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& internal_comparator,
                    const stl_wrappers::KVMap& kv_map) override {
    Reset();
    soptions.use_mmap_reads = ioptions.allow_mmap_reads;
    std::unique_ptr<FSWritableFile> sink(new test::StringSink());
    file_writer_.reset(new WritableFileWriter(
        std::move(sink), "" /* don't care */, FileOptions()));
    std::unique_ptr<TableBuilder> builder;
    IntTblPropCollectorFactories int_tbl_prop_collector_factories;

    if (largest_seqno_ != 0) {
      // Pretend that it's an external file written by SstFileWriter.
      int_tbl_prop_collector_factories.emplace_back(
          new SstFileWriterPropertiesCollectorFactory(2 /* version */,
                                                      0 /* global_seqno*/));
    }

    std::string column_family_name;
    builder.reset(ioptions.table_factory->NewTableBuilder(
        TableBuilderOptions(ioptions, moptions, internal_comparator,
                            &int_tbl_prop_collector_factories,
                            options.compression, options.compression_opts,
                            kUnknownColumnFamily, column_family_name, level_),
        file_writer_.get()));

    for (const auto& kv : kv_map) {
      if (convert_to_internal_key_) {
        // Callers supplied user keys; encode them as internal keys first.
        ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
        std::string encoded;
        AppendInternalKey(&encoded, ikey);
        builder->Add(encoded, kv.second);
      } else {
        builder->Add(kv.first, kv.second);
      }
      EXPECT_OK(builder->status());
    }
    Status s = builder->Finish();
    EXPECT_OK(file_writer_->Flush());
    EXPECT_TRUE(s.ok()) << s.ToString();

    EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());

    // Open the table
    uniq_id_ = cur_uniq_id_++;
    std::unique_ptr<FSRandomAccessFile> source(new test::StringSource(
        TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads));

    file_reader_.reset(new RandomAccessFileReader(std::move(source), "test"));
    const bool kSkipFilters = true;
    const bool kImmortal = true;
    return ioptions.table_factory->NewTableReader(
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
                           internal_comparator, !kSkipFilters, !kImmortal,
                           false, level_, largest_seqno_, &block_cache_tracer_,
                           moptions.write_buffer_size),
        std::move(file_reader_), TEST_GetSink()->contents().size(),
        &table_reader_);
  }

  InternalIterator* NewIterator(
      const SliceTransform* prefix_extractor) const override {
    InternalIterator* iter = table_reader_->NewIterator(
        read_options_, prefix_extractor, /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized);
    if (convert_to_internal_key_) {
      // Convert internal keys coming out of the reader back to user keys.
      return new KeyConvertingIterator(iter);
    } else {
      return iter;
    }
  }

  // Returns the reader's estimated file offset of "key" (a user key when
  // convert_to_internal_key_ is set, otherwise passed through unchanged).
  uint64_t ApproximateOffsetOf(const Slice& key) const {
    if (convert_to_internal_key_) {
      InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
      const Slice skey = ikey.Encode();
      return table_reader_->ApproximateOffsetOf(
          skey, TableReaderCaller::kUncategorized);
    }
    return table_reader_->ApproximateOffsetOf(
        key, TableReaderCaller::kUncategorized);
  }

  // Re-creates the TableReader over the previously written contents, e.g.
  // after the caller has changed options. Requires a prior Finish() call
  // (uses last_internal_key_ and the existing sink contents).
  virtual Status Reopen(const ImmutableOptions& ioptions,
                        const MutableCFOptions& moptions) {
    std::unique_ptr<FSRandomAccessFile> source(new test::StringSource(
        TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads));

    file_reader_.reset(new RandomAccessFileReader(std::move(source), "test"));
    return ioptions.table_factory->NewTableReader(
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
                           *last_internal_key_),
        std::move(file_reader_), TEST_GetSink()->contents().size(),
        &table_reader_);
  }

  virtual TableReader* GetTableReader() { return table_reader_.get(); }

  bool AnywayDeleteIterator() const override {
    return convert_to_internal_key_;
  }

  void ResetTableReader() { table_reader_.reset(); }

  bool ConvertToInternalKey() { return convert_to_internal_key_; }

  // The in-memory "file" the table was written into.
  test::StringSink* TEST_GetSink() {
    return static_cast<test::StringSink*>(file_writer_->writable_file());
  }

  BlockCacheTracer block_cache_tracer_;

 private:
  // Drops all reader/writer state so FinishImpl() can run again.
  void Reset() {
    uniq_id_ = 0;
    table_reader_.reset();
    file_writer_.reset();
    file_reader_.reset();
  }

  const ReadOptions read_options_;
  uint64_t uniq_id_;  // id given to the StringSource for this table
  std::unique_ptr<WritableFileWriter> file_writer_;
  std::unique_ptr<RandomAccessFileReader> file_reader_;
  std::unique_ptr<TableReader> table_reader_;
  SequenceNumber largest_seqno_;
  bool convert_to_internal_key_;
  int level_;

  TableConstructor();

  static uint64_t cur_uniq_id_;  // source of unique ids across instances
  EnvOptions soptions;
  Env* env_;
};
uint64_t TableConstructor::cur_uniq_id_ = 1;
482
// Inserts the kv_map into a fresh MemTable; reads go through a
// KeyConvertingIterator wrapping the memtable's arena-allocated iterator.
class MemTableConstructor: public Constructor {
 public:
  explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb)
      : Constructor(cmp),
        internal_comparator_(cmp),
        write_buffer_manager_(wb),
        table_factory_(new SkipListFactory) {
    options_.memtable_factory = table_factory_;
    ImmutableOptions ioptions(options_);
    memtable_ =
        new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_),
                     wb, kMaxSequenceNumber, 0 /* column_family_id */);
    memtable_->Ref();
  }
  // Unref() returns the memtable pointer when the refcount hits zero so it
  // can be deleted here.
  ~MemTableConstructor() override { delete memtable_->Unref(); }
  // Replaces the memtable and inserts every pair with increasing sequence
  // numbers starting at 1.
  Status FinishImpl(const Options&, const ImmutableOptions& ioptions,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
    delete memtable_->Unref();
    ImmutableOptions mem_ioptions(ioptions);
    memtable_ = new MemTable(internal_comparator_, mem_ioptions,
                             MutableCFOptions(options_), write_buffer_manager_,
                             kMaxSequenceNumber, 0 /* column_family_id */);
    memtable_->Ref();
    int seq = 1;
    for (const auto& kv : kv_map) {
      Status s = memtable_->Add(seq, kTypeValue, kv.first, kv.second,
                                nullptr /* kv_prot_info */);
      if (!s.ok()) {
        return s;
      }
      seq++;
    }
    return Status::OK();
  }
  InternalIterator* NewIterator(
      const SliceTransform* /*prefix_extractor*/) const override {
    // arena_mode=true: the memtable iterator lives in arena_ and must be
    // destroyed in place by the wrapper.
    return new KeyConvertingIterator(
        memtable_->NewIterator(ReadOptions(), &arena_), true);
  }

  bool AnywayDeleteIterator() const override { return true; }

  bool IsArenaMode() const override { return true; }

 private:
  mutable Arena arena_;  // backing storage for iterators from NewIterator()
  InternalKeyComparator internal_comparator_;
  Options options_;
  WriteBufferManager* write_buffer_manager_;
  MemTable* memtable_;
  std::shared_ptr<SkipListFactory> table_factory_;
};
538
539 class InternalIteratorFromIterator : public InternalIterator {
540 public:
InternalIteratorFromIterator(Iterator * it)541 explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {}
Valid() const542 bool Valid() const override { return it_->Valid(); }
Seek(const Slice & target)543 void Seek(const Slice& target) override { it_->Seek(target); }
SeekForPrev(const Slice & target)544 void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); }
SeekToFirst()545 void SeekToFirst() override { it_->SeekToFirst(); }
SeekToLast()546 void SeekToLast() override { it_->SeekToLast(); }
Next()547 void Next() override { it_->Next(); }
Prev()548 void Prev() override { it_->Prev(); }
key() const549 Slice key() const override { return it_->key(); }
value() const550 Slice value() const override { return it_->value(); }
status() const551 Status status() const override { return it_->status(); }
552
553 private:
554 std::unique_ptr<Iterator> it_;
555 };
556
// Runs the kv_map through a full DB instance and reads it back via the
// public DB iterator.
class DBConstructor: public Constructor {
 public:
  explicit DBConstructor(const Comparator* cmp)
      : Constructor(cmp),
        comparator_(cmp) {
    db_ = nullptr;
    NewDB();
  }
  ~DBConstructor() override { delete db_; }
  // Recreates the DB and writes each pair in its own WriteBatch.
  Status FinishImpl(const Options& /*options*/,
                    const ImmutableOptions& /*ioptions*/,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
    delete db_;
    db_ = nullptr;
    NewDB();
    for (const auto& kv : kv_map) {
      WriteBatch batch;
      EXPECT_OK(batch.Put(kv.first, kv.second));
      EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok());
    }
    return Status::OK();
  }

  InternalIterator* NewIterator(
      const SliceTransform* /*prefix_extractor*/) const override {
    return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions()));
  }

  DB* db() const override { return db_; }

 private:
  // Destroys any existing DB at the test path and opens a fresh one with a
  // small write buffer to force flushes/merging.
  void NewDB() {
    std::string name = test::PerThreadDBPath("table_testdb");

    Options options;
    options.comparator = comparator_;
    Status status = DestroyDB(name, options);
    ASSERT_TRUE(status.ok()) << status.ToString();

    options.create_if_missing = true;
    options.error_if_exists = true;
    options.write_buffer_size = 10000;  // Something small to force merging
    status = DB::Open(options, name, &db_);
    ASSERT_TRUE(status.ok()) << status.ToString();
  }

  const Comparator* comparator_;
  DB* db_;
};
609
// The kinds of data structures the harness can exercise; each selects a
// different Constructor subclass (see HarnessTest).
enum TestType {
  BLOCK_BASED_TABLE_TEST,
#ifndef ROCKSDB_LITE
  // Plain table is not built in ROCKSDB_LITE.
  PLAIN_TABLE_SEMI_FIXED_PREFIX,
  PLAIN_TABLE_FULL_STR_PREFIX,
  PLAIN_TABLE_TOTAL_ORDER,
#endif  // !ROCKSDB_LITE
  BLOCK_TEST,
  MEMTABLE_TEST,
  DB_TEST
};
621
// One combination of configuration knobs for a harness run.
struct TestArgs {
  TestType type;
  bool reverse_compare;  // use ReverseKeyComparator instead of bytewise
  int restart_interval;  // block restart interval (block-based tables)
  CompressionType compression;
  uint32_t compression_parallel_threads;
  uint32_t format_version;  // block-based table format version
  bool use_mmap;            // sets options.allow_mmap_reads
};
631
operator <<(std::ostream & os,const TestArgs & args)632 std::ostream& operator<<(std::ostream& os, const TestArgs& args) {
633 os << "type: " << args.type << " reverse_compare: " << args.reverse_compare
634 << " restart_interval: " << args.restart_interval
635 << " compression: " << args.compression
636 << " compression_parallel_threads: " << args.compression_parallel_threads
637 << " format_version: " << args.format_version
638 << " use_mmap: " << args.use_mmap;
639
640 return os;
641 }
642
// Enumerates every interesting TestArgs combination: the cross product of
// test type, comparator direction, restart interval, and the compression
// types that are actually compiled in.
static std::vector<TestArgs> GenerateArgList() {
  std::vector<TestArgs> test_args;
  std::vector<TestType> test_types = {
      BLOCK_BASED_TABLE_TEST,
#ifndef ROCKSDB_LITE
      PLAIN_TABLE_SEMI_FIXED_PREFIX,
      PLAIN_TABLE_FULL_STR_PREFIX,
      PLAIN_TABLE_TOTAL_ORDER,
#endif  // !ROCKSDB_LITE
      BLOCK_TEST,
      MEMTABLE_TEST, DB_TEST};
  std::vector<bool> reverse_compare_types = {false, true};
  std::vector<int> restart_intervals = {16, 1, 1024};
  std::vector<uint32_t> compression_parallel_threads = {1, 4};

  // Only add compression if it is supported.
  // The bool in each pair selects format_version 2 (true) vs 1 (false) below.
  std::vector<std::pair<CompressionType, bool>> compression_types;
  compression_types.emplace_back(kNoCompression, false);
  if (Snappy_Supported()) {
    compression_types.emplace_back(kSnappyCompression, false);
  }
  if (Zlib_Supported()) {
    compression_types.emplace_back(kZlibCompression, false);
    compression_types.emplace_back(kZlibCompression, true);
  }
  if (BZip2_Supported()) {
    compression_types.emplace_back(kBZip2Compression, false);
    compression_types.emplace_back(kBZip2Compression, true);
  }
  if (LZ4_Supported()) {
    compression_types.emplace_back(kLZ4Compression, false);
    compression_types.emplace_back(kLZ4Compression, true);
    compression_types.emplace_back(kLZ4HCCompression, false);
    compression_types.emplace_back(kLZ4HCCompression, true);
  }
  if (XPRESS_Supported()) {
    compression_types.emplace_back(kXpressCompression, false);
    compression_types.emplace_back(kXpressCompression, true);
  }
  if (ZSTD_Supported()) {
    compression_types.emplace_back(kZSTD, false);
    compression_types.emplace_back(kZSTD, true);
  }

  for (auto test_type : test_types) {
    for (auto reverse_compare : reverse_compare_types) {
#ifndef ROCKSDB_LITE
      if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX ||
          test_type == PLAIN_TABLE_FULL_STR_PREFIX ||
          test_type == PLAIN_TABLE_TOTAL_ORDER) {
        // Plain table doesn't use restart index or compression.
        // Only vary mmap on/off for these types.
        TestArgs one_arg;
        one_arg.type = test_type;
        one_arg.reverse_compare = reverse_compare;
        one_arg.restart_interval = restart_intervals[0];
        one_arg.compression = compression_types[0].first;
        one_arg.compression_parallel_threads = 1;
        one_arg.format_version = 0;
        one_arg.use_mmap = true;
        test_args.push_back(one_arg);
        one_arg.use_mmap = false;
        test_args.push_back(one_arg);
        continue;
      }
#endif  // !ROCKSDB_LITE

      for (auto restart_interval : restart_intervals) {
        for (auto compression_type : compression_types) {
          for (auto num_threads : compression_parallel_threads) {
            TestArgs one_arg;
            one_arg.type = test_type;
            one_arg.reverse_compare = reverse_compare;
            one_arg.restart_interval = restart_interval;
            one_arg.compression = compression_type.first;
            one_arg.compression_parallel_threads = num_threads;
            one_arg.format_version = compression_type.second ? 2 : 1;
            one_arg.use_mmap = false;
            test_args.push_back(one_arg);
          }
        }
      }
    }
  }
  return test_args;
}
728
729 // In order to make all tests run for plain table format, including
730 // those operating on empty keys, create a new prefix transformer which
731 // return fixed prefix if the slice is not shorter than the prefix length,
732 // and the full slice if it is shorter.
733 class FixedOrLessPrefixTransform : public SliceTransform {
734 private:
735 const size_t prefix_len_;
736
737 public:
FixedOrLessPrefixTransform(size_t prefix_len)738 explicit FixedOrLessPrefixTransform(size_t prefix_len) :
739 prefix_len_(prefix_len) {
740 }
741
Name() const742 const char* Name() const override { return "rocksdb.FixedPrefix"; }
743
Transform(const Slice & src) const744 Slice Transform(const Slice& src) const override {
745 assert(InDomain(src));
746 if (src.size() < prefix_len_) {
747 return src;
748 }
749 return Slice(src.data(), prefix_len_);
750 }
751
InDomain(const Slice &) const752 bool InDomain(const Slice& /*src*/) const override { return true; }
753
InRange(const Slice & dst) const754 bool InRange(const Slice& dst) const override {
755 return (dst.size() <= prefix_len_);
756 }
FullLengthEnabled(size_t *) const757 bool FullLengthEnabled(size_t* /*len*/) const override { return false; }
758 };
759
760 class HarnessTest : public testing::Test {
761 public:
HarnessTest(const TestArgs & args)762 explicit HarnessTest(const TestArgs& args)
763 : args_(args),
764 ioptions_(options_),
765 moptions_(options_),
766 write_buffer_(options_.db_write_buffer_size),
767 support_prev_(true),
768 only_support_prefix_seek_(false) {
769 options_.compression = args_.compression;
770 options_.compression_opts.parallel_threads =
771 args_.compression_parallel_threads;
772 // Use shorter block size for tests to exercise block boundary
773 // conditions more.
774 if (args_.reverse_compare) {
775 options_.comparator = &reverse_key_comparator;
776 }
777
778 internal_comparator_.reset(
779 new test::PlainInternalKeyComparator(options_.comparator));
780
781 options_.allow_mmap_reads = args_.use_mmap;
782 switch (args_.type) {
783 case BLOCK_BASED_TABLE_TEST:
784 table_options_.flush_block_policy_factory.reset(
785 new FlushBlockBySizePolicyFactory());
786 table_options_.block_size = 256;
787 table_options_.block_restart_interval = args_.restart_interval;
788 table_options_.index_block_restart_interval = args_.restart_interval;
789 table_options_.format_version = args_.format_version;
790 options_.table_factory.reset(
791 new BlockBasedTableFactory(table_options_));
792 constructor_.reset(new TableConstructor(
793 options_.comparator, true /* convert_to_internal_key_ */));
794 internal_comparator_.reset(
795 new InternalKeyComparator(options_.comparator));
796 break;
797 // Plain table is not supported in ROCKSDB_LITE
798 #ifndef ROCKSDB_LITE
799 case PLAIN_TABLE_SEMI_FIXED_PREFIX:
800 support_prev_ = false;
801 only_support_prefix_seek_ = true;
802 options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2));
803 options_.table_factory.reset(NewPlainTableFactory());
804 constructor_.reset(new TableConstructor(
805 options_.comparator, true /* convert_to_internal_key_ */));
806 internal_comparator_.reset(
807 new InternalKeyComparator(options_.comparator));
808 break;
809 case PLAIN_TABLE_FULL_STR_PREFIX:
810 support_prev_ = false;
811 only_support_prefix_seek_ = true;
812 options_.prefix_extractor.reset(NewNoopTransform());
813 options_.table_factory.reset(NewPlainTableFactory());
814 constructor_.reset(new TableConstructor(
815 options_.comparator, true /* convert_to_internal_key_ */));
816 internal_comparator_.reset(
817 new InternalKeyComparator(options_.comparator));
818 break;
819 case PLAIN_TABLE_TOTAL_ORDER:
820 support_prev_ = false;
821 only_support_prefix_seek_ = false;
822 options_.prefix_extractor = nullptr;
823
824 {
825 PlainTableOptions plain_table_options;
826 plain_table_options.user_key_len = kPlainTableVariableLength;
827 plain_table_options.bloom_bits_per_key = 0;
828 plain_table_options.hash_table_ratio = 0;
829
830 options_.table_factory.reset(
831 NewPlainTableFactory(plain_table_options));
832 }
833 constructor_.reset(new TableConstructor(
834 options_.comparator, true /* convert_to_internal_key_ */));
835 internal_comparator_.reset(
836 new InternalKeyComparator(options_.comparator));
837 break;
838 #endif // !ROCKSDB_LITE
839 case BLOCK_TEST:
840 table_options_.block_size = 256;
841 options_.table_factory.reset(
842 new BlockBasedTableFactory(table_options_));
843 constructor_.reset(new BlockConstructor(options_.comparator));
844 break;
845 case MEMTABLE_TEST:
846 table_options_.block_size = 256;
847 options_.table_factory.reset(
848 new BlockBasedTableFactory(table_options_));
849 constructor_.reset(
850 new MemTableConstructor(options_.comparator, &write_buffer_));
851 break;
852 case DB_TEST:
853 table_options_.block_size = 256;
854 options_.table_factory.reset(
855 new BlockBasedTableFactory(table_options_));
856 constructor_.reset(new DBConstructor(options_.comparator));
857 break;
858 }
859 ioptions_ = ImmutableOptions(options_);
860 moptions_ = MutableCFOptions(options_);
861 }
862
  // Queues a key/value pair with the underlying constructor; the pair is not
  // materialized into a table until Test() calls Finish().
  void Add(const std::string& key, const std::string& value) {
    constructor_->Add(key, value);
  }
866
  // Builds the table from all previously Add()ed entries, then validates the
  // table's iterator against the in-memory model (keys/data) via a forward
  // scan, a backward scan (only when the table type supports Prev), and a
  // randomized access pattern driven by `rnd`.
  void Test(Random* rnd) {
    std::vector<std::string> keys;
    stl_wrappers::KVMap data;
    constructor_->Finish(options_, ioptions_, moptions_, table_options_,
                         *internal_comparator_, &keys, &data);

    TestForwardScan(keys, data);
    if (support_prev_) {
      TestBackwardScan(keys, data);
    }
    TestRandomAccess(rnd, keys, data);
  }
879
TestForwardScan(const std::vector<std::string> &,const stl_wrappers::KVMap & data)880 void TestForwardScan(const std::vector<std::string>& /*keys*/,
881 const stl_wrappers::KVMap& data) {
882 InternalIterator* iter = constructor_->NewIterator();
883 ASSERT_TRUE(!iter->Valid());
884 iter->SeekToFirst();
885 ASSERT_OK(iter->status());
886 for (stl_wrappers::KVMap::const_iterator model_iter = data.begin();
887 model_iter != data.end(); ++model_iter) {
888 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
889 iter->Next();
890 ASSERT_OK(iter->status());
891 }
892 ASSERT_TRUE(!iter->Valid());
893 ASSERT_OK(iter->status());
894 if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
895 iter->~InternalIterator();
896 } else {
897 delete iter;
898 }
899 }
900
TestBackwardScan(const std::vector<std::string> &,const stl_wrappers::KVMap & data)901 void TestBackwardScan(const std::vector<std::string>& /*keys*/,
902 const stl_wrappers::KVMap& data) {
903 InternalIterator* iter = constructor_->NewIterator();
904 ASSERT_TRUE(!iter->Valid());
905 iter->SeekToLast();
906 ASSERT_OK(iter->status());
907 for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin();
908 model_iter != data.rend(); ++model_iter) {
909 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
910 iter->Prev();
911 ASSERT_OK(iter->status());
912 }
913 ASSERT_TRUE(!iter->Valid());
914 ASSERT_OK(iter->status());
915 if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
916 iter->~InternalIterator();
917 } else {
918 delete iter;
919 }
920 }
921
  // Drives 200 random operations (Next / SeekToFirst / Seek / Prev /
  // SeekToLast) against the table iterator while mirroring each operation on
  // an STL map iterator, asserting after every step that both iterators
  // render the same entry. Prev and SeekToLast (cases 3 and 4) are only
  // drawn when the table type supports backward iteration.
  void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys,
                        const stl_wrappers::KVMap& data) {
    static const bool kVerbose = false;
    InternalIterator* iter = constructor_->NewIterator();
    ASSERT_TRUE(!iter->Valid());
    stl_wrappers::KVMap::const_iterator model_iter = data.begin();
    if (kVerbose) fprintf(stderr, "---\n");
    for (int i = 0; i < 200; i++) {
      // Uniform(5) enables all five ops; Uniform(3) restricts to the
      // forward-only subset.
      const int toss = rnd->Uniform(support_prev_ ? 5 : 3);
      switch (toss) {
        case 0: {
          // Next: only applied while the iterator is valid, so the model
          // iterator never runs past end().
          if (iter->Valid()) {
            if (kVerbose) fprintf(stderr, "Next\n");
            iter->Next();
            ASSERT_OK(iter->status());
            ++model_iter;
            ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          }
          break;
        }

        case 1: {
          if (kVerbose) fprintf(stderr, "SeekToFirst\n");
          iter->SeekToFirst();
          ASSERT_OK(iter->status());
          model_iter = data.begin();
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }

        case 2: {
          // Seek to a randomly perturbed key; lower_bound mirrors Seek's
          // "first entry >= target" semantics.
          std::string key = PickRandomKey(rnd, keys);
          model_iter = data.lower_bound(key);
          if (kVerbose) fprintf(stderr, "Seek '%s'\n",
                                EscapeString(key).c_str());
          iter->Seek(Slice(key));
          ASSERT_OK(iter->status());
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }

        case 3: {
          // Prev: only applied while valid.
          if (iter->Valid()) {
            if (kVerbose) fprintf(stderr, "Prev\n");
            iter->Prev();
            ASSERT_OK(iter->status());
            if (model_iter == data.begin()) {
              model_iter = data.end();  // Wrap around to invalid value
            } else {
              --model_iter;
            }
            ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          }
          break;
        }

        case 4: {
          if (kVerbose) fprintf(stderr, "SeekToLast\n");
          iter->SeekToLast();
          ASSERT_OK(iter->status());
          if (keys.empty()) {
            model_iter = data.end();
          } else {
            // Position the model on the last entry via lower_bound on the
            // largest key.
            std::string last = data.rbegin()->first;
            model_iter = data.lower_bound(last);
          }
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }
      }
    }
    // Arena-allocated iterators are destroyed in place; otherwise delete.
    if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
      iter->~InternalIterator();
    } else {
      delete iter;
    }
  }
999
ToString(const stl_wrappers::KVMap & data,const stl_wrappers::KVMap::const_iterator & it)1000 std::string ToString(const stl_wrappers::KVMap& data,
1001 const stl_wrappers::KVMap::const_iterator& it) {
1002 if (it == data.end()) {
1003 return "END";
1004 } else {
1005 return "'" + it->first + "->" + it->second + "'";
1006 }
1007 }
1008
ToString(const stl_wrappers::KVMap & data,const stl_wrappers::KVMap::const_reverse_iterator & it)1009 std::string ToString(const stl_wrappers::KVMap& data,
1010 const stl_wrappers::KVMap::const_reverse_iterator& it) {
1011 if (it == data.rend()) {
1012 return "END";
1013 } else {
1014 return "'" + it->first + "->" + it->second + "'";
1015 }
1016 }
1017
ToString(const InternalIterator * it)1018 std::string ToString(const InternalIterator* it) {
1019 if (!it->Valid()) {
1020 return "END";
1021 } else {
1022 return "'" + it->key().ToString() + "->" + it->value().ToString() + "'";
1023 }
1024 }
1025
  // Picks a Seek() target: an existing key, or (when Prev is supported) a
  // variant nudged just below or just above one, so seeks land both on and
  // between real entries. Returns "foo" when the table is empty. The RNG is
  // consumed in a fixed call sequence; changing it would change the
  // deterministic random walk in TestRandomAccess.
  std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) {
    if (keys.empty()) {
      return "foo";
    } else {
      const int index = rnd->Uniform(static_cast<int>(keys.size()));
      std::string result = keys[index];
      // Uniform(1) always yields 0, so forward-only tables always seek to an
      // existing key.
      switch (rnd->Uniform(support_prev_ ? 3 : 1)) {
        case 0:
          // Return an existing key
          break;
        case 1: {
          // Attempt to return something smaller than an existing key.
          // For prefix-seek-only tables, only decrement the last byte when it
          // lies outside the extracted prefix, so the target stays in the
          // same prefix domain.
          if (result.size() > 0 && result[result.size() - 1] > '\0'
              && (!only_support_prefix_seek_
                  || options_.prefix_extractor->Transform(result).size()
                      < result.size())) {
            result[result.size() - 1]--;
          }
          break;
        }
        case 2: {
          // Return something larger than an existing key
          Increment(options_.comparator, &result);
          break;
        }
      }
      return result;
    }
  }
1055
  // Returns the underlying DB when running the DB-backed constructor;
  // returns nullptr if not running against a DB.
  DB* db() const { return constructor_->db(); }
1058
1059 private:
1060 TestArgs args_;
1061 Options options_;
1062 ImmutableOptions ioptions_;
1063 MutableCFOptions moptions_;
1064 BlockBasedTableOptions table_options_;
1065 std::unique_ptr<Constructor> constructor_;
1066 WriteBufferManager write_buffer_;
1067 bool support_prev_;
1068 bool only_support_prefix_seek_;
1069 std::shared_ptr<InternalKeyComparator> internal_comparator_;
1070 };
1071
// HarnessTest variant whose table configuration comes from the gtest
// parameter, allowing one test body to run over many configurations.
class ParameterizedHarnessTest : public HarnessTest,
                                 public testing::WithParamInterface<TestArgs> {
 public:
  ParameterizedHarnessTest() : HarnessTest(GetParam()) {}
};
1077
// Instantiate the harness tests over the full matrix of configurations
// produced by GenerateArgList().
INSTANTIATE_TEST_CASE_P(TableTest, ParameterizedHarnessTest,
                        ::testing::ValuesIn(GenerateArgList()));
1080
// HarnessTest pinned to the DB-backed constructor with a fixed configuration
// (not parameterized).
class DBHarnessTest : public HarnessTest {
 public:
  DBHarnessTest()
      : HarnessTest(TestArgs{DB_TEST, /* reverse_compare */ false,
                             /* restart_interval */ 16, kNoCompression,
                             /* compression_parallel_threads */ 1,
                             /* format_version */ 0, /* use_mmap */ false}) {}
};
1089
// Returns true iff val lies in the inclusive range [low, high]; logs the
// offending value to stderr otherwise so failing size assertions are easy to
// diagnose.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  const bool in_range = (low <= val) && (val <= high);
  if (!in_range) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            static_cast<unsigned long long>(val),
            static_cast<unsigned long long>(low),
            static_cast<unsigned long long>(high));
  }
  return in_range;
}
1100
1101 // Tests against all kinds of tables
class TableTest : public testing::Test {
 public:
  // Lazily creates and caches a test::PlainInternalKeyComparator wrapping
  // `comp`. NOTE(review): the cached instance is reused on later calls even
  // if a different `comp` is passed — callers appear to always pass the same
  // comparator; confirm before relying on it.
  const InternalKeyComparator& GetPlainInternalComparator(
      const Comparator* comp) {
    if (!plain_internal_comparator) {
      plain_internal_comparator.reset(
          new test::PlainInternalKeyComparator(comp));
    }
    return *plain_internal_comparator;
  }
  // Shared index-block test body; defined elsewhere in this file.
  void IndexTest(BlockBasedTableOptions table_options);

 private:
  std::unique_ptr<InternalKeyComparator> plain_internal_comparator;
};
1117
1118 class GeneralTableTest : public TableTest {};
// Block-based table tests, parameterized on the on-disk format_version.
class BlockBasedTableTest
    : public TableTest,
      virtual public ::testing::WithParamInterface<uint32_t> {
 public:
  BlockBasedTableTest() : format_(GetParam()) {
    env_ = ROCKSDB_NAMESPACE::Env::Default();
  }

  // Returns table options preconfigured with the format version under test.
  BlockBasedTableOptions GetBlockBasedTableOptions() {
    BlockBasedTableOptions options;
    options.format_version = format_;
    return options;
  }

  // Starts block-cache tracing into a fresh per-thread trace file and seeds
  // the constructor with two internal keys ("k01", "k02"); subsequent reads
  // are verified against the trace via VerifyBlockAccessTrace().
  void SetupTracingTest(TableConstructor* c) {
    test_path_ = test::PerThreadDBPath("block_based_table_tracing_test");
    EXPECT_OK(env_->CreateDir(test_path_));
    trace_file_path_ = test_path_ + "/block_cache_trace_file";
    TraceOptions trace_opt;
    std::unique_ptr<TraceWriter> trace_writer;
    EXPECT_OK(NewFileTraceWriter(env_, EnvOptions(), trace_file_path_,
                                 &trace_writer));
    // Always return Status::OK().
    assert(c->block_cache_tracer_
               .StartTrace(env_->GetSystemClock().get(), trace_opt,
                           std::move(trace_writer))
               .ok());
    {
      std::string user_key = "k01";
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      c->Add(encoded_key, kDummyValue);
    }
    {
      std::string user_key = "k02";
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      c->Add(encoded_key, kDummyValue);
    }
  }

  // Ends tracing, replays the trace file, and compares each record against
  // `expected_records` in order (block type, caller, cache-hit flag, and the
  // Get-specific fields for kUserGet records). Deletes the trace file and
  // test directory afterwards.
  void VerifyBlockAccessTrace(
      TableConstructor* c,
      const std::vector<BlockCacheTraceRecord>& expected_records) {
    c->block_cache_tracer_.EndTrace();

    {
      std::unique_ptr<TraceReader> trace_reader;
      Status s =
          NewFileTraceReader(env_, EnvOptions(), trace_file_path_,
                             &trace_reader);
      EXPECT_OK(s);
      BlockCacheTraceReader reader(std::move(trace_reader));
      BlockCacheTraceHeader header;
      EXPECT_OK(reader.ReadHeader(&header));
      uint32_t index = 0;
      while (s.ok()) {
        BlockCacheTraceRecord access;
        s = reader.ReadAccess(&access);
        if (!s.ok()) {
          // End of trace (or read error); the record count is checked below.
          break;
        }
        ASSERT_LT(index, expected_records.size());
        EXPECT_NE("", access.block_key);
        EXPECT_EQ(access.block_type, expected_records[index].block_type);
        EXPECT_GT(access.block_size, 0);
        EXPECT_EQ(access.caller, expected_records[index].caller);
        EXPECT_EQ(access.no_insert, expected_records[index].no_insert);
        EXPECT_EQ(access.is_cache_hit, expected_records[index].is_cache_hit);
        // Get
        if (access.caller == TableReaderCaller::kUserGet) {
          EXPECT_EQ(access.referenced_key,
                    expected_records[index].referenced_key);
          EXPECT_EQ(access.get_id, expected_records[index].get_id);
          EXPECT_EQ(access.get_from_user_specified_snapshot,
                    expected_records[index].get_from_user_specified_snapshot);
          if (access.block_type == TraceType::kBlockTraceDataBlock) {
            EXPECT_GT(access.referenced_data_size, 0);
            EXPECT_GT(access.num_keys_in_block, 0);
            EXPECT_EQ(access.referenced_key_exist_in_block,
                      expected_records[index].referenced_key_exist_in_block);
          }
        } else {
          // Non-Get callers must not carry any Get-specific payload.
          EXPECT_EQ(access.referenced_key, "");
          EXPECT_EQ(access.get_id, 0);
          EXPECT_TRUE(access.get_from_user_specified_snapshot ==
                      Boolean::kFalse);
          EXPECT_EQ(access.referenced_data_size, 0);
          EXPECT_EQ(access.num_keys_in_block, 0);
          EXPECT_TRUE(access.referenced_key_exist_in_block == Boolean::kFalse);
        }
        index++;
      }
      EXPECT_EQ(index, expected_records.size());
    }
    EXPECT_OK(env_->DeleteFile(trace_file_path_));
    EXPECT_OK(env_->DeleteDir(test_path_));
  }

 protected:
  uint64_t IndexUncompressedHelper(bool indexCompress);

 private:
  uint32_t format_;
  Env* env_;
  std::string trace_file_path_;
  std::string test_path_;
};
// Fixture for plain-table-specific tests.
class PlainTableTest : public TableTest {};
// Fixture for table-property tests that need no table machinery.
class TablePropertyTest : public testing::Test {};
// Fixture for block-based-table tail prefetch tests.
class BBTTailPrefetchTest : public TableTest {};
1228
1229 // The helper class to test the file checksum
class FileChecksumTestHelper {
 public:
  // When `convert_to_internal_key` is set, user keys are wrapped into
  // internal keys (kMaxSequenceNumber, kTypeValue) before being added to the
  // table builder.
  FileChecksumTestHelper(bool convert_to_internal_key = false)
      : convert_to_internal_key_(convert_to_internal_key) {
  }
  ~FileChecksumTestHelper() {}

  // Creates an in-memory sink wrapped in a WritableFileWriter. `sink_` is a
  // raw observer; ownership passes to the writer via `holder`.
  void CreateWriteableFile() {
    sink_ = new test::StringSink();
    std::unique_ptr<FSWritableFile> holder(sink_);
    file_writer_.reset(new WritableFileWriter(
        std::move(holder), "" /* don't care */, FileOptions()));
  }

  // Installs `checksum_generator` on the writer (which takes ownership); if
  // no writer exists yet, deletes the generator to avoid leaking it.
  void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) {
    if (file_writer_ != nullptr) {
      file_writer_->TEST_SetFileChecksumGenerator(checksum_generator);
    } else {
      delete checksum_generator;
    }
  }

  WritableFileWriter* GetFileWriter() { return file_writer_.get(); }

  // Takes ownership of the table builder used by the write/flush helpers.
  Status ResetTableBuilder(std::unique_ptr<TableBuilder>&& builder) {
    assert(builder != nullptr);
    table_builder_ = std::move(builder);
    return Status::OK();
  }

  // Fills the internal KV map with `num_entries` random 20-byte keys mapped
  // to random 100-byte values.
  void AddKVtoKVMap(int num_entries) {
    Random rnd(test::RandomSeed());
    for (int i = 0; i < num_entries; i++) {
      std::string v = rnd.RandomString(100);
      kv_map_[test::RandomKey(&rnd, 20)] = v;
    }
  }

  // Writes all buffered KVs through the table builder, finishes the table,
  // and flushes the writer. Verifies the sink holds exactly FileSize() bytes.
  Status WriteKVAndFlushTable() {
    for (const auto& kv : kv_map_) {
      if (convert_to_internal_key_) {
        ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
        std::string encoded;
        AppendInternalKey(&encoded, ikey);
        table_builder_->Add(encoded, kv.second);
      } else {
        table_builder_->Add(kv.first, kv.second);
      }
      EXPECT_TRUE(table_builder_->status().ok());
    }
    Status s = table_builder_->Finish();
    EXPECT_OK(file_writer_->Flush());
    EXPECT_OK(s);

    EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize());
    return s;
  }

  // Closes the writer and returns the checksum recorded during the write.
  std::string GetFileChecksum() {
    EXPECT_OK(file_writer_->Close());
    return table_builder_->GetFileChecksum();
  }

  const char* GetFileChecksumFuncName() {
    return table_builder_->GetFileChecksumFuncName();
  }

  // Re-reads the written file in 2048-byte chunks through
  // `file_checksum_generator` and stores the resulting checksum in
  // `*checksum`, for comparison against the write-path checksum.
  Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator,
                               std::string* checksum) {
    assert(file_checksum_generator != nullptr);
    // NOTE(review): the id is recorded but not otherwise read within this
    // helper as visible here.
    cur_uniq_id_ = checksum_uniq_id_++;
    test::StringSink* ss_rw =
        static_cast<test::StringSink*>(file_writer_->writable_file());
    std::unique_ptr<FSRandomAccessFile> source(
        new test::StringSource(ss_rw->contents()));
    file_reader_.reset(new RandomAccessFileReader(std::move(source), "test"));

    std::unique_ptr<char[]> scratch(new char[2048]);
    Slice result;
    uint64_t offset = 0;
    Status s;
    s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(),
                           nullptr, false);
    if (!s.ok()) {
      return s;
    }
    // An empty read marks end-of-file.
    while (result.size() != 0) {
      file_checksum_generator->Update(scratch.get(), result.size());
      offset += static_cast<uint64_t>(result.size());
      s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(),
                             nullptr, false);
      if (!s.ok()) {
        return s;
      }
    }
    EXPECT_EQ(offset, static_cast<uint64_t>(table_builder_->FileSize()));
    file_checksum_generator->Finalize();
    *checksum = file_checksum_generator->GetChecksum();
    return Status::OK();
  }

 private:
  bool convert_to_internal_key_;
  uint64_t cur_uniq_id_;
  std::unique_ptr<WritableFileWriter> file_writer_;
  std::unique_ptr<RandomAccessFileReader> file_reader_;
  std::unique_ptr<TableBuilder> table_builder_;
  stl_wrappers::KVMap kv_map_;
  test::StringSink* sink_ = nullptr;

  static uint64_t checksum_uniq_id_;
};
1342
// Shared id counter for all FileChecksumTestHelper instances.
uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1;

// Run the block-based table tests under both the default and the latest
// on-disk format versions.
INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest,
                        testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
                        testing::Values(test::kLatestFormatVersion));
1349
1350 // This test serves as the living tutorial for the prefix scan of user collected
1351 // properties.
// Living tutorial for prefix-scanning user collected properties: seek with
// lower_bound(prefix) and walk while entries still carry the prefix.
TEST_F(TablePropertyTest, PrefixScanTest) {
  UserCollectedProperties props{{"num.111.1", "1"},
                                {"num.111.2", "2"},
                                {"num.111.3", "3"},
                                {"num.333.1", "1"},
                                {"num.333.2", "2"},
                                {"num.333.3", "3"},
                                {"num.555.1", "1"},
                                {"num.555.2", "2"},
                                {"num.555.3", "3"}, };

  // prefixes that exist
  for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
    int num = 0;
    auto pos = props.lower_bound(prefix);
    while (pos != props.end() &&
           pos->first.compare(0, prefix.size(), prefix) == 0) {
      ++num;
      ASSERT_EQ(prefix + "." + ToString(num), pos->first);
      ASSERT_EQ(ToString(num), pos->second);
      ++pos;
    }
    ASSERT_EQ(3, num);
  }

  // prefixes that don't exist
  for (const std::string& prefix :
       {"num.000", "num.222", "num.444", "num.666"}) {
    auto pos = props.lower_bound(prefix);
    ASSERT_TRUE(pos == props.end() ||
                pos->first.compare(0, prefix.size(), prefix) != 0);
  }
}
1386
1387 // This test include all the basic checks except those for index size and block
1388 // size, which will be conducted in separated unit tests.
TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) {
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);

  c.Add("a1", "val1");
  c.Add("b2", "val2");
  c.Add("c3", "val3");
  c.Add("d4", "val4");
  c.Add("e5", "val5");
  c.Add("f6", "val6");
  c.Add("g7", "val7");
  c.Add("h8", "val8");
  c.Add("j9", "val9");
  // Each of the 9 keys is stored as an internal key carrying an 8-byte
  // sequence/type footer on top of the 2-byte user key.
  uint64_t diff_internal_user_bytes = 9 * 8;  // 8 is seq size, 9 k-v totally

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.compression = kNoCompression;
  options.statistics = CreateDBStatistics();
  options.statistics->set_stats_level(StatsLevel::kAll);
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  ImmutableOptions ioptions(options);
  MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  // With kNoCompression no block should have been counted as "not
  // compressed" (i.e. attempted and rejected).
  ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0);

  auto& props = *c.GetTableReader()->GetTableProperties();
  ASSERT_EQ(kvmap.size(), props.num_entries);

  // Every user key in this fixture is 2 bytes and every value 4 bytes.
  auto raw_key_size = kvmap.size() * 2ul;
  auto raw_value_size = kvmap.size() * 4ul;

  ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size);
  ASSERT_EQ(raw_value_size, props.raw_value_size);
  ASSERT_EQ(1ul, props.num_data_blocks);
  ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used

  // Verify data size by rebuilding an equivalent single data block by hand.
  BlockBuilder block_builder(1);
  for (const auto& item : kvmap) {
    block_builder.Add(item.first, item.second);
  }
  Slice content = block_builder.Finish();
  ASSERT_EQ(content.size() + kBlockTrailerSize + diff_internal_user_bytes,
            props.data_size);
  c.ResetTableReader();
}
1440
1441 #ifdef SNAPPY
// Builds a 10k-key Snappy-compressed table with index-block compression
// toggled by `compressed`, and returns the NUMBER_BLOCK_COMPRESSED ticker so
// callers can compare the two configurations.
uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) {
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  constexpr size_t kNumKeys = 10000;

  for (size_t k = 0; k < kNumKeys; ++k) {
    c.Add("key" + ToString(k), "val" + ToString(k));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.compression = kSnappyCompression;
  options.statistics = CreateDBStatistics();
  options.statistics->set_stats_level(StatsLevel::kAll);
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  table_options.enable_index_compression = compressed;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  ImmutableOptions ioptions(options);
  MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  c.ResetTableReader();
  return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
}
// Disabling index compression should reduce the compressed-block count by
// exactly the one index block.
TEST_P(BlockBasedTableTest, IndexUncompressed) {
  uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true);
  uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false);
  // tbl1_compressed_cnt should include 1 index block
  EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt);
}
1474 #endif // SNAPPY
1475
TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) {
  TableConstructor c(&reverse_key_comparator);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;

  // Part 1: default options — all optional component names should be empty
  // or placeholder values in the persisted table properties.
  {
    Options options;
    options.compression = CompressionType::kNoCompression;
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));

    const ImmutableOptions ioptions(options);
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);

    auto& props = *c.GetTableReader()->GetTableProperties();

    // Default comparator
    ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name);
    // No merge operator
    ASSERT_EQ("nullptr", props.merge_operator_name);
    // No prefix extractor
    ASSERT_EQ("nullptr", props.prefix_extractor_name);
    // No property collectors
    ASSERT_EQ("[]", props.property_collectors_names);
    // No filter policy is used
    ASSERT_EQ("", props.filter_policy_name);
    // Compression type == that set:
    ASSERT_EQ("NoCompression", props.compression_name);
    c.ResetTableReader();
  }

  // Part 2: customized comparator, merge operator, prefix extractor and two
  // property collector factories — all names must round-trip into the
  // persisted table properties.
  {
    Options options;
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    options.comparator = &reverse_key_comparator;
    options.merge_operator = MergeOperators::CreateUInt64AddOperator();
    options.prefix_extractor.reset(NewNoopTransform());
    options.table_properties_collector_factories.emplace_back(
        new DummyPropertiesCollectorFactory1());
    options.table_properties_collector_factories.emplace_back(
        new DummyPropertiesCollectorFactory2());

    const ImmutableOptions ioptions(options);
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);

    auto& props = *c.GetTableReader()->GetTableProperties();

    ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name);
    ASSERT_EQ("UInt64AddOperator", props.merge_operator_name);
    ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name);
    ASSERT_EQ(
        "[DummyPropertiesCollectorFactory1,DummyPropertiesCollectorFactory2]",
        props.property_collectors_names);
    ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used
    c.ResetTableReader();
  }
}
1538
TEST_P(BlockBasedTableTest, RangeDelBlock) {
  TableConstructor c(BytewiseComparator());
  std::vector<std::string> keys = {"1pika", "2chu"};
  std::vector<std::string> vals = {"p", "c"};

  // Tombstones expected back from the range-tombstone iterator after the two
  // serialized inputs are stored in the table's meta block.
  std::vector<RangeTombstone> expected_tombstones = {
      {"1pika", "2chu", 0},
      {"2chu", "c", 1},
      {"2chu", "c", 0},
      {"c", "p", 0},
  };

  for (int i = 0; i < 2; i++) {
    RangeTombstone t(keys[i], vals[i], i);
    std::pair<InternalKey, Slice> p = t.Serialize();
    c.Add(p.first.Encode().ToString(), p.second);
  }

  std::vector<std::string> sorted_keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.compression = kNoCompression;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  std::unique_ptr<InternalKeyComparator> internal_cmp(
      new InternalKeyComparator(options.comparator));
  c.Finish(options, ioptions, moptions, table_options, *internal_cmp,
           &sorted_keys, &kvmap);

  // Two passes: the second one drops the table reader first to prove the
  // iterator outlives it.
  for (int j = 0; j < 2; ++j) {
    std::unique_ptr<InternalIterator> iter(
        c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions()));
    if (j > 0) {
      // For second iteration, delete the table reader object and verify the
      // iterator can still access its metablock's range tombstones.
      c.ResetTableReader();
    }
    ASSERT_FALSE(iter->Valid());
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    for (size_t i = 0; i < expected_tombstones.size(); i++) {
      ASSERT_TRUE(iter->Valid());
      ParsedInternalKey parsed_key;
      ASSERT_OK(
          ParseInternalKey(iter->key(), &parsed_key, true /* log_err_key */));
      RangeTombstone t(parsed_key, iter->value());
      const auto& expected_t = expected_tombstones[i];
      ASSERT_EQ(t.start_key_, expected_t.start_key_);
      ASSERT_EQ(t.end_key_, expected_t.end_key_);
      ASSERT_EQ(t.seq_, expected_t.seq_);
      iter->Next();
    }
    ASSERT_TRUE(!iter->Valid());
  }
}
1598
// The configured filter policy's registered name must be persisted in the
// table properties.
TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) {
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("a1", "val1");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  auto& props = *c.GetTableReader()->GetTableProperties();
  ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
  c.ResetTableReader();
}
1617
1618 //
1619 // BlockBasedTableTest::PrefetchTest
1620 //
AssertKeysInCache(BlockBasedTable * table_reader,const std::vector<std::string> & keys_in_cache,const std::vector<std::string> & keys_not_in_cache,bool convert=false)1621 void AssertKeysInCache(BlockBasedTable* table_reader,
1622 const std::vector<std::string>& keys_in_cache,
1623 const std::vector<std::string>& keys_not_in_cache,
1624 bool convert = false) {
1625 if (convert) {
1626 for (auto key : keys_in_cache) {
1627 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1628 ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1629 }
1630 for (auto key : keys_not_in_cache) {
1631 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1632 ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1633 }
1634 } else {
1635 for (auto key : keys_in_cache) {
1636 ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
1637 }
1638 for (auto key : keys_not_in_cache) {
1639 ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
1640 }
1641 }
1642 }
1643
// Reopens the table with a fresh block cache, prefetches the key range
// [key_begin, key_end] (converting bounds to internal keys when the
// constructor requires it; nullptr means unbounded on that side), checks the
// returned status code against `expected_status`, and asserts exactly the
// expected keys ended up cached.
void PrefetchRange(TableConstructor* c, Options* opt,
                   BlockBasedTableOptions* table_options, const char* key_begin,
                   const char* key_end,
                   const std::vector<std::string>& keys_in_cache,
                   const std::vector<std::string>& keys_not_in_cache,
                   const Status expected_status = Status::OK()) {
  // reset the cache and reopen the table
  table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4);
  opt->table_factory.reset(NewBlockBasedTableFactory(*table_options));
  const ImmutableOptions ioptions2(*opt);
  const MutableCFOptions moptions(*opt);
  ASSERT_OK(c->Reopen(ioptions2, moptions));

  // prefetch
  auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader());
  Status s;
  std::unique_ptr<Slice> begin, end;
  // i_begin/i_end keep the encoded internal keys alive for the Slice views.
  std::unique_ptr<InternalKey> i_begin, i_end;
  if (key_begin != nullptr) {
    if (c->ConvertToInternalKey()) {
      i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue));
      begin.reset(new Slice(i_begin->Encode()));
    } else {
      begin.reset(new Slice(key_begin));
    }
  }
  if (key_end != nullptr) {
    if (c->ConvertToInternalKey()) {
      i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue));
      end.reset(new Slice(i_end->Encode()));
    } else {
      end.reset(new Slice(key_end));
    }
  }
  s = table_reader->Prefetch(begin.get(), end.get());

  // Only the status code is compared, so callers can expect e.g. Incomplete.
  ASSERT_TRUE(s.code() == expected_status.code());

  // assert our expectation in cache warmup
  AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache,
                    c->ConvertToInternalKey());
  c->ResetTableReader();
}
1687
TEST_P(BlockBasedTableTest, PrefetchTest) {
  // The purpose of this test is to test the prefetching operation built into
  // BlockBasedTable.
  Options opt;
  std::unique_ptr<InternalKeyComparator> ikc;
  ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
  // No compression, so the block layout below depends only on raw value sizes.
  opt.compression = kNoCompression;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_size = 1024;
  // big enough so we don't ever lose cached values.
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  // Large values (k03-k05, k07) force block boundaries at known keys.
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableOptions ioptions(opt);
  const MutableCFOptions moptions(opt);
  c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
  c.ResetTableReader();

  // We get the following data spread :
  //
  // Data block         Index
  // ========================
  // [ k01 k02 k03 ]    k03
  // [ k04 ]            k04
  // [ k05 ]            k05
  // [ k06 k07 ]        k07


  // Simple
  PrefetchRange(&c, &opt, &table_options,
                /*key_begin=*/"k01", /*key_end=*/"k05",
                /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"},
                /*keys_not_in_cache=*/{"k06", "k07"});
  PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"},
                {"k04", "k05", "k06", "k07"});
  // odd: bounds that don't match any stored key still warm the covering blocks
  PrefetchRange(&c, &opt, &table_options, "a", "z",
                {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
  PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"},
                {"k04", "k05", "k06", "k07"});
  // Edge cases
  PrefetchRange(&c, &opt, &table_options, "k00", "k06",
                {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
  PrefetchRange(&c, &opt, &table_options, "k00", "zzz",
                {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
  // null keys: nullptr means an open-ended bound on that side
  PrefetchRange(&c, &opt, &table_options, nullptr, nullptr,
                {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
  PrefetchRange(&c, &opt, &table_options, "k04", nullptr,
                {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"});
  PrefetchRange(&c, &opt, &table_options, nullptr, "k05",
                {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"});
  // invalid: begin > end must fail; only the status *code* is compared, so
  // the message slices here are not significant.
  PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {},
                Status::InvalidArgument(Slice("k06 "), Slice("k07")));
  c.ResetTableReader();
}
1755
// Verifies that total_order_seek works across all index types, including
// prefix-based hash indexes where a total-order seek must bypass the prefix
// optimization.
TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  for (int i = 0; i <= 5; ++i) {
    Options options;
    // Make each key/value an individual block
    table_options.block_size = 64;
    switch (i) {
      case 0:
        // Binary search index
        table_options.index_type = BlockBasedTableOptions::kBinarySearch;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        break;
      case 1:
        // Hash search index
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 2:
        // Hash search index with hash_index_allow_collision
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        table_options.hash_index_allow_collision = true;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 3:
        // Hash search index with filter policy
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        table_options.filter_policy.reset(NewBloomFilterPolicy(10));
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 4:
        // Two-level index
        table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        break;
      case 5:
        // Binary search with first key
        table_options.index_type =
            BlockBasedTableOptions::kBinarySearchWithFirstKey;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        break;
    }

    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    // 56-byte values: together with the key, each entry exceeds the 64-byte
    // block_size, so every key gets its own data block. Note: this was
    // previously written as std::string('a', 56), which swaps the ctor's
    // (count, char) arguments and silently builds 97 copies of '8'.
    c.Add("aaaa1", std::string(56, 'a'));
    c.Add("bbaa1", std::string(56, 'a'));
    c.Add("cccc1", std::string(56, 'a'));
    c.Add("bbbb1", std::string(56, 'a'));
    c.Add("baaa1", std::string(56, 'a'));
    c.Add("abbb1", std::string(56, 'a'));
    c.Add("cccc2", std::string(56, 'a'));
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    const ImmutableOptions ioptions(options);
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);
    // One block per key, as arranged above.
    auto props = c.GetTableReader()->GetTableProperties();
    ASSERT_EQ(7u, props->num_data_blocks);
    auto* reader = c.GetTableReader();
    ReadOptions ro;
    ro.total_order_seek = true;
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    // Seeks land on the smallest key >= target in total order, even when the
    // target's 4-byte prefix does not exist in the hash index.
    iter->Seek(InternalKey("b", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());

    iter->Seek(InternalKey("bb", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());

    iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString());
  }
}
1853
TEST_P(BlockBasedTableTest, NoopTransformSeek) {
  // Regression test: seeking with a noop prefix extractor (every key is its
  // own prefix) plus a bloom filter must still find the key.
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewNoopTransform());

  TableConstructor c(options.comparator);
  // To tickle the PrefixMayMatch bug it is important that the
  // user-key is a single byte so that the index key exactly matches
  // the user-key.
  InternalKey key("a", 1, kTypeValue);
  c.Add(key.Encode().ToString(), "b");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);

  auto* reader = c.GetTableReader();
  // Exercise both total-order and prefix seek paths.
  for (int i = 0; i < 2; ++i) {
    ReadOptions ro;
    ro.total_order_seek = (i == 0);
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    iter->Seek(key.Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString());
  }
}
1891
TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) {
  // if DB is opened with a prefix extractor of a different name,
  // prefix bloom is skipped when read the file
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(2));
  table_options.whole_key_filtering = false;

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  // File is written with a 1-byte prefix extractor...
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  TableConstructor c(options.comparator);
  InternalKey key("abcdefghijk", 1, kTypeValue);
  c.Add(key.Encode().ToString(), "test");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);
  // TODO(Zhongyi): update test to use MutableCFOptions
  // ...then reopened with a mismatching 9-byte prefix extractor; the stale
  // prefix bloom filter must be ignored rather than causing false negatives.
  options.prefix_extractor.reset(NewFixedPrefixTransform(9));
  const ImmutableOptions new_ioptions(options);
  const MutableCFOptions new_moptions(options);
  ASSERT_OK(c.Reopen(new_ioptions, new_moptions));
  auto reader = c.GetTableReader();
  ReadOptions read_options;
  std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
      read_options, new_moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Test point lookup
  // only one kv
  for (auto& kv : kvmap) {
    db_iter->Seek(kv.first);
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_OK(db_iter->status());
    ASSERT_EQ(db_iter->key(), kv.first);
    ASSERT_EQ(db_iter->value(), kv.second);
  }
}
1935
AddInternalKey(TableConstructor * c,const std::string & prefix,std::string value="v",int=800)1936 void AddInternalKey(TableConstructor* c, const std::string& prefix,
1937 std::string value = "v", int /*suffix_len*/ = 800) {
1938 static Random rnd(1023);
1939 InternalKey k(prefix + rnd.RandomString(800), 0, kTypeValue);
1940 c->Add(k.Encode().ToString(), value);
1941 }
1942
// Shared index exercise used by the Binary/Hash/Partition/FirstKey index
// tests: builds a table whose keys pair up two-per-block, then checks seek
// behavior at prefix lower/upper bounds, exact keys, non-existent prefixes,
// and repeated reseeks.
void TableTest::IndexTest(BlockBasedTableOptions table_options) {
  TableConstructor c(BytewiseComparator());

  // keys with prefix length 3, make sure the key/value is big enough to fill
  // one block
  AddInternalKey(&c, "0015");
  AddInternalKey(&c, "0035");

  AddInternalKey(&c, "0054");
  AddInternalKey(&c, "0055");

  AddInternalKey(&c, "0056");
  AddInternalKey(&c, "0057");

  AddInternalKey(&c, "0058");
  AddInternalKey(&c, "0075");

  AddInternalKey(&c, "0076");
  AddInternalKey(&c, "0095");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  // block_size of 1700 fits exactly two of the ~800-byte keys per block,
  // giving the 5-block layout asserted below.
  table_options.block_size = 1700;
  table_options.block_cache = NewLRUCache(1024, 4);
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  auto reader = c.GetTableReader();

  auto props = reader->GetTableProperties();
  ASSERT_EQ(5u, props->num_data_blocks);

  // TODO(Zhongyi): update test to use MutableCFOptions
  ReadOptions read_options;
  std::unique_ptr<InternalIterator> index_iter(reader->NewIterator(
      read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // -- Find keys do not exist, but have common prefix.
  std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
  std::vector<std::string> lower_bound = {
      keys[0], keys[1], keys[2], keys[7], keys[9],
  };

  // find the lower bound of the prefix
  for (size_t i = 0; i < prefixes.size(); ++i) {
    index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
    ASSERT_OK(index_iter->status());
    ASSERT_TRUE(index_iter->Valid());

    // seek the first element in the block
    ASSERT_EQ(lower_bound[i], index_iter->key().ToString());
    ASSERT_EQ("v", index_iter->value().ToString());
  }

  // find the upper bound of prefixes
  std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };

  // find existing keys
  for (const auto& item : kvmap) {
    auto ukey = ExtractUserKey(item.first).ToString();
    index_iter->Seek(ukey);

    // ASSERT_OK(regular_iter->status());
    ASSERT_OK(index_iter->status());

    // ASSERT_TRUE(regular_iter->Valid());
    ASSERT_TRUE(index_iter->Valid());

    ASSERT_EQ(item.first, index_iter->key().ToString());
    ASSERT_EQ(item.second, index_iter->value().ToString());
  }

  for (size_t i = 0; i < prefixes.size(); ++i) {
    // the key is greater than any existing keys.
    auto key = prefixes[i] + "9";
    index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());

    // Hash-based indexes may report NotFound here; both ok and NotFound are
    // acceptable, but NotFound must imply an invalid iterator.
    ASSERT_TRUE(index_iter->status().ok() || index_iter->status().IsNotFound());
    ASSERT_TRUE(!index_iter->status().IsNotFound() || !index_iter->Valid());
    if (i == prefixes.size() - 1) {
      // last key
      ASSERT_TRUE(!index_iter->Valid());
    } else {
      ASSERT_TRUE(index_iter->Valid());
      // seek the first element in the block
      ASSERT_EQ(upper_bound[i], index_iter->key().ToString());
      ASSERT_EQ("v", index_iter->value().ToString());
    }
  }

  // find keys with prefix that don't match any of the existing prefixes.
  std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
  for (const auto& prefix : non_exist_prefixes) {
    index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());
    // regular_iter->Seek(prefix);

    ASSERT_OK(index_iter->status());
    // Seek to non-existing prefixes should yield either invalid, or a
    // key with prefix greater than the target.
    if (index_iter->Valid()) {
      Slice ukey = ExtractUserKey(index_iter->key());
      Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
      ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0);
    }
  }
  for (const auto& prefix : non_exist_prefixes) {
    index_iter->SeekForPrev(InternalKey(prefix, 0, kTypeValue).Encode());
    // regular_iter->Seek(prefix);

    ASSERT_OK(index_iter->status());
    // SeekForPrev to non-existing prefixes should yield either invalid, or a
    // key with prefix smaller than the target.
    if (index_iter->Valid()) {
      Slice ukey = ExtractUserKey(index_iter->key());
      Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
      ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) > 0);
    }
  }

  {
    // Test reseek case. It should impact partitioned index more.
    ReadOptions ro;
    ro.total_order_seek = true;
    std::unique_ptr<InternalIterator> index_iter2(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    // Things to cover in partitioned index:
    // 1. Both of Seek() and SeekToLast() has optimization to prevent
    //    rereek leaf index block if it remains to the same one, and
    //    they reuse the same variable.
    // 2. When Next() or Prev() is called, the block moves, so the
    //    optimization should kick in only with the current one.
    index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0075", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Next();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Next();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
  }

  c.ResetTableReader();
}
2145
// Runs the shared index test suite against a plain binary-search index.
TEST_P(BlockBasedTableTest, BinaryIndexTest) {
  BlockBasedTableOptions opts = GetBlockBasedTableOptions();
  opts.index_type = BlockBasedTableOptions::kBinarySearch;
  IndexTest(opts);
}
2151
// Runs the shared index test suite against a hash-search index.
TEST_P(BlockBasedTableTest, HashIndexTest) {
  BlockBasedTableOptions opts = GetBlockBasedTableOptions();
  opts.index_type = BlockBasedTableOptions::kHashSearch;
  IndexTest(opts);
}
2157
// Runs the shared index test suite against the two-level (partitioned) index,
// sweeping the metadata block size from 1 byte to just past the estimated
// total index size so every possible partition count is exercised.
TEST_P(BlockBasedTableTest, PartitionIndexTest) {
  const int kMaxIndexKeys = 5;
  const int kEstMaxIndexKeyValueSize = 32;
  const int kEstMaxIndexSize = kMaxIndexKeys * kEstMaxIndexKeyValueSize;
  for (int meta_block_size = 1; meta_block_size <= kEstMaxIndexSize + 1;
       ++meta_block_size) {
    BlockBasedTableOptions opts = GetBlockBasedTableOptions();
    opts.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
    opts.metadata_block_size = meta_block_size;
    IndexTest(opts);
  }
}
2169
TEST_P(BlockBasedTableTest, IndexSeekOptimizationIncomplete) {
  // With read_tier = kBlockCacheTier a seek whose block is not cached must
  // return Incomplete; repeating the same seek must behave identically
  // instead of crashing (the reseek optimization used to mishandle this).
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);

  TableConstructor c(BytewiseComparator());
  AddInternalKey(&c, "pika");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  ASSERT_EQ(1, keys.size());

  auto reader = c.GetTableReader();
  ReadOptions ropt;
  // Cache-only reads: anything not already in the block cache is Incomplete.
  ropt.read_tier = ReadTier::kBlockCacheTier;
  std::unique_ptr<InternalIterator> iter(reader->NewIterator(
      ropt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Helper to build an encoded internal key from a user key.
  auto ikey = [](Slice user_key) {
    return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
  };

  iter->Seek(ikey("pika"));
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->status().IsIncomplete());

  // This used to crash at some point.
  iter->Seek(ikey("pika"));
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->status().IsIncomplete());
}
2208
// Runs the shared index test suite against a binary-search index that also
// stores each block's first key.
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey1) {
  BlockBasedTableOptions opts = GetBlockBasedTableOptions();
  opts.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey;
  IndexTest(opts);
}
2214
2215 class CustomFlushBlockPolicy : public FlushBlockPolicyFactory,
2216 public FlushBlockPolicy {
2217 public:
CustomFlushBlockPolicy(std::vector<int> keys_per_block)2218 explicit CustomFlushBlockPolicy(std::vector<int> keys_per_block)
2219 : keys_per_block_(keys_per_block) {}
2220
Name() const2221 const char* Name() const override { return "CustomFlushBlockPolicy"; }
2222
NewFlushBlockPolicy(const BlockBasedTableOptions &,const BlockBuilder &) const2223 FlushBlockPolicy* NewFlushBlockPolicy(const BlockBasedTableOptions&,
2224 const BlockBuilder&) const override {
2225 return new CustomFlushBlockPolicy(keys_per_block_);
2226 }
2227
Update(const Slice &,const Slice &)2228 bool Update(const Slice&, const Slice&) override {
2229 if (keys_in_current_block_ >= keys_per_block_.at(current_block_idx_)) {
2230 ++current_block_idx_;
2231 keys_in_current_block_ = 1;
2232 return true;
2233 }
2234
2235 ++keys_in_current_block_;
2236 return false;
2237 }
2238
2239 std::vector<int> keys_per_block_;
2240
2241 int current_block_idx_ = 0;
2242 int keys_in_current_block_ = 0;
2243 };
2244
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey2) {
  // Verifies lazy data-block loading with kBinarySearchWithFirstKey: when the
  // index stores each block's first key, a seek that lands exactly on a block
  // boundary can be answered from the index alone, deferring the data-block
  // read until PrepareValue(). Runs the same sequence against a plain binary
  // index as a control; the expected cache hit/miss counters differ between
  // the two modes and depend on the exact call order below.
  for (int use_first_key = 0; use_first_key < 2; ++use_first_key) {
    SCOPED_TRACE("use_first_key = " + std::to_string(use_first_key));
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    table_options.index_type =
        use_first_key ? BlockBasedTableOptions::kBinarySearchWithFirstKey
                      : BlockBasedTableOptions::kBinarySearch;
    table_options.block_cache = NewLRUCache(10000);  // fits all blocks
    table_options.index_shortening =
        BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
    // Force the 2/1/3/2 keys-per-block layout documented below.
    table_options.flush_block_policy_factory =
        std::make_shared<CustomFlushBlockPolicy>(std::vector<int>{2, 1, 3, 2});
    Options options;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    options.statistics = CreateDBStatistics();
    Statistics* stats = options.statistics.get();
    std::unique_ptr<InternalKeyComparator> comparator(
        new InternalKeyComparator(BytewiseComparator()));
    const ImmutableOptions ioptions(options);
    const MutableCFOptions moptions(options);

    TableConstructor c(BytewiseComparator());

    // Block 0.
    AddInternalKey(&c, "aaaa", "v0");
    AddInternalKey(&c, "aaac", "v1");

    // Block 1.
    AddInternalKey(&c, "aaca", "v2");

    // Block 2.
    AddInternalKey(&c, "caaa", "v3");
    AddInternalKey(&c, "caac", "v4");
    AddInternalKey(&c, "caae", "v5");

    // Block 3.
    AddInternalKey(&c, "ccaa", "v6");
    AddInternalKey(&c, "ccac", "v7");

    // Write the file.
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
             &kvmap);
    ASSERT_EQ(8, keys.size());

    auto reader = c.GetTableReader();
    auto props = reader->GetTableProperties();
    ASSERT_EQ(4u, props->num_data_blocks);
    ReadOptions read_options;
    // allow_unprepared_value=true is what enables the deferred block read.
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized,
        /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true));

    // Shouldn't have read data blocks before iterator is seeked.
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Helper to build an encoded internal key from a user key.
    auto ikey = [](Slice user_key) {
      return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
    };

    // Seek to a key between blocks. If index contains first key, we shouldn't
    // read any data blocks until value is requested.
    iter->Seek(ikey("aaba"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 0 : 1,
              stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_TRUE(iter->PrepareValue());
    EXPECT_EQ("v2", iter->value().ToString());
    EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to the middle of a block. The block should be read right away.
    iter->Seek(ikey("caab"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[4], iter->key().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    ASSERT_TRUE(iter->PrepareValue());
    EXPECT_EQ("v4", iter->value().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to just before the same block and don't access value.
    // The iterator should keep pinning the block contents.
    iter->Seek(ikey("baaa"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[3], iter->key().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to the same block again to check that the block is still pinned.
    iter->Seek(ikey("caae"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[5], iter->key().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    ASSERT_TRUE(iter->PrepareValue());
    EXPECT_EQ("v5", iter->value().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward and fall through to the next block. Don't access value.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[6], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward again. Block should be read.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[7], iter->key().ToString());
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_TRUE(iter->PrepareValue());
    EXPECT_EQ("v7", iter->value().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward and reach the end.
    iter->Next();
    EXPECT_FALSE(iter->Valid());
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to a single-key block and step forward without accessing value.
    iter->Seek(ikey("aaca"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 0 : 1,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[3], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 1 : 2,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    ASSERT_TRUE(iter->PrepareValue());
    EXPECT_EQ("v3", iter->value().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    // Seek between blocks and step back without accessing value.
    iter->Seek(ikey("aaca"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    iter->Prev();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[1], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    // All blocks are in cache now, there'll be no more misses ever.
    EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_TRUE(iter->PrepareValue());
    EXPECT_EQ("v1", iter->value().ToString());

    // Next into the next block again.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 4,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to first and step back without accessing value.
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[0], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 5,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->Prev();
    EXPECT_FALSE(iter->Valid());
    EXPECT_EQ(use_first_key ? 2 : 5,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Do some SeekForPrev() and SeekToLast() just to cover all methods.
    iter->SeekForPrev(ikey("caad"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[4], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 3 : 6,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    ASSERT_TRUE(iter->PrepareValue());
    EXPECT_EQ("v4", iter->value().ToString());
    EXPECT_EQ(use_first_key ? 3 : 6,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->SeekToLast();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[7], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 4 : 7,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    ASSERT_TRUE(iter->PrepareValue());
    EXPECT_EQ("v7", iter->value().ToString());
    EXPECT_EQ(use_first_key ? 4 : 7,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    c.ResetTableReader();
  }
}
2450
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKeyGlobalSeqno) {
  // When a file carries a global sequence number (here 42), a first-key index
  // entry must have that seqno substituted before being exposed to the
  // iterator, and the key must still be served from the index without
  // touching the data block until PrepareValue().
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey;
  table_options.block_cache = NewLRUCache(10000);
  Options options;
  options.statistics = CreateDBStatistics();
  Statistics* stats = options.statistics.get();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);

  TableConstructor c(BytewiseComparator(), /* convert_to_internal_key */ false,
                     /* level */ -1, /* largest_seqno */ 42);

  // Keys are written with seqno 0; the global seqno overrides it on read.
  c.Add(InternalKey("b", 0, kTypeValue).Encode().ToString(), "x");
  c.Add(InternalKey("c", 0, kTypeValue).Encode().ToString(), "y");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  ASSERT_EQ(2, keys.size());

  auto reader = c.GetTableReader();
  auto props = reader->GetTableProperties();
  ASSERT_EQ(1u, props->num_data_blocks);
  ReadOptions read_options;
  std::unique_ptr<InternalIterator> iter(reader->NewIterator(
      read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized,
      /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true));

  iter->Seek(InternalKey("a", 0, kTypeValue).Encode().ToString());
  ASSERT_TRUE(iter->Valid());
  // The returned key carries the global seqno 42, not the written seqno 0.
  EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(),
            iter->key().ToString());
  EXPECT_NE(keys[0], iter->key().ToString());
  // Key should have been served from index, without reading data blocks.
  EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

  ASSERT_TRUE(iter->PrepareValue());
  EXPECT_EQ("x", iter->value().ToString());
  EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(),
            iter->key().ToString());

  c.ResetTableReader();
}
2502
2503 // It's very hard to figure out the index block size of a block accurately.
2504 // To make sure we get the index size, we just make sure as key number
2505 // grows, the index block size also grows.
TEST_P(BlockBasedTableTest, IndexSizeStat) {
  // Use random keys: human-readable text compresses too well, which would
  // make the change in index block size insignificant from round to round.
  Random rnd(test::RandomSeed());
  std::vector<std::string> keys;
  for (int i = 0; i < 100; ++i) {
    keys.push_back(rnd.RandomString(10000));
  }

  // Build a table with one more key on every round; the reported index size
  // is expected to grow strictly each time.
  uint64_t prev_index_size = 0;
  for (size_t num_keys = 1; num_keys < keys.size(); ++num_keys) {
    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    for (size_t j = 0; j < num_keys; ++j) {
      c.Add(keys[j], "val");
    }

    Options options;
    options.compression = kNoCompression;
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    table_options.block_restart_interval = 1;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));

    std::vector<std::string> ks;
    stl_wrappers::KVMap kvmap;
    const ImmutableOptions ioptions(options);
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &ks, &kvmap);

    auto index_size = c.GetTableReader()->GetTableProperties()->index_size;
    ASSERT_GT(index_size, prev_index_size);
    prev_index_size = index_size;
    c.ResetTableReader();
  }
}
2546
// Each key/value pair is sized close to the block size so that the table
// ends up with roughly one data block per pair; verify num_data_blocks
// matches the number of entries.
TEST_P(BlockBasedTableTest, NumBlockStat) {
  Random rnd(test::RandomSeed());
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);

  Options options;
  options.compression = kNoCompression;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  table_options.block_size = 1000;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  // Keys slightly smaller than the block size => about one pair per block.
  for (int i = 0; i < 10; ++i) {
    c.Add(rnd.RandomString(900), "val");
  }

  std::vector<std::string> ks;
  stl_wrappers::KVMap kvmap;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &ks, &kvmap);
  ASSERT_EQ(kvmap.size(),
            c.GetTableReader()->GetTableProperties()->num_data_blocks);
  c.ResetTableReader();
}
2573
// Issue the same Get() twice with block-cache tracing enabled and verify the
// exact sequence of trace records: two prefetch records at table open
// (index + filter), then per-Get index/filter/data accesses, with the second
// Get observing only cache hits.
TEST_P(BlockBasedTableTest,TracingGetTest)2574 TEST_P(BlockBasedTableTest, TracingGetTest) {
2575 TableConstructor c(BytewiseComparator());
2576 Options options;
2577 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2578 options.create_if_missing = true;
2579 table_options.block_cache = NewLRUCache(1024 * 1024, 0);
2580 table_options.cache_index_and_filter_blocks = true;
2581 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
2582 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2583 SetupTracingTest(&c);
2584 std::vector<std::string> keys;
2585 stl_wrappers::KVMap kvmap;
2586 ImmutableOptions ioptions(options);
2587 MutableCFOptions moptions(options);
2588 c.Finish(options, ioptions, moptions, table_options,
2589 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2590 std::string user_key = "k01";
2591 InternalKey internal_key(user_key, 0, kTypeValue);
2592 std::string encoded_key = internal_key.Encode().ToString();
// Two lookups with distinct tracing ids; the first populates the block
// cache, the second should be served from it.
2593 for (uint32_t i = 1; i <= 2; i++) {
2594 PinnableSlice value;
2595 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2596 GetContext::kNotFound, user_key, &value, nullptr,
2597 nullptr, true, nullptr, nullptr, nullptr, nullptr,
2598 nullptr, nullptr, /*tracing_get_id=*/i);
2599 get_perf_context()->Reset();
2600 ASSERT_OK(c.GetTableReader()->Get(ReadOptions(), encoded_key, &get_context,
2601 moptions.prefix_extractor.get()));
2602 ASSERT_EQ(get_context.State(), GetContext::kFound);
2603 ASSERT_EQ(value.ToString(), kDummyValue);
2604 }
2605
2606 // Verify traces.
// NOTE: records are matched in order, so the push_back sequence below must
// mirror the exact access order performed above.
2607 std::vector<BlockCacheTraceRecord> expected_records;
2608 // The first two records should be prefetching index and filter blocks.
2609 BlockCacheTraceRecord record;
2610 record.block_type = TraceType::kBlockTraceIndexBlock;
2611 record.caller = TableReaderCaller::kPrefetch;
2612 record.is_cache_hit = Boolean::kFalse;
2613 record.no_insert = Boolean::kFalse;
2614 expected_records.push_back(record);
2615 record.block_type = TraceType::kBlockTraceFilterBlock;
2616 expected_records.push_back(record);
2617 // Then we should have three records for one index, one filter, and one data
2618 // block access.
2619 record.get_id = 1;
2620 record.block_type = TraceType::kBlockTraceIndexBlock;
2621 record.caller = TableReaderCaller::kUserGet;
2622 record.get_from_user_specified_snapshot = Boolean::kFalse;
2623 record.referenced_key = encoded_key;
2624 record.referenced_key_exist_in_block = Boolean::kTrue;
2625 record.is_cache_hit = Boolean::kTrue;
2626 expected_records.push_back(record);
2627 record.block_type = TraceType::kBlockTraceFilterBlock;
2628 expected_records.push_back(record);
// The data block was not prefetched, so the first Get misses on it.
2629 record.is_cache_hit = Boolean::kFalse;
2630 record.block_type = TraceType::kBlockTraceDataBlock;
2631 expected_records.push_back(record);
2632 // The second get should all observe cache hits.
2633 record.is_cache_hit = Boolean::kTrue;
2634 record.get_id = 2;
2635 record.block_type = TraceType::kBlockTraceIndexBlock;
2636 record.caller = TableReaderCaller::kUserGet;
2637 record.get_from_user_specified_snapshot = Boolean::kFalse;
2638 record.referenced_key = encoded_key;
2639 expected_records.push_back(record);
2640 record.block_type = TraceType::kBlockTraceFilterBlock;
2641 expected_records.push_back(record);
2642 record.block_type = TraceType::kBlockTraceDataBlock;
2643 expected_records.push_back(record);
2644 VerifyBlockAccessTrace(&c, expected_records);
2645 c.ResetTableReader();
2646 }
2647
// With tracing enabled, ApproximateOffsetOf() should touch only the index
// block. Expect two prefetch records (index + filter) from table open,
// followed by one index-block cache hit per call.
TEST_P(BlockBasedTableTest, TracingApproximateOffsetOfTest) {
  TableConstructor c(BytewiseComparator());
  Options options;
  options.create_if_missing = true;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_cache = NewLRUCache(1024 * 1024, 0);
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  SetupTracingTest(&c);

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  ImmutableOptions ioptions(options);
  MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  // Two identical size estimations; both should hit the cached index block.
  for (uint32_t round = 1; round <= 2; round++) {
    std::string user_key = "k01";
    InternalKey internal_key(user_key, 0, kTypeValue);
    std::string encoded_key = internal_key.Encode().ToString();
    c.GetTableReader()->ApproximateOffsetOf(
        encoded_key, TableReaderCaller::kUserApproximateSize);
  }

  // Verify traces. Records are matched in order: first the two open-time
  // prefetches, then exactly two index accesses (no filter, no data block).
  std::vector<BlockCacheTraceRecord> expected_records;
  BlockCacheTraceRecord record;
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kPrefetch;
  record.is_cache_hit = Boolean::kFalse;
  record.no_insert = Boolean::kFalse;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceFilterBlock;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kUserApproximateSize;
  record.is_cache_hit = Boolean::kTrue;
  expected_records.push_back(record);
  expected_records.push_back(record);
  VerifyBlockAccessTrace(&c, expected_records);
  c.ResetTableReader();
}
2691
// Run two full iterations over the table with tracing enabled and verify the
// recorded block accesses: two open-time prefetch records, then (per pass)
// one index access and two data-block accesses — misses on the first pass,
// all cache hits on the second.
TEST_P(BlockBasedTableTest,TracingIterator)2692 TEST_P(BlockBasedTableTest, TracingIterator) {
2693 TableConstructor c(BytewiseComparator());
2694 Options options;
2695 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2696 options.create_if_missing = true;
2697 table_options.block_cache = NewLRUCache(1024 * 1024, 0);
2698 table_options.cache_index_and_filter_blocks = true;
2699 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
2700 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2701 SetupTracingTest(&c);
2702 std::vector<std::string> keys;
2703 stl_wrappers::KVMap kvmap;
2704 ImmutableOptions ioptions(options);
2705 MutableCFOptions moptions(options);
2706 c.Finish(options, ioptions, moptions, table_options,
2707 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2708
// Two full scans; the first warms the block cache for the second.
2709 for (uint32_t i = 1; i <= 2; i++) {
2710 ReadOptions read_options;
2711 std::unique_ptr<InternalIterator> iter(c.GetTableReader()->NewIterator(
2712 read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
2713 /*skip_filters=*/false, TableReaderCaller::kUserIterator));
2714 iter->SeekToFirst();
2715 while (iter->Valid()) {
2716 iter->key();
2717 iter->value();
2718 iter->Next();
2719 }
2720 ASSERT_OK(iter->status());
2721 iter.reset();
2722 }
2723
2724 // Verify traces.
// NOTE: records are matched in order; the push_back sequence below mirrors
// the access order of the two scans above.
2725 std::vector<BlockCacheTraceRecord> expected_records;
2726 // The first two records should be prefetching index and filter blocks.
2727 BlockCacheTraceRecord record;
2728 record.block_type = TraceType::kBlockTraceIndexBlock;
2729 record.caller = TableReaderCaller::kPrefetch;
2730 record.is_cache_hit = Boolean::kFalse;
2731 record.no_insert = Boolean::kFalse;
2732 expected_records.push_back(record);
2733 record.block_type = TraceType::kBlockTraceFilterBlock;
2734 expected_records.push_back(record);
2735 // Then we should have three records for index and two data block access.
2736 record.block_type = TraceType::kBlockTraceIndexBlock;
2737 record.caller = TableReaderCaller::kUserIterator;
2738 record.is_cache_hit = Boolean::kTrue;
2739 expected_records.push_back(record);
2740 record.block_type = TraceType::kBlockTraceDataBlock;
2741 record.is_cache_hit = Boolean::kFalse;
2742 expected_records.push_back(record);
2743 expected_records.push_back(record);
2744 // When we iterate this file for the second time, we should observe all cache
2745 // hits.
2746 record.block_type = TraceType::kBlockTraceIndexBlock;
2747 record.is_cache_hit = Boolean::kTrue;
2748 expected_records.push_back(record);
2749 record.block_type = TraceType::kBlockTraceDataBlock;
2750 expected_records.push_back(record);
2751 expected_records.push_back(record);
2752 VerifyBlockAccessTrace(&c, expected_records);
2753 c.ResetTableReader();
2754 }
2755
2756 // A simple tool that takes the snapshot of block cache statistics.
2757 class BlockCachePropertiesSnapshot {
2758 public:
// Captures the current values of all block-cache tickers from `statistics`
// at construction time, so that later snapshots can be compared against it.
BlockCachePropertiesSnapshot(Statistics * statistics)2759 explicit BlockCachePropertiesSnapshot(Statistics* statistics) {
2760 block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
2761 block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
2762 index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
2763 index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
2764 data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
2765 data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
2766 filter_block_cache_miss =
2767 statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS);
2768 filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT);
2769 block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ);
2770 block_cache_bytes_write =
2771 statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE);
2772 }
2773
// Asserts the index-block miss/hit counters captured at construction.
AssertIndexBlockStat(int64_t expected_index_block_cache_miss,int64_t expected_index_block_cache_hit)2774 void AssertIndexBlockStat(int64_t expected_index_block_cache_miss,
2775 int64_t expected_index_block_cache_hit) {
2776 ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
2777 ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
2778 }
2779
// Asserts the filter-block miss/hit counters captured at construction.
AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,int64_t expected_filter_block_cache_hit)2780 void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,
2781 int64_t expected_filter_block_cache_hit) {
2782 ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss);
2783 ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit);
2784 }
2785
2786 // Check if the fetched props matches the expected ones.
2787 // TODO(kailiu) Use this only when you disabled filter policy!
// Also cross-checks that the aggregate miss/hit tickers equal the sum of the
// index and data components (only valid with the filter policy disabled).
AssertEqual(int64_t expected_index_block_cache_miss,int64_t expected_index_block_cache_hit,int64_t expected_data_block_cache_miss,int64_t expected_data_block_cache_hit) const2788 void AssertEqual(int64_t expected_index_block_cache_miss,
2789 int64_t expected_index_block_cache_hit,
2790 int64_t expected_data_block_cache_miss,
2791 int64_t expected_data_block_cache_hit) const {
2792 ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
2793 ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
2794 ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss);
2795 ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit);
2796 ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss,
2797 block_cache_miss);
2798 ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit,
2799 block_cache_hit);
2800 }
2801
// Bytes read from / written to the block cache at snapshot time.
GetCacheBytesRead()2802 int64_t GetCacheBytesRead() { return block_cache_bytes_read; }
2803
GetCacheBytesWrite()2804 int64_t GetCacheBytesWrite() { return block_cache_bytes_write; }
2805
2806 private:
2807 int64_t block_cache_miss = 0;
2808 int64_t block_cache_hit = 0;
2809 int64_t index_block_cache_miss = 0;
2810 int64_t index_block_cache_hit = 0;
2811 int64_t data_block_cache_miss = 0;
2812 int64_t data_block_cache_hit = 0;
2813 int64_t filter_block_cache_miss = 0;
2814 int64_t filter_block_cache_hit = 0;
2815 int64_t block_cache_bytes_read = 0;
2816 int64_t block_cache_bytes_write = 0;
2817 };
2818
2819 // Make sure, by default, index/filter blocks were pre-loaded (meaning we won't
2820 // use block cache to store them).
// With cache_index_and_filter_blocks left off (the default), index and
// filter blocks are preloaded into the table reader rather than stored in
// the block cache, so the corresponding cache tickers must stay at zero.
TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) {
  Options options;
  options.create_if_missing = true;
  options.statistics = CreateDBStatistics();
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_cache = NewLRUCache(1024, 4);
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("key", "value");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  // Index/filter blocks are preloaded, so neither lives in the block cache.
  auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
  ASSERT_FALSE(reader->TEST_FilterBlockInCache());
  ASSERT_FALSE(reader->TEST_IndexBlockInCache());

  {
    // Nothing has been accessed yet: all counters are zero.
    BlockCachePropertiesSnapshot props(options.statistics.get());
    props.AssertIndexBlockStat(0, 0);
    props.AssertFilterBlockStat(0, 0);
  }

  {
    GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                           GetContext::kNotFound, Slice(), nullptr, nullptr,
                           nullptr, true, nullptr, nullptr);
    // Lookup of a missing key — a hack that just triggers
    // BlockBasedTable::GetFilter.
    ASSERT_OK(reader->Get(ReadOptions(), "non-exist-key", &get_context,
                          moptions.prefix_extractor.get()));
    // Preloaded blocks never register cache hits or misses.
    BlockCachePropertiesSnapshot props(options.statistics.get());
    props.AssertIndexBlockStat(0, 0);
    props.AssertFilterBlockStat(0, 0);
  }
}
2863
2864 // Due to the difficulties of the interaction between statistics, this test
2865 // only tests the case when "index block is put to block cache"
// Tracks block-cache hit/miss tickers through three scenarios:
//   1. a regular cache: the index block is cached at open, then hit by
//      iterators, while the first data-block read is a miss;
//   2. a 1-byte cache: everything is evicted immediately, so every access
//      is a miss and no bytes are ever read from the cache;
//   3. a table built without a filter, reopened with a filter policy: no
//      filter block exists, so filter tickers stay at zero.
TEST_P(BlockBasedTableTest,FilterBlockInBlockCache)2866 TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
2867 // -- Table construction
2868 Options options;
2869 options.create_if_missing = true;
2870 options.statistics = CreateDBStatistics();
2871
2872 // Enable the cache for index/filter blocks
2873 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2874 LRUCacheOptions co;
2875 co.capacity = 2048;
2876 co.num_shard_bits = 2;
2877 co.metadata_charge_policy = kDontChargeCacheMetadata;
2878 table_options.block_cache = NewLRUCache(co);
2879 table_options.cache_index_and_filter_blocks = true;
2880 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2881 std::vector<std::string> keys;
2882 stl_wrappers::KVMap kvmap;
2883
2884 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2885 c.Add("key", "value");
2886 const ImmutableOptions ioptions(options);
2887 const MutableCFOptions moptions(options);
2888 c.Finish(options, ioptions, moptions, table_options,
2889 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2890 // preloading filter/index blocks is prohibited.
2891 auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2892 ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2893 ASSERT_TRUE(reader->TEST_IndexBlockInCache());
2894
2895 // -- PART 1: Open with regular block cache.
2896 // Since block_cache is disabled, no cache activities will be involved.
2897 std::unique_ptr<InternalIterator> iter;
2898
2899 int64_t last_cache_bytes_read = 0;
2900 // At first, no block will be accessed.
2901 {
2902 BlockCachePropertiesSnapshot props(options.statistics.get());
2903 // index will be added to block cache.
2904 props.AssertEqual(1, // index block miss
2905 0, 0, 0);
2906 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2907 ASSERT_EQ(props.GetCacheBytesWrite(),
2908 static_cast<int64_t>(table_options.block_cache->GetUsage()));
2909 last_cache_bytes_read = props.GetCacheBytesRead();
2910 }
2911
2912 // Only index block will be accessed
2913 {
2914 iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2915 BlockCachePropertiesSnapshot props(options.statistics.get());
2916 // NOTE: to help better highlight the "delta" of each ticker, I use
2917 // <last_value> + <added_value> to indicate the increment of changed
2918 // value; other numbers remain the same.
2919 props.AssertEqual(1, 0 + 1, // index block hit
2920 0, 0);
2921 // Cache hit, bytes read from cache should increase
2922 ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2923 ASSERT_EQ(props.GetCacheBytesWrite(),
2924 static_cast<int64_t>(table_options.block_cache->GetUsage()));
2925 last_cache_bytes_read = props.GetCacheBytesRead();
2926 }
2927
2928 // Only data block will be accessed
2929 {
2930 iter->SeekToFirst();
2931 ASSERT_OK(iter->status());
2932 BlockCachePropertiesSnapshot props(options.statistics.get());
2933 props.AssertEqual(1, 1, 0 + 1, // data block miss
2934 0);
2935 // Cache miss, Bytes read from cache should not change
2936 ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read);
2937 ASSERT_EQ(props.GetCacheBytesWrite(),
2938 static_cast<int64_t>(table_options.block_cache->GetUsage()));
2939 last_cache_bytes_read = props.GetCacheBytesRead();
2940 }
2941
2942 // Data block will be in cache
2943 {
2944 iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2945 iter->SeekToFirst();
2946 ASSERT_OK(iter->status());
2947 BlockCachePropertiesSnapshot props(options.statistics.get());
2948 props.AssertEqual(1, 1 + 1, /* index block hit */
2949 1, 0 + 1 /* data block hit */);
2950 // Cache hit, bytes read from cache should increase
2951 ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2952 ASSERT_EQ(props.GetCacheBytesWrite(),
2953 static_cast<int64_t>(table_options.block_cache->GetUsage()));
2954 }
2955 // release the iterator so that the block cache can reset correctly.
2956 iter.reset();
2957
2958 c.ResetTableReader();
2959
2960 // -- PART 2: Open with very small block cache
2961 // In this test, no block will ever get hit since the block cache is
2962 // too small to fit even one entry.
2963 table_options.block_cache = NewLRUCache(1, 4);
2964 options.statistics = CreateDBStatistics();
2965 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2966 const ImmutableOptions ioptions2(options);
2967 const MutableCFOptions moptions2(options);
2968 ASSERT_OK(c.Reopen(ioptions2, moptions2));
2969 {
2970 BlockCachePropertiesSnapshot props(options.statistics.get());
2971 props.AssertEqual(1, // index block miss
2972 0, 0, 0);
2973 // Cache miss, Bytes read from cache should not change
2974 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2975 }
2976
2977 {
2978 // Both index and data block get accessed.
2979 // It first cache index block then data block. But since the cache size
2980 // is only 1, index block will be purged after data block is inserted.
2981 iter.reset(c.NewIterator(moptions2.prefix_extractor.get()));
2982 BlockCachePropertiesSnapshot props(options.statistics.get());
2983 props.AssertEqual(1 + 1, // index block miss
2984 0, 0, // data block miss
2985 0);
2986 // Cache hit, bytes read from cache should increase
2987 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2988 }
2989
2990 {
2991 // SeekToFirst() accesses data block. With similar reason, we expect data
2992 // block's cache miss.
2993 iter->SeekToFirst();
2994 ASSERT_OK(iter->status());
2995 BlockCachePropertiesSnapshot props(options.statistics.get());
2996 props.AssertEqual(2, 0, 0 + 1, // data block miss
2997 0);
2998 // Cache miss, Bytes read from cache should not change
2999 ASSERT_EQ(props.GetCacheBytesRead(), 0);
3000 }
3001 iter.reset();
3002 c.ResetTableReader();
3003
3004 // -- PART 3: Open table with bloom filter enabled but not in SST file
3005 table_options.block_cache = NewLRUCache(4096, 4);
3006 table_options.cache_index_and_filter_blocks = false;
3007 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3008
3009 TableConstructor c3(BytewiseComparator());
3010 std::string user_key = "k01";
3011 InternalKey internal_key(user_key, 0, kTypeValue);
3012 c3.Add(internal_key.Encode().ToString(), "hello");
3013 ImmutableOptions ioptions3(options);
3014 MutableCFOptions moptions3(options);
3015 // Generate table without filter policy
3016 c3.Finish(options, ioptions3, moptions3, table_options,
3017 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
3018 c3.ResetTableReader();
3019
3020 // Open table with filter policy
3021 table_options.filter_policy.reset(NewBloomFilterPolicy(1));
3022 options.table_factory.reset(new BlockBasedTableFactory(table_options));
3023 options.statistics = CreateDBStatistics();
3024 ImmutableOptions ioptions4(options);
3025 MutableCFOptions moptions4(options);
3026 ASSERT_OK(c3.Reopen(ioptions4, moptions4));
3027 reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());
3028 ASSERT_FALSE(reader->TEST_FilterBlockInCache());
3029 PinnableSlice value;
3030 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3031 GetContext::kNotFound, user_key, &value, nullptr,
3032 nullptr, true, nullptr, nullptr);
3033 ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context,
3034 moptions4.prefix_extractor.get()));
3035 ASSERT_STREQ(value.data(), "hello");
// The SST has no filter block, so filter tickers must remain untouched.
3036 BlockCachePropertiesSnapshot props(options.statistics.get());
3037 props.AssertFilterBlockStat(0, 0);
3038 c3.ResetTableReader();
3039 }
3040
ValidateBlockSizeDeviation(int value,int expected)3041 void ValidateBlockSizeDeviation(int value, int expected) {
3042 BlockBasedTableOptions table_options;
3043 table_options.block_size_deviation = value;
3044 BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
3045
3046 const BlockBasedTableOptions* normalized_table_options =
3047 factory->GetOptions<BlockBasedTableOptions>();
3048 ASSERT_EQ(normalized_table_options->block_size_deviation, expected);
3049
3050 delete factory;
3051 }
3052
ValidateBlockRestartInterval(int value,int expected)3053 void ValidateBlockRestartInterval(int value, int expected) {
3054 BlockBasedTableOptions table_options;
3055 table_options.block_restart_interval = value;
3056 BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
3057
3058 const BlockBasedTableOptions* normalized_table_options =
3059 factory->GetOptions<BlockBasedTableOptions>();
3060 ASSERT_EQ(normalized_table_options->block_restart_interval, expected);
3061
3062 delete factory;
3063 }
3064
TEST_P(BlockBasedTableTest, InvalidOptions) {
  // Invalid values for block_size_deviation (<0 or >100) are silently
  // normalized to 0; in-range values pass through unchanged.
  static const std::pair<int, int> kDeviationCases[] = {
      {-10, 0}, {-1, 0},    {0, 0},   {1, 1},
      {99, 99}, {100, 100}, {101, 0}, {1000, 0}};
  for (const auto& tc : kDeviationCases) {
    ValidateBlockSizeDeviation(tc.first, tc.second);
  }

  // Invalid values for block_restart_interval (<1) are silently normalized
  // to 1; values >= 1 pass through unchanged.
  static const std::pair<int, int> kRestartIntervalCases[] = {
      {-10, 1}, {-1, 1}, {0, 1}, {1, 1}, {2, 2}, {1000, 1000}};
  for (const auto& tc : kRestartIntervalCases) {
    ValidateBlockRestartInterval(tc.first, tc.second);
  }
}
3084
// Verifies per-Get block read counts (perf context) for found and not-found
// keys, across block-based vs. full bloom filters, with index/filter either
// cached or preloaded.
TEST_P(BlockBasedTableTest,BlockReadCountTest)3085 TEST_P(BlockBasedTableTest, BlockReadCountTest) {
3086 // bloom_filter_type = 0 -- block-based filter
3087 // bloom_filter_type = 1 -- full filter
3088 for (int bloom_filter_type = 0; bloom_filter_type < 2; ++bloom_filter_type) {
3089 for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2;
3090 ++index_and_filter_in_cache) {
3091 Options options;
3092 options.create_if_missing = true;
3093
// 1-byte cache: every cached block is evicted immediately, forcing a
// fresh read (and perf-context count) on each access.
3094 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3095 table_options.block_cache = NewLRUCache(1, 0);
3096 table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
3097 table_options.filter_policy.reset(
3098 NewBloomFilterPolicy(10, bloom_filter_type == 0));
3099 options.table_factory.reset(new BlockBasedTableFactory(table_options));
3100 std::vector<std::string> keys;
3101 stl_wrappers::KVMap kvmap;
3102
3103 TableConstructor c(BytewiseComparator());
3104 std::string user_key = "k04";
3105 InternalKey internal_key(user_key, 0, kTypeValue);
3106 std::string encoded_key = internal_key.Encode().ToString();
3107 c.Add(encoded_key, "hello");
3108 ImmutableOptions ioptions(options);
3109 MutableCFOptions moptions(options);
3110 // Generate table with filter policy
3111 c.Finish(options, ioptions, moptions, table_options,
3112 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
3113 auto reader = c.GetTableReader();
3114 PinnableSlice value;
// Case 1: lookup of an existing key.
3115 {
3116 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3117 GetContext::kNotFound, user_key, &value, nullptr,
3118 nullptr, true, nullptr, nullptr);
3119 get_perf_context()->Reset();
3120 ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
3121 moptions.prefix_extractor.get()));
3122 if (index_and_filter_in_cache) {
3123 // data, index and filter block
3124 ASSERT_EQ(get_perf_context()->block_read_count, 3);
3125 ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
3126 ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
3127 } else {
3128 // just the data block
3129 ASSERT_EQ(get_perf_context()->block_read_count, 1);
3130 }
3131 ASSERT_EQ(get_context.State(), GetContext::kFound);
3132 ASSERT_STREQ(value.data(), "hello");
3133 }
3134
3135 // Get non-existing key
3136 user_key = "does-not-exist";
3137 internal_key = InternalKey(user_key, 0, kTypeValue);
3138 encoded_key = internal_key.Encode().ToString();
3139
3140 value.Reset();
// Case 2: lookup of a missing key; the filter should prevent any
// data-block read.
3141 {
3142 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3143 GetContext::kNotFound, user_key, &value, nullptr,
3144 nullptr, true, nullptr, nullptr);
3145 get_perf_context()->Reset();
3146 ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
3147 moptions.prefix_extractor.get()));
3148 ASSERT_EQ(get_context.State(), GetContext::kNotFound);
3149 }
3150
3151 if (index_and_filter_in_cache) {
3152 if (bloom_filter_type == 0) {
3153 // with block-based, we read index and then the filter
3154 ASSERT_EQ(get_perf_context()->block_read_count, 2);
3155 ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
3156 ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
3157 } else {
3158 // with full-filter, we read filter first and then we stop
3159 ASSERT_EQ(get_perf_context()->block_read_count, 1);
3160 ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
3161 }
3162 } else {
3163 // filter is already in memory and it figures out that the key doesn't
3164 // exist
3165 ASSERT_EQ(get_perf_context()->block_read_count, 0);
3166 }
3167 }
3168 }
3169 }
3170
TEST_P(BlockBasedTableTest, BlockCacheLeak) {
  // Check that when we reopen a table we don't lose access to blocks already
  // in the cache. This test checks whether the Table actually makes use of the
  // unique ID from the file.

  Options opt;
  std::unique_ptr<InternalKeyComparator> ikc;
  ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
  opt.compression = kNoCompression;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_size = 1024;
  // big enough so we don't ever lose cached values.
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableOptions ioptions(opt);
  const MutableCFOptions moptions(opt);
  c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);

  // Full scan so that every data block is read once and inserted into the
  // block cache.
  std::unique_ptr<InternalIterator> iter(
      c.NewIterator(moptions.prefix_extractor.get()));
  iter->SeekToFirst();
  while (iter->Valid()) {
    iter->key();
    iter->value();
    iter->Next();
  }
  ASSERT_OK(iter->status());
  iter.reset();

  // Reopen with the same block cache: the reopened table must resolve to the
  // same cache entries, so every key's block should still be reported cached.
  const ImmutableOptions ioptions1(opt);
  const MutableCFOptions moptions1(opt);
  ASSERT_OK(c.Reopen(ioptions1, moptions1));
  auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
  for (const std::string& key : keys) {
    InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
    ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
  }
  c.ResetTableReader();

  // rerun with different block cache
  // Nothing was ever inserted into this fresh cache, so no key should be
  // reported as cached.
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
  const ImmutableOptions ioptions2(opt);
  const MutableCFOptions moptions2(opt);
  ASSERT_OK(c.Reopen(ioptions2, moptions2));
  table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
  for (const std::string& key : keys) {
    InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
    ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
  }
  c.ResetTableReader();
}
3234
3235 namespace {
3236 class CustomMemoryAllocator : public MemoryAllocator {
3237 public:
Name() const3238 const char* Name() const override { return "CustomMemoryAllocator"; }
3239
Allocate(size_t size)3240 void* Allocate(size_t size) override {
3241 ++numAllocations;
3242 auto ptr = new char[size + 16];
3243 memcpy(ptr, "memory_allocator_", 16); // mangle first 16 bytes
3244 return reinterpret_cast<void*>(ptr + 16);
3245 }
Deallocate(void * p)3246 void Deallocate(void* p) override {
3247 ++numDeallocations;
3248 char* ptr = reinterpret_cast<char*>(p) - 16;
3249 delete[] ptr;
3250 }
3251
3252 std::atomic<int> numAllocations;
3253 std::atomic<int> numDeallocations;
3254 };
3255 } // namespace
3256
TEST_P(BlockBasedTableTest, MemoryAllocator) {
  // Builds a table whose block cache uses CustomMemoryAllocator, reads every
  // entry back, then verifies (a) allocations actually went through the
  // custom allocator and (b) destroying the cache freed all of them.
  auto custom_memory_allocator = std::make_shared<CustomMemoryAllocator>();
  {
    Options opt;
    std::unique_ptr<InternalKeyComparator> ikc;
    ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
    opt.compression = kNoCompression;
    BlockBasedTableOptions table_options;
    table_options.block_size = 1024;
    LRUCacheOptions lruOptions;
    lruOptions.memory_allocator = custom_memory_allocator;
    lruOptions.capacity = 16 * 1024 * 1024;
    lruOptions.num_shard_bits = 4;
    table_options.block_cache = NewLRUCache(std::move(lruOptions));
    opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    c.Add("k01", "hello");
    c.Add("k02", "hello2");
    c.Add("k03", std::string(10000, 'x'));
    c.Add("k04", std::string(200000, 'x'));
    c.Add("k05", std::string(300000, 'x'));
    c.Add("k06", "hello3");
    c.Add("k07", std::string(100000, 'x'));
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    const ImmutableOptions ioptions(opt);
    const MutableCFOptions moptions(opt);
    c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);

    // Full scan so every data block is read (and cached) at least once.
    std::unique_ptr<InternalIterator> iter(
        c.NewIterator(moptions.prefix_extractor.get()));
    iter->SeekToFirst();
    while (iter->Valid()) {
      iter->key();
      iter->value();
      iter->Next();
    }
    ASSERT_OK(iter->status());
  }

  // out of scope, block cache should have been deleted, all allocations
  // deallocated
  EXPECT_EQ(custom_memory_allocator->numAllocations.load(),
            custom_memory_allocator->numDeallocations.load());
  // make sure that allocations actually happened through the cache allocator
  EXPECT_GT(custom_memory_allocator->numAllocations.load(), 0);
}
3306
// Test the file checksum of block based table
TEST_P(BlockBasedTableTest, NoFileChecksum) {
  // With no file_checksum_gen_factory configured, a freshly built table must
  // report the "unknown" sentinel checksum function name and checksum value.
  Options options;
  ImmutableOptions ioptions(options);
  MutableCFOptions moptions(options);
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  int level = 0;
  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
  std::string column_family_name;

  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();
  std::unique_ptr<TableBuilder> builder;
  builder.reset(ioptions.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, *comparator,
                          &int_tbl_prop_collector_factories,
                          options.compression, options.compression_opts,
                          kUnknownColumnFamily, column_family_name, level),
      f.GetFileWriter()));
  ASSERT_OK(f.ResetTableBuilder(std::move(builder)));
  f.AddKVtoKVMap(1000);
  ASSERT_OK(f.WriteKVAndFlushTable());
  // No generator was installed, so both fields stay at their sentinels.
  ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName);
  ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum);
}
3334
TEST_P(BlockBasedTableTest, Crc32cFileChecksum) {
  // Three checks on the CRC32c file-checksum plumbing:
  //  1. a table written through a generator records the generator's name,
  //  2. recomputing the checksum over the finished file reproduces the
  //     recorded value,
  //  3. the generator's output bytes themselves are schema-stable.
  FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
      new FileChecksumGenCrc32cFactory();
  Options options;
  options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
  ImmutableOptions ioptions(options);
  MutableCFOptions moptions(options);
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  int level = 0;
  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
  std::string column_family_name;

  FileChecksumGenContext gen_context;
  gen_context.file_name = "db/tmp";
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();
  // The helper takes ownership of the released generator.
  f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
  std::unique_ptr<TableBuilder> builder;
  builder.reset(ioptions.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, *comparator,
                          &int_tbl_prop_collector_factories,
                          options.compression, options.compression_opts,
                          kUnknownColumnFamily, column_family_name, level),
      f.GetFileWriter()));
  ASSERT_OK(f.ResetTableBuilder(std::move(builder)));
  f.AddKVtoKVMap(1000);
  ASSERT_OK(f.WriteKVAndFlushTable());
  ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");

  // An independent generator recomputes the checksum over the same file.
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  std::string checksum;
  ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum));
  ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());

  // Unit test the generator itself for schema stability
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen3 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  const char data[] = "here is some data";
  // sizeof(data) includes the trailing '\0'; the hard-coded expected bytes
  // below were computed over that full buffer.
  checksum_crc32c_gen3->Update(data, sizeof(data));
  checksum_crc32c_gen3->Finalize();
  checksum = checksum_crc32c_gen3->GetChecksum();
  ASSERT_STREQ(checksum.c_str(), "\345\245\277\110");
}
3386
3387 // Plain table is not supported in ROCKSDB_LITE
3388 #ifndef ROCKSDB_LITE
TEST_F(PlainTableTest, BasicPlainTableProperties) {
  // Writes 26 fixed-width entries ('a'..'z') into a PlainTable and verifies
  // the table properties read back via ReadTableProperties().
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 8;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;

  PlainTableFactory factory(plain_table_options);
  std::unique_ptr<FSWritableFile> sink(new test::StringSink());
  std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
      std::move(sink), "" /* don't care */, FileOptions()));
  Options options;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
  std::string column_family_name;
  int unknown_level = -1;
  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          CompressionOptions(), kUnknownColumnFamily,
                          column_family_name, unknown_level),
      file_writer.get()));

  for (char c = 'a'; c <= 'z'; ++c) {
    std::string key(8, c);
    // NOTE(review): the raw_key_size assertion below implies 16-byte internal
    // keys (8 user bytes + an 8-byte seq/type trailer), but this append adds
    // fewer bytes than that -- confirm the literal was not truncated.
    key.append("\1 "); // PlainTable expects internal key structure
    std::string value(28, c + 42);
    builder->Add(key, value);
  }
  ASSERT_OK(builder->Finish());
  ASSERT_OK(file_writer->Flush());

  test::StringSink* ss =
      static_cast<test::StringSink*>(file_writer->writable_file());
  // 72242 is presumably an arbitrary unique id for the StringSource --
  // TODO confirm against test::StringSource's constructor.
  std::unique_ptr<FSRandomAccessFile> source(
      new test::StringSource(ss->contents(), 72242, true));
  std::unique_ptr<RandomAccessFileReader> file_reader(
      new RandomAccessFileReader(std::move(source), "test"));

  TableProperties* props = nullptr;
  auto s = ReadTableProperties(file_reader.get(), ss->contents().size(),
                               kPlainTableMagicNumber, ioptions,
                               &props, true /* compression_type_missing */);
  std::unique_ptr<TableProperties> props_guard(props);
  ASSERT_OK(s);

  // With hash_table_ratio == 0 this PlainTable records no separate
  // index/filter sizes, and all entries land in a single data "block".
  ASSERT_EQ(0ul, props->index_size);
  ASSERT_EQ(0ul, props->filter_size);
  ASSERT_EQ(16ul * 26, props->raw_key_size);
  ASSERT_EQ(28ul * 26, props->raw_value_size);
  ASSERT_EQ(26ul, props->num_entries);
  ASSERT_EQ(1ul, props->num_data_blocks);
}
3443
TEST_F(PlainTableTest, NoFileChecksum) {
  // PlainTable counterpart of the block-based NoFileChecksum test: without a
  // checksum generator, the sentinel function name and value are reported.
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 20;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;
  PlainTableFactory factory(plain_table_options);

  Options options;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
  std::string column_family_name;
  int unknown_level = -1;
  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();

  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          CompressionOptions(), kUnknownColumnFamily,
                          column_family_name, unknown_level),
      f.GetFileWriter()));
  ASSERT_OK(f.ResetTableBuilder(std::move(builder)));
  f.AddKVtoKVMap(1000);
  ASSERT_OK(f.WriteKVAndFlushTable());
  ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName);
  EXPECT_EQ(f.GetFileChecksum(), kUnknownFileChecksum);
}
3473
TEST_F(PlainTableTest, Crc32cFileChecksum) {
  // PlainTable counterpart of the block-based Crc32cFileChecksum test:
  // writing through a CRC32c generator records its name, and recomputing the
  // checksum over the finished file reproduces the recorded value.
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 20;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;
  PlainTableFactory factory(plain_table_options);

  FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
      new FileChecksumGenCrc32cFactory();
  Options options;
  options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
  std::string column_family_name;
  int unknown_level = -1;

  FileChecksumGenContext gen_context;
  gen_context.file_name = "db/tmp";
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();
  // The helper takes ownership of the released generator.
  f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());

  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          CompressionOptions(), kUnknownColumnFamily,
                          column_family_name, unknown_level),
      f.GetFileWriter()));
  ASSERT_OK(f.ResetTableBuilder(std::move(builder)));
  f.AddKVtoKVMap(1000);
  ASSERT_OK(f.WriteKVAndFlushTable());
  ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");

  // An independent generator recomputes the checksum over the same file.
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  std::string checksum;
  ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum));
  EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
}
3519
3520 #endif // !ROCKSDB_LITE
3521
TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) {
  // With compression disabled, ApproximateOffsetOf() should track cumulative
  // uncompressed data size: the large values for k03..k05 and k07 push later
  // keys to proportionally larger offsets.
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.db_host_id = "";
  test::PlainInternalKeyComparator internal_comparator(options.comparator);
  options.compression = kNoCompression;
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);

  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000));
  // k04 and k05 will be in two consecutive blocks, the index is
  // an arbitrary slice between k04 and k05, either before or after k04a
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
  // Past the last key: roughly the total file size.
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
  c.ResetTableReader();
}
3559
// Builds a small table containing two highly-compressible 10KB values using
// the given compression type, then checks that approximate offsets reflect
// the compressed (not raw) data sizes.
static void DoCompressionTest(CompressionType comp) {
  Random rnd(301);
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  std::string tmp;
  c.Add("k01", "hello");
  c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
  c.Add("k03", "hello3");
  c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  test::PlainInternalKeyComparator ikc(options.comparator);
  options.compression = comp;
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);

  // Ranges are loose because achievable compression ratios differ per
  // algorithm: a 10KB value at ratio ~0.25 should land around 2-3.5KB.
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3500));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3500));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7000));
  c.ResetTableReader();
}
3587
TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
  // Runs DoCompressionTest() for every compression algorithm compiled into
  // this binary; unsupported algorithms are skipped with a note on stderr.
  std::vector<CompressionType> compression_state;
  if (!Snappy_Supported()) {
    fprintf(stderr, "skipping snappy compression tests\n");
  } else {
    compression_state.push_back(kSnappyCompression);
  }

  if (!Zlib_Supported()) {
    fprintf(stderr, "skipping zlib compression tests\n");
  } else {
    compression_state.push_back(kZlibCompression);
  }

  // TODO(kailiu) DoCompressionTest() doesn't work with BZip2.
  /*
  if (!BZip2_Supported()) {
    fprintf(stderr, "skipping bzip2 compression tests\n");
  } else {
    compression_state.push_back(kBZip2Compression);
  }
  */

  if (!LZ4_Supported()) {
    fprintf(stderr, "skipping lz4 and lz4hc compression tests\n");
  } else {
    compression_state.push_back(kLZ4Compression);
    compression_state.push_back(kLZ4HCCompression);
  }

  // Fixed the skip message, which previously read "xpress and xpress", and
  // normalized the brace style to match the other branches.
  if (!XPRESS_Supported()) {
    fprintf(stderr, "skipping xpress compression tests\n");
  } else {
    compression_state.push_back(kXpressCompression);
  }

  for (auto state : compression_state) {
    DoCompressionTest(state);
  }
}
3629
3630 #ifndef ROCKSDB_VALGRIND_RUN
TEST_P(ParameterizedHarnessTest, RandomizedHarnessTest) {
  // Fuzz-style test: grow the random key set from 0 toward 2000 entries
  // (fine-grained +1 steps below 50 entries, +200 afterwards) and re-validate
  // the table contents with Test() at each size.
  Random rnd(test::RandomSeed() + 5);
  for (int num_entries = 0; num_entries < 2000;
       num_entries += (num_entries < 50 ? 1 : 200)) {
    for (int e = 0; e < num_entries; e++) {
      Add(test::RandomKey(&rnd, rnd.Skewed(4)),
          rnd.RandomString(rnd.Skewed(5)));
    }
    Test(&rnd);
  }
}
3642
3643 #ifndef ROCKSDB_LITE
TEST_F(DBHarnessTest, RandomizedLongDB) {
  // Fills a DB with 100k random key/value pairs through the harness,
  // validates reads via Test(), and then asserts that enough data was
  // written to force at least one file into the LSM tree.
  Random rnd(test::RandomSeed());
  int num_entries = 100000;
  for (int e = 0; e < num_entries; e++) {
    // (An unused local std::string previously declared here was removed.)
    Add(test::RandomKey(&rnd, rnd.Skewed(4)), rnd.RandomString(rnd.Skewed(5)));
  }
  Test(&rnd);

  // We must have created enough data to force merging
  int files = 0;
  for (int level = 0; level < db()->NumberLevels(); level++) {
    std::string value;
    char name[100];
    // Per-level file count exposed through the DB property interface.
    snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level);
    ASSERT_TRUE(db()->GetProperty(name, &value));
    files += atoi(value.c_str());
  }
  ASSERT_GT(files, 0);
}
3664 #endif // ROCKSDB_LITE
3665 #endif // ROCKSDB_VALGRIND_RUN
3666
// Fixture owning a single skiplist-backed MemTable for the duration of each
// test. The memtable is reference-counted: Ref() in the constructor pins it,
// and Unref() in the destructor returns the pointer for deletion.
class MemTableTest : public testing::Test {
 public:
  MemTableTest() {
    InternalKeyComparator cmp(BytewiseComparator());
    auto table_factory = std::make_shared<SkipListFactory>();
    options_.memtable_factory = table_factory;
    ImmutableOptions ioptions(options_);
    wb_ = new WriteBufferManager(options_.db_write_buffer_size);
    memtable_ = new MemTable(cmp, ioptions, MutableCFOptions(options_), wb_,
                             kMaxSequenceNumber, 0 /* column_family_id */);
    memtable_->Ref();
  }

  ~MemTableTest() {
    // Unref() returns the MemTable* once its refcount drops to zero.
    delete memtable_->Unref();
    delete wb_;
  }

  MemTable* GetMemTable() { return memtable_; }

 private:
  MemTable* memtable_;      // owned; released via Unref() in the destructor
  Options options_;
  WriteBufferManager* wb_;  // owned
};
3692
TEST_F(MemTableTest, Simple) {
  // Inserts a batch (including two range deletions) into the memtable, then
  // dumps both the point-key iterator and the range-tombstone iterator to
  // stderr. This is a smoke test: it only checks that insertion succeeds and
  // iteration terminates.
  WriteBatch batch;
  WriteBatchInternal::SetSequence(&batch, 100);
  ASSERT_OK(batch.Put(std::string("k1"), std::string("v1")));
  ASSERT_OK(batch.Put(std::string("k2"), std::string("v2")));
  ASSERT_OK(batch.Put(std::string("k3"), std::string("v3")));
  ASSERT_OK(batch.Put(std::string("largekey"), std::string("vlarge")));
  ASSERT_OK(batch.DeleteRange(std::string("chi"), std::string("xigua")));
  ASSERT_OK(batch.DeleteRange(std::string("begin"), std::string("end")));
  ColumnFamilyMemTablesDefault cf_mems_default(GetMemTable());
  ASSERT_TRUE(
      WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr, nullptr)
          .ok());

  for (int i = 0; i < 2; ++i) {
    Arena arena;
    ScopedArenaIterator arena_iter_guard;
    std::unique_ptr<InternalIterator> iter_guard;
    InternalIterator* iter;
    if (i == 0) {
      // Pass 0: regular point-lookup iterator, arena-allocated.
      iter = GetMemTable()->NewIterator(ReadOptions(), &arena);
      arena_iter_guard.set(iter);
    } else {
      // Pass 1: range-tombstone iterator (may be null when no tombstones).
      iter = GetMemTable()->NewRangeTombstoneIterator(
          ReadOptions(), kMaxSequenceNumber /* read_seq */);
      iter_guard.reset(iter);
    }
    if (iter == nullptr) {
      continue;
    }
    iter->SeekToFirst();
    while (iter->Valid()) {
      fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(),
              iter->value().ToString().c_str());
      iter->Next();
    }
  }
}
3731
3732 // Test the empty key
TEST_P(ParameterizedHarnessTest, SimpleEmptyKey) {
  // A single entry whose key is the empty string must round-trip.
  Random rng(test::RandomSeed() + 1);
  Add("", "v");
  Test(&rng);
}
3738
TEST_P(ParameterizedHarnessTest, SimpleSingle) {
  // A table with exactly one short key/value pair must round-trip.
  Random rng(test::RandomSeed() + 2);
  Add("abc", "v");
  Test(&rng);
}
3744
TEST_P(ParameterizedHarnessTest, SimpleMulti) {
  // A few overlapping-prefix keys ("abc" < "abcd" < "ac") must round-trip.
  Random rng(test::RandomSeed() + 3);
  const std::pair<std::string, std::string> kEntries[] = {
      {"abc", "v"}, {"abcd", "v"}, {"ac", "v2"}};
  for (const auto& entry : kEntries) {
    Add(entry.first, entry.second);
  }
  Test(&rng);
}
3752
TEST_P(ParameterizedHarnessTest, SimpleSpecialKey) {
  // A key of maximal bytes (0xff 0xff) exercises upper-boundary comparisons.
  Random rng(test::RandomSeed() + 4);
  Add("\xff\xff", "v3");
  Test(&rng);
}
3758
TEST(TableTest, FooterTests) {
  // Each case round-trips a Footer through EncodeTo()/DecodeFrom() and checks
  // that every decoded field matches what was written, with legacy magic
  // numbers being upconverted to their modern equivalents. The repeated
  // build/encode/decode/verify boilerplate is factored into one helper.
  BlockHandle meta_index(10, 5), index(20, 15);
  // build_magic/version construct the footer; set_checksum controls whether
  // footer.set_checksum(checksum) is called (when false, `checksum` is the
  // expected default, kCRC32c); expected_magic is the post-upconvert magic.
  auto verify_round_trip = [&](uint64_t build_magic, uint32_t version,
                               bool set_checksum, ChecksumType checksum,
                               uint64_t expected_magic) {
    std::string encoded;
    Footer footer(build_magic, version);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    if (set_checksum) {
      footer.set_checksum(checksum);
    }
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
    ASSERT_EQ(decoded_footer.table_magic_number(), expected_magic);
    ASSERT_EQ(decoded_footer.checksum(), checksum);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
    ASSERT_EQ(decoded_footer.version(), version);
  };

  // upconvert legacy block based
  verify_round_trip(kLegacyBlockBasedTableMagicNumber, 0U, false, kCRC32c,
                    kBlockBasedTableMagicNumber);
  // xxhash block based
  verify_round_trip(kBlockBasedTableMagicNumber, 1U, true, kxxHash,
                    kBlockBasedTableMagicNumber);
  // xxhash64 block based
  verify_round_trip(kBlockBasedTableMagicNumber, 1U, true, kxxHash64,
                    kBlockBasedTableMagicNumber);
  // Plain table is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
  // upconvert legacy plain table
  verify_round_trip(kLegacyPlainTableMagicNumber, 0U, false, kCRC32c,
                    kPlainTableMagicNumber);
  // xxhash plain table
  verify_round_trip(kPlainTableMagicNumber, 1U, true, kxxHash,
                    kPlainTableMagicNumber);
#endif  // !ROCKSDB_LITE
  // version == 2: modern magic kept, checksum defaults to kCRC32c
  verify_round_trip(kBlockBasedTableMagicNumber, 2U, false, kCRC32c,
                    kBlockBasedTableMagicNumber);
}
3881
3882 class IndexBlockRestartIntervalTest
3883 : public TableTest,
3884 public ::testing::WithParamInterface<std::pair<int, bool>> {
3885 public:
GetRestartValues()3886 static std::vector<std::pair<int, bool>> GetRestartValues() {
3887 return {{-1, false}, {0, false}, {1, false}, {8, false},
3888 {16, false}, {32, false}, {-1, true}, {0, true},
3889 {1, true}, {8, true}, {16, true}, {32, true}};
3890 }
3891 };
3892
3893 INSTANTIATE_TEST_CASE_P(
3894 IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
3895 ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));
3896
TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
  // Builds a 10k-entry table with a tiny 64-byte block size (so the index
  // block is large), then verifies both point lookups and full in-order
  // iteration for the parameterized restart-interval / delta-encoding combo.
  const int kKeysInTable = 10000;
  const int kKeySize = 100;
  const int kValSize = 500;

  const int index_block_restart_interval = std::get<0>(GetParam());
  const bool value_delta_encoding = std::get<1>(GetParam());

  Options options;
  BlockBasedTableOptions table_options;
  table_options.block_size = 64;  // small block size to get big index block
  table_options.index_block_restart_interval = index_block_restart_interval;
  // format_version 4 enables delta-encoded index values; 3 does not.
  if (value_delta_encoding) {
    table_options.format_version = 4;
  } else {
    table_options.format_version = 3;
  }
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(BytewiseComparator());
  static Random rnd(301);
  for (int i = 0; i < kKeysInTable; i++) {
    InternalKey k(rnd.RandomString(kKeySize), 0, kTypeValue);
    c.Add(k.Encode().ToString(), rnd.RandomString(kValSize));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  auto reader = c.GetTableReader();

  ReadOptions read_options;
  std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
      read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Test point lookup
  for (auto& kv : kvmap) {
    db_iter->Seek(kv.first);

    ASSERT_TRUE(db_iter->Valid());
    ASSERT_OK(db_iter->status());
    ASSERT_EQ(db_iter->key(), kv.first);
    ASSERT_EQ(db_iter->value(), kv.second);
  }

  // Test iterating
  // Iteration order must match the sorted KVMap exactly, entry for entry.
  auto kv_iter = kvmap.begin();
  for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
    ASSERT_EQ(db_iter->key(), kv_iter->first);
    ASSERT_EQ(db_iter->value(), kv_iter->second);
    kv_iter++;
  }
  ASSERT_EQ(kv_iter, kvmap.end());
  c.ResetTableReader();
}
3958
3959 class PrefixTest : public testing::Test {
3960 public:
PrefixTest()3961 PrefixTest() : testing::Test() {}
~PrefixTest()3962 ~PrefixTest() override {}
3963 };
3964
3965 namespace {
3966 // A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest
3967 class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform {
3968 public:
~TestPrefixExtractor()3969 ~TestPrefixExtractor() override{};
Name() const3970 const char* Name() const override { return "TestPrefixExtractor"; }
3971
Transform(const ROCKSDB_NAMESPACE::Slice & src) const3972 ROCKSDB_NAMESPACE::Slice Transform(
3973 const ROCKSDB_NAMESPACE::Slice& src) const override {
3974 assert(IsValid(src));
3975 return ROCKSDB_NAMESPACE::Slice(src.data(), 3);
3976 }
3977
InDomain(const ROCKSDB_NAMESPACE::Slice & src) const3978 bool InDomain(const ROCKSDB_NAMESPACE::Slice& src) const override {
3979 return IsValid(src);
3980 }
3981
InRange(const ROCKSDB_NAMESPACE::Slice &) const3982 bool InRange(const ROCKSDB_NAMESPACE::Slice& /*dst*/) const override {
3983 return true;
3984 }
3985
IsValid(const ROCKSDB_NAMESPACE::Slice & src) const3986 bool IsValid(const ROCKSDB_NAMESPACE::Slice& src) const {
3987 if (src.size() != 4) {
3988 return false;
3989 }
3990 if (src[0] != '[') {
3991 return false;
3992 }
3993 if (src[1] < '0' || src[1] > '9') {
3994 return false;
3995 }
3996 if (src[2] != ']') {
3997 return false;
3998 }
3999 if (src[3] < '0' || src[3] > '9') {
4000 return false;
4001 }
4002 return true;
4003 }
4004 };
4005 } // namespace
4006
TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
  // Exercise a DB that has both a prefix extractor and whole-key filtering
  // enabled on its bloom filters at the same time.
  ROCKSDB_NAMESPACE::Options options;
  options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleUniversal;
  options.num_levels = 20;
  options.create_if_missing = true;
  options.optimize_filters_for_hits = false;
  options.target_file_size_base = 268435456;
  options.prefix_extractor = std::make_shared<TestPrefixExtractor>();
  ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
  bbto.block_size = 262144;
  bbto.whole_key_filtering = true;

  const std::string kDBPath = test::PerThreadDBPath("table_prefix_test");
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  ASSERT_OK(DestroyDB(kDBPath, options));
  ROCKSDB_NAMESPACE::DB* db;
  ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));

  // Write keys "[0]0".."[9]9": 10 distinct prefixes, 10 keys each.
  for (int outer = 0; outer < 10; outer++) {
    const std::string prefix = "[" + std::to_string(outer) + "]";
    for (int inner = 0; inner < 10; inner++) {
      ASSERT_OK(db->Put(ROCKSDB_NAMESPACE::WriteOptions(),
                        prefix + std::to_string(inner), "1"));
    }
  }

  // Trigger compaction.
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  delete db;
  // In the second round, turn whole_key_filtering off and expect
  // rocksdb still works.
}
4041
4042 /*
4043 * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in
4044 * the SST file any more. Instead, RocksDB deduces global_seqno from the
4045 * MANIFEST while reading from an SST. Therefore, it's not possible to test the
4046 * functionality of global_seqno in a single, isolated unit test without the
4047 * involvement of Version, VersionSet, etc.
4048 */
TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  // `sink` is owned by `holder`; the raw pointer is kept so the written
  // bytes can be wrapped in a read-write sink and re-read below.
  test::StringSink* sink = new test::StringSink();
  std::unique_ptr<FSWritableFile> holder(sink);
  std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
      std::move(holder), "" /* don't care */, FileOptions()));
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
  // Writes the external-SST-file properties (version and a global_seqno
  // slot) that the helpers below read back and patch.
  int_tbl_prop_collector_factories.emplace_back(
      new SstFileWriterPropertiesCollectorFactory(2 /* version */,
                                                  0 /* global_seqno*/));
  std::string column_family_name;
  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          CompressionOptions(), kUnknownColumnFamily,
                          column_family_name, -1),
      file_writer.get()));

  // Keys "aaaaaaaa".."zzzzzzzz", each with value == user key, seqno 0.
  for (char c = 'a'; c <= 'z'; ++c) {
    std::string key(8, c);
    std::string value = key;
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  ASSERT_OK(file_writer->Flush());

  // Wrap the written bytes so the global seqno property can be patched
  // in place between reads.
  test::RandomRWStringSink ss_rw(sink);
  uint32_t version;
  uint64_t global_seqno;
  uint64_t global_seqno_offset;

  // Helper function to get version, global_seqno, global_seqno_offset
  std::function<void()> GetVersionAndGlobalSeqno = [&]() {
    std::unique_ptr<FSRandomAccessFile> source(
        new test::StringSource(ss_rw.contents(), 73342, true));
    std::unique_ptr<RandomAccessFileReader> file_reader(
        new RandomAccessFileReader(std::move(source), ""));

    TableProperties* props = nullptr;
    ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
                                  kBlockBasedTableMagicNumber, ioptions,
                                  &props, true /* compression_type_missing */));

    UserCollectedProperties user_props = props->user_collected_properties;
    version = DecodeFixed32(
        user_props[ExternalSstFilePropertyNames::kVersion].c_str());
    global_seqno = DecodeFixed64(
        user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str());
    // Remember where the seqno property lives in the file so that
    // SetGlobalSeqno can overwrite it in place.
    global_seqno_offset =
        props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno];

    delete props;
  };

  // Helper function to update the value of the global seqno in the file
  std::function<void(uint64_t)> SetGlobalSeqno = [&](uint64_t val) {
    std::string new_global_seqno;
    PutFixed64(&new_global_seqno, val);

    ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno, IOOptions(),
                          nullptr));
  };

  // Helper function to get the contents of the table InternalIterator
  std::unique_ptr<TableReader> table_reader;
  const ReadOptions read_options;
  std::function<InternalIterator*()> GetTableInternalIter = [&]() {
    std::unique_ptr<FSRandomAccessFile> source(
        new test::StringSource(ss_rw.contents(), 73342, true));
    std::unique_ptr<RandomAccessFileReader> file_reader(
        new RandomAccessFileReader(std::move(source), ""));

    options.table_factory->NewTableReader(
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(),
                           EnvOptions(), ikc),
        std::move(file_reader), ss_rw.contents().size(), &table_reader);

    return table_reader->NewIterator(
        read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized);
  };

  // The table was written with global seqno 0.
  GetVersionAndGlobalSeqno();
  ASSERT_EQ(2u, version);
  ASSERT_EQ(0u, global_seqno);

  InternalIterator* iter = GetTableInternalIter();
  char current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 0);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);
  delete iter;

  // Update global sequence number to 10
  SetGlobalSeqno(10);
  GetVersionAndGlobalSeqno();
  ASSERT_EQ(2u, version);
  ASSERT_EQ(10u, global_seqno);

  // Every key should now surface with sequence number 10.
  iter = GetTableInternalIter();
  current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 10);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);

  // Verify Seek
  for (char c = 'a'; c <= 'z'; c++) {
    std::string k = std::string(8, c);
    InternalKey ik(k, 10, kValueTypeForSeek);
    iter->Seek(ik.Encode());
    ASSERT_TRUE(iter->Valid());

    ParsedInternalKey pik;
    ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 10);
    ASSERT_EQ(pik.user_key.ToString(), k);
    ASSERT_EQ(iter->value().ToString(), k);
  }
  delete iter;

  // Update global sequence number to 3
  SetGlobalSeqno(3);
  GetVersionAndGlobalSeqno();
  ASSERT_EQ(2u, version);
  ASSERT_EQ(3u, global_seqno);

  iter = GetTableInternalIter();
  current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 3);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);

  // Verify Seek
  for (char c = 'a'; c <= 'z'; c++) {
    std::string k = std::string(8, c);
    // Seek with seqno=4, which is newer than the file's global seqno 3, so
    // the key (surfacing at seqno 3) is still visible to this seek.
    InternalKey ik(k, 4, kValueTypeForSeek);
    iter->Seek(ik.Encode());
    ASSERT_TRUE(iter->Valid());

    ParsedInternalKey pik;
    ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 3);
    ASSERT_EQ(pik.user_key.ToString(), k);
    ASSERT_EQ(iter->value().ToString(), k);
  }

  delete iter;
}
4233
TEST_P(BlockBasedTableTest, BlockAlignTest) {
  // Write a table with block_align=true and verify (1) each data block is
  // padded to the 4096-byte boundary and (2) the file can be read back by a
  // reader configured with block_align=false.
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  bbto.block_align = true;
  test::StringSink* sink = new test::StringSink();
  std::unique_ptr<FSWritableFile> holder(sink);
  std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
      std::move(holder), "" /* don't care */, FileOptions()));
  Options options;
  options.compression = kNoCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
  std::string column_family_name;
  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          CompressionOptions(), kUnknownColumnFamily,
                          column_family_name, -1),
      file_writer.get()));

  // 10000 zero-padded keys "00001".."10000", each with a short value, to
  // produce multiple data blocks.
  for (int i = 1; i <= 10000; ++i) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << i;
    std::string key = ostr.str();
    std::string value = "val";
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  ASSERT_OK(file_writer->Flush());

  std::unique_ptr<FSRandomAccessFile> source(
      new test::StringSource(sink->contents(), 73342, false));
  std::unique_ptr<RandomAccessFileReader> file_reader(
      new RandomAccessFileReader(std::move(source), "test"));
  // Helper that checks every data block is aligned to 4096 bytes.
  std::function<void()> VerifyBlockAlignment = [&]() {
    TableProperties* props = nullptr;
    ASSERT_OK(ReadTableProperties(file_reader.get(), sink->contents().size(),
                                  kBlockBasedTableMagicNumber, ioptions, &props,
                                  true /* compression_type_missing */));

    // With alignment, the data section divides evenly into 4096-byte blocks.
    uint64_t data_block_size = props->data_size / props->num_data_blocks;
    ASSERT_EQ(data_block_size, 4096);
    ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks);
    delete props;
  };

  VerifyBlockAlignment();

  // The below block of code verifies that we can read back the keys. Set
  // block_align to false when creating the reader to ensure we can flip between
  // the two modes without any issues
  std::unique_ptr<TableReader> table_reader;
  bbto.block_align = false;
  Options options2;
  options2.table_factory.reset(NewBlockBasedTableFactory(bbto));
  ImmutableOptions ioptions2(options2);
  const MutableCFOptions moptions2(options2);

  ASSERT_OK(ioptions.table_factory->NewTableReader(
      TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(),
                         EnvOptions(),
                         GetPlainInternalComparator(options2.comparator)),
      std::move(file_reader), sink->contents().size(), &table_reader));

  ReadOptions read_options;
  std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator(
      read_options, moptions2.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Scan and check every key comes back in insertion order.
  int expected_key = 1;
  for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << expected_key++;
    std::string key = ostr.str();
    std::string value = "val";

    ASSERT_OK(db_iter->status());
    ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key);
    ASSERT_EQ(db_iter->value().ToString(), value);
  }
  // The loop post-increments, so undo the final increment before comparing.
  expected_key--;
  ASSERT_EQ(expected_key, 10000);
  table_reader.reset();
}
4323
TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
  // The properties block should always be written with exactly one restart
  // point, regardless of other table options such as block_align.
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  bbto.block_align = true;
  test::StringSink* sink = new test::StringSink();
  std::unique_ptr<FSWritableFile> holder(sink);
  std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
      std::move(holder), "" /* don't care */, FileOptions()));

  Options options;
  options.compression = kNoCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));

  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
  std::string column_family_name;

  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          CompressionOptions(), kUnknownColumnFamily,
                          column_family_name, -1),
      file_writer.get()));

  // Write 10000 zero-padded keys so the table has multiple data blocks.
  for (int i = 1; i <= 10000; ++i) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << i;
    std::string key = ostr.str();
    std::string value = "val";
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  ASSERT_OK(file_writer->Flush());

  std::unique_ptr<FSRandomAccessFile> source(
      new test::StringSource(sink->contents(), 73342, true));
  std::unique_ptr<RandomAccessFileReader> file_reader(
      new RandomAccessFileReader(std::move(source), "test"));

  {
    RandomAccessFileReader* file = file_reader.get();
    uint64_t file_size = sink->contents().size();

    Footer footer;
    IOOptions opts;
    ASSERT_OK(ReadFooterFromFile(opts, file, nullptr /* prefetch_buffer */,
                                 file_size, &footer,
                                 kBlockBasedTableMagicNumber));

    // Reads the raw block at `handle`, skipping decompression and checksum
    // verification.
    auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type,
                                BlockContents* contents) {
      ReadOptions read_options;
      read_options.verify_checksums = false;
      PersistentCacheOptions cache_options;

      BlockFetcher block_fetcher(
          file, nullptr /* prefetch_buffer */, footer, read_options, handle,
          contents, ioptions, false /* decompress */,
          false /*maybe_compressed*/, block_type,
          UncompressionDict::GetEmptyDict(), cache_options);

      ASSERT_OK(block_fetcher.ReadBlockContents());
    };

    // -- Read metaindex block
    auto metaindex_handle = footer.metaindex_handle();
    BlockContents metaindex_contents;

    BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex,
                     &metaindex_contents);
    Block metaindex_block(std::move(metaindex_contents));

    std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
        BytewiseComparator(), kDisableGlobalSequenceNumber));
    bool found_properties_block = true;
    ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
    ASSERT_TRUE(found_properties_block);

    // -- Read properties block
    Slice v = meta_iter->value();
    BlockHandle properties_handle;
    ASSERT_OK(properties_handle.DecodeFrom(&v));
    BlockContents properties_contents;

    BlockFetchHelper(properties_handle, BlockType::kProperties,
                     &properties_contents);
    Block properties_block(std::move(properties_contents));

    // The invariant under test: exactly one restart point.
    ASSERT_EQ(properties_block.NumRestarts(), 1u);
  }
}
4418
TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
  // The properties meta-block should come at the end since we always need to
  // read it when opening a file, unlike index/filter/other meta-blocks, which
  // are sometimes read depending on the user's configuration. This ordering
  // allows us to do a small readahead on the end of the file to read properties
  // and meta-index blocks with one I/O.
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("a1", "val1");
  c.Add("b2", "val2");
  c.Add("c3", "val3");
  c.Add("d4", "val4");
  c.Add("e5", "val5");
  c.Add("f6", "val6");
  c.Add("g7", "val7");
  c.Add("h8", "val8");
  c.Add("j9", "val9");

  // write an SST file
  Options options;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  // Enable a filter so the table contains a filter meta-block to compete
  // with the properties block for the last position.
  table_options.filter_policy.reset(NewBloomFilterPolicy(
      8 /* bits_per_key */, false /* use_block_based_filter */));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ImmutableOptions ioptions(options);
  MutableCFOptions moptions(options);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  // get file reader
  test::StringSink* table_sink = c.TEST_GetSink();
  std::unique_ptr<FSRandomAccessFile> source(new test::StringSource(
      table_sink->contents(), 0 /* unique_id */, false /* allow_mmap_reads */));

  std::unique_ptr<RandomAccessFileReader> table_reader(
      new RandomAccessFileReader(std::move(source), "test"));
  size_t table_size = table_sink->contents().size();

  // read footer
  Footer footer;
  IOOptions opts;
  ASSERT_OK(ReadFooterFromFile(opts, table_reader.get(),
                               nullptr /* prefetch_buffer */, table_size,
                               &footer, kBlockBasedTableMagicNumber));

  // read metaindex
  auto metaindex_handle = footer.metaindex_handle();
  BlockContents metaindex_contents;
  PersistentCacheOptions pcache_opts;
  BlockFetcher block_fetcher(
      table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
      metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
      false /*maybe_compressed*/, BlockType::kMetaIndex,
      UncompressionDict::GetEmptyDict(), pcache_opts,
      nullptr /*memory_allocator*/);
  ASSERT_OK(block_fetcher.ReadBlockContents());
  Block metaindex_block(std::move(metaindex_contents));

  // verify properties block comes last
  // Walk every meta-block handle and remember which key maps to the
  // largest file offset.
  std::unique_ptr<InternalIterator> metaindex_iter{
      metaindex_block.NewDataIterator(options.comparator,
                                      kDisableGlobalSequenceNumber)};
  uint64_t max_offset = 0;
  std::string key_at_max_offset;
  for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
       metaindex_iter->Next()) {
    BlockHandle handle;
    Slice value = metaindex_iter->value();
    ASSERT_OK(handle.DecodeFrom(&value));
    if (handle.offset() > max_offset) {
      max_offset = handle.offset();
      key_at_max_offset = metaindex_iter->key().ToString();
    }
  }
  ASSERT_EQ(kPropertiesBlock, key_at_max_offset);
  // index handle is stored in footer rather than metaindex block, so need
  // separate logic to verify it comes before properties block.
  ASSERT_GT(max_offset, footer.index_handle().offset());
  c.ResetTableReader();
}
4500
TEST_P(BlockBasedTableTest, BadOptions) {
  // Two invalid block_align combinations, each of which must fail DB::Open:
  // (1) a block size that is not a power of two, (2) compression enabled.
  ROCKSDB_NAMESPACE::Options options;
  options.compression = kNoCompression;
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  bbto.block_size = 4000;  // not a power of two
  bbto.block_align = true;

  const std::string db_path =
      test::PerThreadDBPath("block_based_table_bad_options_test");
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  ASSERT_OK(DestroyDB(db_path, options));
  ROCKSDB_NAMESPACE::DB* db = nullptr;
  ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, db_path, &db));

  // A power-of-two block size is fine on its own, but compression still
  // conflicts with block_align.
  bbto.block_size = 4096;
  options.compression = kSnappyCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, db_path, &db));
}
4520
TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) {
  TailPrefetchStats tpstats;
  auto record = [&tpstats](size_t sz) { tpstats.RecordEffectiveSize(sz); };

  // Nothing recorded yet, so no prefetch size is suggested.
  ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize());
  record(1000);
  record(1005);
  record(1002);
  ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize());

  // One single super large value shouldn't influence much.
  record(1002000);
  record(999);
  ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize());
  ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize());

  // Only a history of 32 samples is kept.
  for (int i = 0; i < 32; ++i) {
    record(100);
  }
  ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize());

  // 16 large values followed by 16 small values: the suggestion should end
  // up closer to the small values.
  for (int i = 0; i < 16; ++i) {
    record(1000);
  }
  record(10);
  record(20);
  for (int i = 0; i < 6; ++i) {
    record(100);
  }
  ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize());
  ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize());
}
4554
TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) {
  // min_offset_read() should track the smallest offset ever requested from
  // the buffer, regardless of the order in which offsets are probed.
  // (The previous version declared an unused TailPrefetchStats local,
  // which has been removed.)
  FilePrefetchBuffer buffer(nullptr, 0, 0, false /* enable */,
                            true /* track_min_offset */);
  IOOptions opts;
  // Return values are intentionally ignored: the reads miss the (disabled)
  // cache, but each probe still updates the tracked minimum offset.
  buffer.TryReadFromCache(opts, 500, 10, nullptr, nullptr);
  buffer.TryReadFromCache(opts, 480, 10, nullptr, nullptr);
  buffer.TryReadFromCache(opts, 490, 10, nullptr, nullptr);
  ASSERT_EQ(480, buffer.min_offset_read());
}
4564
TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
  // Verifies the data-block hash index: Get() uses the in-block hash index
  // while Seek() uses binary search, and both must agree for existent and
  // non-existent keys.
  const int kNumKeys = 500;
  const int kKeySize = 8;
  const int kValSize = 40;

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.data_block_index_type =
      BlockBasedTableOptions::kDataBlockBinaryAndHash;

  Options options;
  options.comparator = BytewiseComparator();

  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(options.comparator);

  static Random rnd(1048);
  for (int i = 0; i < kNumKeys; i++) {
    // Pad with "1" to mark existent keys; the non-existent probes below
    // flip the last byte to "0".
    std::string random_key(rnd.RandomString(kKeySize - 1) + "1");
    InternalKey k(random_key, 0, kTypeValue);
    c.Add(k.Encode().ToString(), rnd.RandomString(kValSize));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);

  auto reader = c.GetTableReader();

  std::unique_ptr<InternalIterator> seek_iter;
  ReadOptions read_options;
  seek_iter.reset(reader->NewIterator(
      read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  // i == 0 exercises Seek(); i == 1 exercises Get().
  for (int i = 0; i < 2; ++i) {
    ReadOptions ro;
    // for every kv, we seek using two method: Get() and Seek()
    // Get() will use the SuffixIndexHash in Block. For non-existent key it
    // will invalidate the iterator
    // Seek() will use the default BinarySeek() in Block. So for non-existent
    // key it will land at the closest key that is large than target.

    // Search for existent keys
    for (auto& kv : kvmap) {
      if (i == 0) {
        // Search using Seek()
        seek_iter->Seek(kv.first);
        ASSERT_OK(seek_iter->status());
        ASSERT_TRUE(seek_iter->Valid());
        ASSERT_EQ(seek_iter->key(), kv.first);
        ASSERT_EQ(seek_iter->value(), kv.second);
      } else {
        // Search using Get()
        PinnableSlice value;
        std::string user_key = ExtractUserKey(kv.first).ToString();
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
                               nullptr, true, nullptr, nullptr);
        ASSERT_OK(reader->Get(ro, kv.first, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kFound);
        ASSERT_EQ(value, Slice(kv.second));
        value.Reset();
      }
    }

    // Search for non-existent keys
    for (auto& kv : kvmap) {
      std::string user_key = ExtractUserKey(kv.first).ToString();
      user_key.back() = '0';  // make it non-existent key
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      if (i == 0) {  // Search using Seek()
        seek_iter->Seek(encoded_key);
        ASSERT_OK(seek_iter->status());
        if (seek_iter->Valid()) {
          // Seek must land strictly past the missing key.
          ASSERT_TRUE(BytewiseComparator()->Compare(
                          user_key, ExtractUserKey(seek_iter->key())) < 0);
        }
      } else {  // Search using Get()
        PinnableSlice value;
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
                               nullptr, true, nullptr, nullptr);
        ASSERT_OK(reader->Get(ro, encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kNotFound);
        value.Reset();
      }
    }
  }
}
4662
// BlockBasedTableIterator should invalidate itself and return
// OutOfBound()=true immediately after Seek(), to allow LevelIterator
// to filter out the corresponding level.
TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) {
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("foo", "v1");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();

  // Upper bound "bar" sorts before the only key "foo", so every seek must
  // come back invalid with an out-of-bound indication.
  ReadOptions read_opt;
  std::string upper_bound = "bar";
  Slice upper_bound_slice(upper_bound);
  read_opt.iterate_upper_bound = &upper_bound_slice;

  // Builds a fresh user-key iterator over the table with the bound applied.
  auto make_iter = [&]() {
    return std::unique_ptr<InternalIterator>(
        new KeyConvertingIterator(reader->NewIterator(
            read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
            /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
  };

  std::unique_ptr<InternalIterator> iter = make_iter();
  iter->SeekToFirst();
  ASSERT_FALSE(iter->Valid());
  ASSERT_OK(iter->status());
  ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);

  iter = make_iter();
  iter->Seek("foo");
  ASSERT_FALSE(iter->Valid());
  ASSERT_OK(iter->status());
  ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);
}
4699
// BlockBasedTableIterator should invalidate itself and return
// OutOfBound()=true after Next(), if it finds the current index key is no
// smaller than the upper bound, unless it is pointing to the last data block.
TEST_P(BlockBasedTableTest, OutOfBoundOnNext) {
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("bar", "v");
  c.Add("foo", "v");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  // Flush after every key so each key lives in its own data block.
  table_opt.flush_block_policy_factory =
      std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();
  ReadOptions read_opt;

  // Builds a fresh user-key iterator; read_opt is captured by reference so
  // the bound set below applies to each new iterator.
  auto make_iter = [&]() {
    return std::unique_ptr<InternalIterator>(
        new KeyConvertingIterator(reader->NewIterator(
            read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
            /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
  };

  // Bound between "bar" and "foo": stepping off "bar" crosses the bound and
  // must report out-of-bound.
  std::string ub1 = "bar_after";
  Slice ub_slice1(ub1);
  read_opt.iterate_upper_bound = &ub_slice1;
  std::unique_ptr<InternalIterator> iter = make_iter();
  iter->Seek("bar");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);

  // Bound past the last key: running off the end of the last data block is
  // plain exhaustion, not an out-of-bound condition.
  std::string ub2 = "foo_after";
  Slice ub_slice2(ub2);
  read_opt.iterate_upper_bound = &ub_slice2;
  iter = make_iter();
  iter->Seek("foo");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_FALSE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);
}
4746
4747 } // namespace ROCKSDB_NAMESPACE
4748
// Standard gtest entry point: parse gtest flags, then run every registered
// test in this binary.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
4753