1 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file. See the AUTHORS file for names of contributors.
5
6 #ifndef ROCKSDB_LITE
7
8 #include "table/plain/plain_table_reader.h"
9
10 #include <string>
11 #include <vector>
12
13 #include "db/dbformat.h"
14
15 #include "rocksdb/cache.h"
16 #include "rocksdb/comparator.h"
17 #include "rocksdb/env.h"
18 #include "rocksdb/filter_policy.h"
19 #include "rocksdb/options.h"
20 #include "rocksdb/statistics.h"
21
22 #include "table/block_based/block.h"
23 #include "table/block_based/filter_block.h"
24 #include "table/format.h"
25 #include "table/get_context.h"
26 #include "table/internal_iterator.h"
27 #include "table/meta_blocks.h"
28 #include "table/plain/plain_table_bloom.h"
29 #include "table/plain/plain_table_factory.h"
30 #include "table/plain/plain_table_key_coding.h"
31 #include "table/two_level_iterator.h"
32
33 #include "memory/arena.h"
34 #include "monitoring/histogram.h"
35 #include "monitoring/perf_context_imp.h"
36 #include "util/coding.h"
37 #include "util/dynamic_bloom.h"
38 #include "util/hash.h"
39 #include "util/stop_watch.h"
40 #include "util/string_util.h"
41
42 namespace ROCKSDB_NAMESPACE {
43
44 namespace {
45
46 // Safely getting a uint32_t element from a char array, where, starting from
47 // `base`, every 4 bytes are considered as an fixed 32 bit integer.
GetFixed32Element(const char * base,size_t offset)48 inline uint32_t GetFixed32Element(const char* base, size_t offset) {
49 return DecodeFixed32(base + offset * sizeof(uint32_t));
50 }
51 } // namespace
52
53 // Iterator to iterate IndexedTable
54 class PlainTableIterator : public InternalIterator {
55 public:
56 explicit PlainTableIterator(PlainTableReader* table, bool use_prefix_seek);
57 // No copying allowed
58 PlainTableIterator(const PlainTableIterator&) = delete;
59 void operator=(const Iterator&) = delete;
60
61 ~PlainTableIterator() override;
62
63 bool Valid() const override;
64
65 void SeekToFirst() override;
66
67 void SeekToLast() override;
68
69 void Seek(const Slice& target) override;
70
71 void SeekForPrev(const Slice& target) override;
72
73 void Next() override;
74
75 void Prev() override;
76
77 Slice key() const override;
78
79 Slice value() const override;
80
81 Status status() const override;
82
83 private:
84 PlainTableReader* table_;
85 PlainTableKeyDecoder decoder_;
86 bool use_prefix_seek_;
87 uint32_t offset_;
88 uint32_t next_offset_;
89 Slice key_;
90 Slice value_;
91 Status status_;
92 };
93
94 extern const uint64_t kPlainTableMagicNumber;
PlainTableReader(const ImmutableOptions & ioptions,std::unique_ptr<RandomAccessFileReader> && file,const EnvOptions & storage_options,const InternalKeyComparator & icomparator,EncodingType encoding_type,uint64_t file_size,const TableProperties * table_properties,const SliceTransform * prefix_extractor)95 PlainTableReader::PlainTableReader(
96 const ImmutableOptions& ioptions,
97 std::unique_ptr<RandomAccessFileReader>&& file,
98 const EnvOptions& storage_options, const InternalKeyComparator& icomparator,
99 EncodingType encoding_type, uint64_t file_size,
100 const TableProperties* table_properties,
101 const SliceTransform* prefix_extractor)
102 : internal_comparator_(icomparator),
103 encoding_type_(encoding_type),
104 full_scan_mode_(false),
105 user_key_len_(static_cast<uint32_t>(table_properties->fixed_key_len)),
106 prefix_extractor_(prefix_extractor),
107 enable_bloom_(false),
108 bloom_(6),
109 file_info_(std::move(file), storage_options,
110 static_cast<uint32_t>(table_properties->data_size)),
111 ioptions_(ioptions),
112 file_size_(file_size),
113 table_properties_(nullptr) {}
114
~PlainTableReader()115 PlainTableReader::~PlainTableReader() {
116 // Should fix?
117 status_.PermitUncheckedError();
118 }
119
Open(const ImmutableOptions & ioptions,const EnvOptions & env_options,const InternalKeyComparator & internal_comparator,std::unique_ptr<RandomAccessFileReader> && file,uint64_t file_size,std::unique_ptr<TableReader> * table_reader,const int bloom_bits_per_key,double hash_table_ratio,size_t index_sparseness,size_t huge_page_tlb_size,bool full_scan_mode,const bool immortal_table,const SliceTransform * prefix_extractor)120 Status PlainTableReader::Open(
121 const ImmutableOptions& ioptions, const EnvOptions& env_options,
122 const InternalKeyComparator& internal_comparator,
123 std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
124 std::unique_ptr<TableReader>* table_reader, const int bloom_bits_per_key,
125 double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size,
126 bool full_scan_mode, const bool immortal_table,
127 const SliceTransform* prefix_extractor) {
128 if (file_size > PlainTableIndex::kMaxFileSize) {
129 return Status::NotSupported("File is too large for PlainTableReader!");
130 }
131
132 TableProperties* props_ptr = nullptr;
133 auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
134 ioptions, &props_ptr,
135 true /* compression_type_missing */);
136 std::shared_ptr<TableProperties> props(props_ptr);
137 if (!s.ok()) {
138 return s;
139 }
140
141 assert(hash_table_ratio >= 0.0);
142 auto& user_props = props->user_collected_properties;
143 auto prefix_extractor_in_file = props->prefix_extractor_name;
144
145 if (!full_scan_mode &&
146 !prefix_extractor_in_file.empty() /* old version sst file*/
147 && prefix_extractor_in_file != "nullptr") {
148 if (!prefix_extractor) {
149 return Status::InvalidArgument(
150 "Prefix extractor is missing when opening a PlainTable built "
151 "using a prefix extractor");
152 } else if (prefix_extractor_in_file != prefix_extractor->AsString()) {
153 return Status::InvalidArgument(
154 "Prefix extractor given doesn't match the one used to build "
155 "PlainTable");
156 }
157 }
158
159 EncodingType encoding_type = kPlain;
160 auto encoding_type_prop =
161 user_props.find(PlainTablePropertyNames::kEncodingType);
162 if (encoding_type_prop != user_props.end()) {
163 encoding_type = static_cast<EncodingType>(
164 DecodeFixed32(encoding_type_prop->second.c_str()));
165 }
166
167 std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader(
168 ioptions, std::move(file), env_options, internal_comparator,
169 encoding_type, file_size, props.get(), prefix_extractor));
170
171 s = new_reader->MmapDataIfNeeded();
172 if (!s.ok()) {
173 return s;
174 }
175
176 if (!full_scan_mode) {
177 s = new_reader->PopulateIndex(props.get(), bloom_bits_per_key,
178 hash_table_ratio, index_sparseness,
179 huge_page_tlb_size);
180 if (!s.ok()) {
181 return s;
182 }
183 } else {
184 // Flag to indicate it is a full scan mode so that none of the indexes
185 // can be used.
186 new_reader->full_scan_mode_ = true;
187 }
188 // PopulateIndex can add to the props, so don't store them until now
189 new_reader->table_properties_ = props;
190
191 if (immortal_table && new_reader->file_info_.is_mmap_mode) {
192 new_reader->dummy_cleanable_.reset(new Cleanable());
193 }
194
195 *table_reader = std::move(new_reader);
196 return s;
197 }
198
SetupForCompaction()199 void PlainTableReader::SetupForCompaction() {
200 }
201
NewIterator(const ReadOptions & options,const SliceTransform *,Arena * arena,bool,TableReaderCaller,size_t,bool)202 InternalIterator* PlainTableReader::NewIterator(
203 const ReadOptions& options, const SliceTransform* /* prefix_extractor */,
204 Arena* arena, bool /*skip_filters*/, TableReaderCaller /*caller*/,
205 size_t /*compaction_readahead_size*/,
206 bool /* allow_unprepared_value */) {
207 // Not necessarily used here, but make sure this has been initialized
208 assert(table_properties_);
209
210 // Auto prefix mode is not implemented in PlainTable.
211 bool use_prefix_seek = !IsTotalOrderMode() && !options.total_order_seek &&
212 !options.auto_prefix_mode;
213 if (arena == nullptr) {
214 return new PlainTableIterator(this, use_prefix_seek);
215 } else {
216 auto mem = arena->AllocateAligned(sizeof(PlainTableIterator));
217 return new (mem) PlainTableIterator(this, use_prefix_seek);
218 }
219 }
220
PopulateIndexRecordList(PlainTableIndexBuilder * index_builder,std::vector<uint32_t> * prefix_hashes)221 Status PlainTableReader::PopulateIndexRecordList(
222 PlainTableIndexBuilder* index_builder,
223 std::vector<uint32_t>* prefix_hashes) {
224 Slice prev_key_prefix_slice;
225 std::string prev_key_prefix_buf;
226 uint32_t pos = data_start_offset_;
227
228 bool is_first_record = true;
229 Slice key_prefix_slice;
230 PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
231 prefix_extractor_);
232 while (pos < file_info_.data_end_offset) {
233 uint32_t key_offset = pos;
234 ParsedInternalKey key;
235 Slice value_slice;
236 bool seekable = false;
237 Status s = Next(&decoder, &pos, &key, nullptr, &value_slice, &seekable);
238 if (!s.ok()) {
239 return s;
240 }
241
242 key_prefix_slice = GetPrefix(key);
243 if (enable_bloom_) {
244 bloom_.AddHash(GetSliceHash(key.user_key));
245 } else {
246 if (is_first_record || prev_key_prefix_slice != key_prefix_slice) {
247 if (!is_first_record) {
248 prefix_hashes->push_back(GetSliceHash(prev_key_prefix_slice));
249 }
250 if (file_info_.is_mmap_mode) {
251 prev_key_prefix_slice = key_prefix_slice;
252 } else {
253 prev_key_prefix_buf = key_prefix_slice.ToString();
254 prev_key_prefix_slice = prev_key_prefix_buf;
255 }
256 }
257 }
258
259 index_builder->AddKeyPrefix(GetPrefix(key), key_offset);
260
261 if (!seekable && is_first_record) {
262 return Status::Corruption("Key for a prefix is not seekable");
263 }
264
265 is_first_record = false;
266 }
267
268 prefix_hashes->push_back(GetSliceHash(key_prefix_slice));
269 auto s = index_.InitFromRawData(index_builder->Finish());
270 return s;
271 }
272
AllocateBloom(int bloom_bits_per_key,int num_keys,size_t huge_page_tlb_size)273 void PlainTableReader::AllocateBloom(int bloom_bits_per_key, int num_keys,
274 size_t huge_page_tlb_size) {
275 uint32_t bloom_total_bits = num_keys * bloom_bits_per_key;
276 if (bloom_total_bits > 0) {
277 enable_bloom_ = true;
278 bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality,
279 huge_page_tlb_size, ioptions_.logger);
280 }
281 }
282
FillBloom(const std::vector<uint32_t> & prefix_hashes)283 void PlainTableReader::FillBloom(const std::vector<uint32_t>& prefix_hashes) {
284 assert(bloom_.IsInitialized());
285 for (const auto prefix_hash : prefix_hashes) {
286 bloom_.AddHash(prefix_hash);
287 }
288 }
289
MmapDataIfNeeded()290 Status PlainTableReader::MmapDataIfNeeded() {
291 if (file_info_.is_mmap_mode) {
292 // Get mmapped memory.
293 return file_info_.file->Read(IOOptions(), 0,
294 static_cast<size_t>(file_size_),
295 &file_info_.file_data, nullptr, nullptr);
296 }
297 return Status::OK();
298 }
299
PopulateIndex(TableProperties * props,int bloom_bits_per_key,double hash_table_ratio,size_t index_sparseness,size_t huge_page_tlb_size)300 Status PlainTableReader::PopulateIndex(TableProperties* props,
301 int bloom_bits_per_key,
302 double hash_table_ratio,
303 size_t index_sparseness,
304 size_t huge_page_tlb_size) {
305 assert(props != nullptr);
306
307 BlockContents index_block_contents;
308 Status s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
309 file_size_, kPlainTableMagicNumber, ioptions_,
310 PlainTableIndexBuilder::kPlainTableIndexBlock,
311 BlockType::kIndex, &index_block_contents,
312 true /* compression_type_missing */);
313
314 bool index_in_file = s.ok();
315
316 BlockContents bloom_block_contents;
317 bool bloom_in_file = false;
318 // We only need to read the bloom block if index block is in file.
319 if (index_in_file) {
320 s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
321 file_size_, kPlainTableMagicNumber, ioptions_,
322 BloomBlockBuilder::kBloomBlock, BlockType::kFilter,
323 &bloom_block_contents,
324 true /* compression_type_missing */);
325 bloom_in_file = s.ok() && bloom_block_contents.data.size() > 0;
326 }
327
328 Slice* bloom_block;
329 if (bloom_in_file) {
330 // If bloom_block_contents.allocation is not empty (which will be the case
331 // for non-mmap mode), it holds the alloated memory for the bloom block.
332 // It needs to be kept alive to keep `bloom_block` valid.
333 bloom_block_alloc_ = std::move(bloom_block_contents.allocation);
334 bloom_block = &bloom_block_contents.data;
335 } else {
336 bloom_block = nullptr;
337 }
338
339 Slice* index_block;
340 if (index_in_file) {
341 // If index_block_contents.allocation is not empty (which will be the case
342 // for non-mmap mode), it holds the alloated memory for the index block.
343 // It needs to be kept alive to keep `index_block` valid.
344 index_block_alloc_ = std::move(index_block_contents.allocation);
345 index_block = &index_block_contents.data;
346 } else {
347 index_block = nullptr;
348 }
349
350 if ((prefix_extractor_ == nullptr) && (hash_table_ratio != 0)) {
351 // moptions.prefix_extractor is requried for a hash-based look-up.
352 return Status::NotSupported(
353 "PlainTable requires a prefix extractor enable prefix hash mode.");
354 }
355
356 // First, read the whole file, for every kIndexIntervalForSamePrefixKeys rows
357 // for a prefix (starting from the first one), generate a record of (hash,
358 // offset) and append it to IndexRecordList, which is a data structure created
359 // to store them.
360
361 if (!index_in_file) {
362 // Allocate bloom filter here for total order mode.
363 if (IsTotalOrderMode()) {
364 AllocateBloom(bloom_bits_per_key,
365 static_cast<uint32_t>(props->num_entries),
366 huge_page_tlb_size);
367 }
368 } else if (bloom_in_file) {
369 enable_bloom_ = true;
370 auto num_blocks_property = props->user_collected_properties.find(
371 PlainTablePropertyNames::kNumBloomBlocks);
372
373 uint32_t num_blocks = 0;
374 if (num_blocks_property != props->user_collected_properties.end()) {
375 Slice temp_slice(num_blocks_property->second);
376 if (!GetVarint32(&temp_slice, &num_blocks)) {
377 num_blocks = 0;
378 }
379 }
380 // cast away const qualifier, because bloom_ won't be changed
381 bloom_.SetRawData(const_cast<char*>(bloom_block->data()),
382 static_cast<uint32_t>(bloom_block->size()) * 8,
383 num_blocks);
384 } else {
385 // Index in file but no bloom in file. Disable bloom filter in this case.
386 enable_bloom_ = false;
387 bloom_bits_per_key = 0;
388 }
389
390 PlainTableIndexBuilder index_builder(&arena_, ioptions_, prefix_extractor_,
391 index_sparseness, hash_table_ratio,
392 huge_page_tlb_size);
393
394 std::vector<uint32_t> prefix_hashes;
395 if (!index_in_file) {
396 // Populates _bloom if enabled (total order mode)
397 s = PopulateIndexRecordList(&index_builder, &prefix_hashes);
398 if (!s.ok()) {
399 return s;
400 }
401 } else {
402 s = index_.InitFromRawData(*index_block);
403 if (!s.ok()) {
404 return s;
405 }
406 }
407
408 if (!index_in_file) {
409 if (!IsTotalOrderMode()) {
410 // Calculated bloom filter size and allocate memory for
411 // bloom filter based on the number of prefixes, then fill it.
412 AllocateBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
413 huge_page_tlb_size);
414 if (enable_bloom_) {
415 FillBloom(prefix_hashes);
416 }
417 }
418 }
419
420 // Fill two table properties.
421 if (!index_in_file) {
422 props->user_collected_properties["plain_table_hash_table_size"] =
423 ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen);
424 props->user_collected_properties["plain_table_sub_index_size"] =
425 ToString(index_.GetSubIndexSize());
426 } else {
427 props->user_collected_properties["plain_table_hash_table_size"] =
428 ToString(0);
429 props->user_collected_properties["plain_table_sub_index_size"] =
430 ToString(0);
431 }
432
433 return Status::OK();
434 }
435
GetOffset(PlainTableKeyDecoder * decoder,const Slice & target,const Slice & prefix,uint32_t prefix_hash,bool & prefix_matched,uint32_t * offset) const436 Status PlainTableReader::GetOffset(PlainTableKeyDecoder* decoder,
437 const Slice& target, const Slice& prefix,
438 uint32_t prefix_hash, bool& prefix_matched,
439 uint32_t* offset) const {
440 prefix_matched = false;
441 uint32_t prefix_index_offset;
442 auto res = index_.GetOffset(prefix_hash, &prefix_index_offset);
443 if (res == PlainTableIndex::kNoPrefixForBucket) {
444 *offset = file_info_.data_end_offset;
445 return Status::OK();
446 } else if (res == PlainTableIndex::kDirectToFile) {
447 *offset = prefix_index_offset;
448 return Status::OK();
449 }
450
451 // point to sub-index, need to do a binary search
452 uint32_t upper_bound = 0;
453 const char* base_ptr =
454 index_.GetSubIndexBasePtrAndUpperBound(prefix_index_offset, &upper_bound);
455 uint32_t low = 0;
456 uint32_t high = upper_bound;
457 ParsedInternalKey mid_key;
458 ParsedInternalKey parsed_target;
459 Status s = ParseInternalKey(target, &parsed_target,
460 false /* log_err_key */); // TODO
461 if (!s.ok()) return s;
462
463 // The key is between [low, high). Do a binary search between it.
464 while (high - low > 1) {
465 uint32_t mid = (high + low) / 2;
466 uint32_t file_offset = GetFixed32Element(base_ptr, mid);
467 uint32_t tmp;
468 s = decoder->NextKeyNoValue(file_offset, &mid_key, nullptr, &tmp);
469 if (!s.ok()) {
470 return s;
471 }
472 int cmp_result = internal_comparator_.Compare(mid_key, parsed_target);
473 if (cmp_result < 0) {
474 low = mid;
475 } else {
476 if (cmp_result == 0) {
477 // Happen to have found the exact key or target is smaller than the
478 // first key after base_offset.
479 prefix_matched = true;
480 *offset = file_offset;
481 return Status::OK();
482 } else {
483 high = mid;
484 }
485 }
486 }
487 // Both of the key at the position low or low+1 could share the same
488 // prefix as target. We need to rule out one of them to avoid to go
489 // to the wrong prefix.
490 ParsedInternalKey low_key;
491 uint32_t tmp;
492 uint32_t low_key_offset = GetFixed32Element(base_ptr, low);
493 s = decoder->NextKeyNoValue(low_key_offset, &low_key, nullptr, &tmp);
494 if (!s.ok()) {
495 return s;
496 }
497
498 if (GetPrefix(low_key) == prefix) {
499 prefix_matched = true;
500 *offset = low_key_offset;
501 } else if (low + 1 < upper_bound) {
502 // There is possible a next prefix, return it
503 prefix_matched = false;
504 *offset = GetFixed32Element(base_ptr, low + 1);
505 } else {
506 // target is larger than a key of the last prefix in this bucket
507 // but with a different prefix. Key does not exist.
508 *offset = file_info_.data_end_offset;
509 }
510 return Status::OK();
511 }
512
MatchBloom(uint32_t hash) const513 bool PlainTableReader::MatchBloom(uint32_t hash) const {
514 if (!enable_bloom_) {
515 return true;
516 }
517
518 if (bloom_.MayContainHash(hash)) {
519 PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
520 return true;
521 } else {
522 PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
523 return false;
524 }
525 }
526
Next(PlainTableKeyDecoder * decoder,uint32_t * offset,ParsedInternalKey * parsed_key,Slice * internal_key,Slice * value,bool * seekable) const527 Status PlainTableReader::Next(PlainTableKeyDecoder* decoder, uint32_t* offset,
528 ParsedInternalKey* parsed_key,
529 Slice* internal_key, Slice* value,
530 bool* seekable) const {
531 if (*offset == file_info_.data_end_offset) {
532 *offset = file_info_.data_end_offset;
533 return Status::OK();
534 }
535
536 if (*offset > file_info_.data_end_offset) {
537 return Status::Corruption("Offset is out of file size");
538 }
539
540 uint32_t bytes_read;
541 Status s = decoder->NextKey(*offset, parsed_key, internal_key, value,
542 &bytes_read, seekable);
543 if (!s.ok()) {
544 return s;
545 }
546 *offset = *offset + bytes_read;
547 return Status::OK();
548 }
549
Prepare(const Slice & target)550 void PlainTableReader::Prepare(const Slice& target) {
551 if (enable_bloom_) {
552 uint32_t prefix_hash = GetSliceHash(GetPrefix(target));
553 bloom_.Prefetch(prefix_hash);
554 }
555 }
556
Get(const ReadOptions &,const Slice & target,GetContext * get_context,const SliceTransform *,bool)557 Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
558 GetContext* get_context,
559 const SliceTransform* /* prefix_extractor */,
560 bool /*skip_filters*/) {
561 // Check bloom filter first.
562 Slice prefix_slice;
563 uint32_t prefix_hash;
564 if (IsTotalOrderMode()) {
565 if (full_scan_mode_) {
566 status_ =
567 Status::InvalidArgument("Get() is not allowed in full scan mode.");
568 }
569 // Match whole user key for bloom filter check.
570 if (!MatchBloom(GetSliceHash(GetUserKey(target)))) {
571 return Status::OK();
572 }
573 // in total order mode, there is only one bucket 0, and we always use empty
574 // prefix.
575 prefix_slice = Slice();
576 prefix_hash = 0;
577 } else {
578 prefix_slice = GetPrefix(target);
579 prefix_hash = GetSliceHash(prefix_slice);
580 if (!MatchBloom(prefix_hash)) {
581 return Status::OK();
582 }
583 }
584 uint32_t offset;
585 bool prefix_match;
586 PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
587 prefix_extractor_);
588 Status s = GetOffset(&decoder, target, prefix_slice, prefix_hash,
589 prefix_match, &offset);
590
591 if (!s.ok()) {
592 return s;
593 }
594 ParsedInternalKey found_key;
595 ParsedInternalKey parsed_target;
596 s = ParseInternalKey(target, &parsed_target,
597 false /* log_err_key */); // TODO
598 if (!s.ok()) return s;
599
600 Slice found_value;
601 while (offset < file_info_.data_end_offset) {
602 s = Next(&decoder, &offset, &found_key, nullptr, &found_value);
603 if (!s.ok()) {
604 return s;
605 }
606 if (!prefix_match) {
607 // Need to verify prefix for the first key found if it is not yet
608 // checked.
609 if (GetPrefix(found_key) != prefix_slice) {
610 return Status::OK();
611 }
612 prefix_match = true;
613 }
614 // TODO(ljin): since we know the key comparison result here,
615 // can we enable the fast path?
616 if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
617 bool dont_care __attribute__((__unused__));
618 if (!get_context->SaveValue(found_key, found_value, &dont_care,
619 dummy_cleanable_.get())) {
620 break;
621 }
622 }
623 }
624 return Status::OK();
625 }
626
ApproximateOffsetOf(const Slice &,TableReaderCaller)627 uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& /*key*/,
628 TableReaderCaller /*caller*/) {
629 return 0;
630 }
631
ApproximateSize(const Slice &,const Slice &,TableReaderCaller)632 uint64_t PlainTableReader::ApproximateSize(const Slice& /*start*/,
633 const Slice& /*end*/,
634 TableReaderCaller /*caller*/) {
635 return 0;
636 }
637
PlainTableIterator(PlainTableReader * table,bool use_prefix_seek)638 PlainTableIterator::PlainTableIterator(PlainTableReader* table,
639 bool use_prefix_seek)
640 : table_(table),
641 decoder_(&table_->file_info_, table_->encoding_type_,
642 table_->user_key_len_, table_->prefix_extractor_),
643 use_prefix_seek_(use_prefix_seek) {
644 next_offset_ = offset_ = table_->file_info_.data_end_offset;
645 }
646
~PlainTableIterator()647 PlainTableIterator::~PlainTableIterator() {
648 }
649
Valid() const650 bool PlainTableIterator::Valid() const {
651 return offset_ < table_->file_info_.data_end_offset &&
652 offset_ >= table_->data_start_offset_;
653 }
654
SeekToFirst()655 void PlainTableIterator::SeekToFirst() {
656 status_ = Status::OK();
657 next_offset_ = table_->data_start_offset_;
658 if (next_offset_ >= table_->file_info_.data_end_offset) {
659 next_offset_ = offset_ = table_->file_info_.data_end_offset;
660 } else {
661 Next();
662 }
663 }
664
SeekToLast()665 void PlainTableIterator::SeekToLast() {
666 assert(false);
667 status_ = Status::NotSupported("SeekToLast() is not supported in PlainTable");
668 next_offset_ = offset_ = table_->file_info_.data_end_offset;
669 }
670
Seek(const Slice & target)671 void PlainTableIterator::Seek(const Slice& target) {
672 if (use_prefix_seek_ != !table_->IsTotalOrderMode()) {
673 // This check is done here instead of NewIterator() to permit creating an
674 // iterator with total_order_seek = true even if we won't be able to Seek()
675 // it. This is needed for compaction: it creates iterator with
676 // total_order_seek = true but usually never does Seek() on it,
677 // only SeekToFirst().
678 status_ =
679 Status::InvalidArgument(
680 "total_order_seek not implemented for PlainTable.");
681 offset_ = next_offset_ = table_->file_info_.data_end_offset;
682 return;
683 }
684
685 // If the user doesn't set prefix seek option and we are not able to do a
686 // total Seek(). assert failure.
687 if (table_->IsTotalOrderMode()) {
688 if (table_->full_scan_mode_) {
689 status_ =
690 Status::InvalidArgument("Seek() is not allowed in full scan mode.");
691 offset_ = next_offset_ = table_->file_info_.data_end_offset;
692 return;
693 } else if (table_->GetIndexSize() > 1) {
694 assert(false);
695 status_ = Status::NotSupported(
696 "PlainTable cannot issue non-prefix seek unless in total order "
697 "mode.");
698 offset_ = next_offset_ = table_->file_info_.data_end_offset;
699 return;
700 }
701 }
702
703 Slice prefix_slice = table_->GetPrefix(target);
704 uint32_t prefix_hash = 0;
705 // Bloom filter is ignored in total-order mode.
706 if (!table_->IsTotalOrderMode()) {
707 prefix_hash = GetSliceHash(prefix_slice);
708 if (!table_->MatchBloom(prefix_hash)) {
709 status_ = Status::OK();
710 offset_ = next_offset_ = table_->file_info_.data_end_offset;
711 return;
712 }
713 }
714 bool prefix_match;
715 status_ = table_->GetOffset(&decoder_, target, prefix_slice, prefix_hash,
716 prefix_match, &next_offset_);
717 if (!status_.ok()) {
718 offset_ = next_offset_ = table_->file_info_.data_end_offset;
719 return;
720 }
721
722 if (next_offset_ < table_->file_info_.data_end_offset) {
723 for (Next(); status_.ok() && Valid(); Next()) {
724 if (!prefix_match) {
725 // Need to verify the first key's prefix
726 if (table_->GetPrefix(key()) != prefix_slice) {
727 offset_ = next_offset_ = table_->file_info_.data_end_offset;
728 break;
729 }
730 prefix_match = true;
731 }
732 if (table_->internal_comparator_.Compare(key(), target) >= 0) {
733 break;
734 }
735 }
736 } else {
737 offset_ = table_->file_info_.data_end_offset;
738 }
739 }
740
SeekForPrev(const Slice &)741 void PlainTableIterator::SeekForPrev(const Slice& /*target*/) {
742 assert(false);
743 status_ =
744 Status::NotSupported("SeekForPrev() is not supported in PlainTable");
745 offset_ = next_offset_ = table_->file_info_.data_end_offset;
746 }
747
Next()748 void PlainTableIterator::Next() {
749 offset_ = next_offset_;
750 if (offset_ < table_->file_info_.data_end_offset) {
751 Slice tmp_slice;
752 ParsedInternalKey parsed_key;
753 status_ =
754 table_->Next(&decoder_, &next_offset_, &parsed_key, &key_, &value_);
755 if (!status_.ok()) {
756 offset_ = next_offset_ = table_->file_info_.data_end_offset;
757 }
758 }
759 }
760
Prev()761 void PlainTableIterator::Prev() {
762 assert(false);
763 }
764
key() const765 Slice PlainTableIterator::key() const {
766 assert(Valid());
767 return key_;
768 }
769
value() const770 Slice PlainTableIterator::value() const {
771 assert(Valid());
772 return value_;
773 }
774
status() const775 Status PlainTableIterator::status() const {
776 return status_;
777 }
778
779 } // namespace ROCKSDB_NAMESPACE
780 #endif // ROCKSDB_LITE
781