1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include "table/block_based/index_builder.h"
11
12 #include <assert.h>
13 #include <cinttypes>
14
15 #include <list>
16 #include <string>
17
18 #include "rocksdb/comparator.h"
19 #include "rocksdb/flush_block_policy.h"
20 #include "table/block_based/partitioned_filter_block.h"
21 #include "table/format.h"
22
23 // Without anonymous namespace here, we fail the warning -Wmissing-prototypes
24 namespace ROCKSDB_NAMESPACE {
25 // using namespace rocksdb;
26 // Create a index builder based on its type.
CreateIndexBuilder(BlockBasedTableOptions::IndexType index_type,const InternalKeyComparator * comparator,const InternalKeySliceTransform * int_key_slice_transform,const bool use_value_delta_encoding,const BlockBasedTableOptions & table_opt)27 IndexBuilder* IndexBuilder::CreateIndexBuilder(
28 BlockBasedTableOptions::IndexType index_type,
29 const InternalKeyComparator* comparator,
30 const InternalKeySliceTransform* int_key_slice_transform,
31 const bool use_value_delta_encoding,
32 const BlockBasedTableOptions& table_opt) {
33 IndexBuilder* result = nullptr;
34 switch (index_type) {
35 case BlockBasedTableOptions::kBinarySearch: {
36 result = new ShortenedIndexBuilder(
37 comparator, table_opt.index_block_restart_interval,
38 table_opt.format_version, use_value_delta_encoding,
39 table_opt.index_shortening, /* include_first_key */ false);
40 } break;
41 case BlockBasedTableOptions::kHashSearch: {
42 // Currently kHashSearch is incompatible with index_block_restart_interval
43 // > 1
44 assert(table_opt.index_block_restart_interval == 1);
45 result = new HashIndexBuilder(
46 comparator, int_key_slice_transform,
47 table_opt.index_block_restart_interval, table_opt.format_version,
48 use_value_delta_encoding, table_opt.index_shortening);
49 } break;
50 case BlockBasedTableOptions::kTwoLevelIndexSearch: {
51 result = PartitionedIndexBuilder::CreateIndexBuilder(
52 comparator, use_value_delta_encoding, table_opt);
53 } break;
54 case BlockBasedTableOptions::kBinarySearchWithFirstKey: {
55 result = new ShortenedIndexBuilder(
56 comparator, table_opt.index_block_restart_interval,
57 table_opt.format_version, use_value_delta_encoding,
58 table_opt.index_shortening, /* include_first_key */ true);
59 } break;
60 default: {
61 assert(!"Do not recognize the index type ");
62 } break;
63 }
64 return result;
65 }
66
CreateIndexBuilder(const InternalKeyComparator * comparator,const bool use_value_delta_encoding,const BlockBasedTableOptions & table_opt)67 PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
68 const InternalKeyComparator* comparator,
69 const bool use_value_delta_encoding,
70 const BlockBasedTableOptions& table_opt) {
71 return new PartitionedIndexBuilder(comparator, table_opt,
72 use_value_delta_encoding);
73 }
74
PartitionedIndexBuilder(const InternalKeyComparator * comparator,const BlockBasedTableOptions & table_opt,const bool use_value_delta_encoding)75 PartitionedIndexBuilder::PartitionedIndexBuilder(
76 const InternalKeyComparator* comparator,
77 const BlockBasedTableOptions& table_opt,
78 const bool use_value_delta_encoding)
79 : IndexBuilder(comparator),
80 index_block_builder_(table_opt.index_block_restart_interval,
81 true /*use_delta_encoding*/,
82 use_value_delta_encoding),
83 index_block_builder_without_seq_(table_opt.index_block_restart_interval,
84 true /*use_delta_encoding*/,
85 use_value_delta_encoding),
86 sub_index_builder_(nullptr),
87 table_opt_(table_opt),
88 // We start by false. After each partition we revise the value based on
89 // what the sub_index_builder has decided. If the feature is disabled
90 // entirely, this will be set to true after switching the first
91 // sub_index_builder. Otherwise, it could be set to true even one of the
92 // sub_index_builders could not safely exclude seq from the keys, then it
93 // wil be enforced on all sub_index_builders on ::Finish.
94 seperator_is_key_plus_seq_(false),
95 use_value_delta_encoding_(use_value_delta_encoding) {}
96
~PartitionedIndexBuilder()97 PartitionedIndexBuilder::~PartitionedIndexBuilder() {
98 delete sub_index_builder_;
99 }
100
MakeNewSubIndexBuilder()101 void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
102 assert(sub_index_builder_ == nullptr);
103 sub_index_builder_ = new ShortenedIndexBuilder(
104 comparator_, table_opt_.index_block_restart_interval,
105 table_opt_.format_version, use_value_delta_encoding_,
106 table_opt_.index_shortening, /* include_first_key */ false);
107 flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
108 table_opt_.metadata_block_size, table_opt_.block_size_deviation,
109 // Note: this is sub-optimal since sub_index_builder_ could later reset
110 // seperator_is_key_plus_seq_ but the probability of that is low.
111 sub_index_builder_->seperator_is_key_plus_seq_
112 ? sub_index_builder_->index_block_builder_
113 : sub_index_builder_->index_block_builder_without_seq_));
114 partition_cut_requested_ = false;
115 }
116
RequestPartitionCut()117 void PartitionedIndexBuilder::RequestPartitionCut() {
118 partition_cut_requested_ = true;
119 }
120
AddIndexEntry(std::string * last_key_in_current_block,const Slice * first_key_in_next_block,const BlockHandle & block_handle)121 void PartitionedIndexBuilder::AddIndexEntry(
122 std::string* last_key_in_current_block,
123 const Slice* first_key_in_next_block, const BlockHandle& block_handle) {
124 // Note: to avoid two consecuitive flush in the same method call, we do not
125 // check flush policy when adding the last key
126 if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys
127 if (sub_index_builder_ == nullptr) {
128 MakeNewSubIndexBuilder();
129 }
130 sub_index_builder_->AddIndexEntry(last_key_in_current_block,
131 first_key_in_next_block, block_handle);
132 if (sub_index_builder_->seperator_is_key_plus_seq_) {
133 // then we need to apply it to all sub-index builders
134 seperator_is_key_plus_seq_ = true;
135 }
136 sub_index_last_key_ = std::string(*last_key_in_current_block);
137 entries_.push_back(
138 {sub_index_last_key_,
139 std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
140 sub_index_builder_ = nullptr;
141 cut_filter_block = true;
142 } else {
143 // apply flush policy only to non-empty sub_index_builder_
144 if (sub_index_builder_ != nullptr) {
145 std::string handle_encoding;
146 block_handle.EncodeTo(&handle_encoding);
147 bool do_flush =
148 partition_cut_requested_ ||
149 flush_policy_->Update(*last_key_in_current_block, handle_encoding);
150 if (do_flush) {
151 entries_.push_back(
152 {sub_index_last_key_,
153 std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
154 cut_filter_block = true;
155 sub_index_builder_ = nullptr;
156 }
157 }
158 if (sub_index_builder_ == nullptr) {
159 MakeNewSubIndexBuilder();
160 }
161 sub_index_builder_->AddIndexEntry(last_key_in_current_block,
162 first_key_in_next_block, block_handle);
163 sub_index_last_key_ = std::string(*last_key_in_current_block);
164 if (sub_index_builder_->seperator_is_key_plus_seq_) {
165 // then we need to apply it to all sub-index builders
166 seperator_is_key_plus_seq_ = true;
167 }
168 }
169 }
170
Finish(IndexBlocks * index_blocks,const BlockHandle & last_partition_block_handle)171 Status PartitionedIndexBuilder::Finish(
172 IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
173 if (partition_cnt_ == 0) {
174 partition_cnt_ = entries_.size();
175 }
176 // It must be set to null after last key is added
177 assert(sub_index_builder_ == nullptr);
178 if (finishing_indexes == true) {
179 Entry& last_entry = entries_.front();
180 std::string handle_encoding;
181 last_partition_block_handle.EncodeTo(&handle_encoding);
182 std::string handle_delta_encoding;
183 PutVarsignedint64(
184 &handle_delta_encoding,
185 last_partition_block_handle.size() - last_encoded_handle_.size());
186 last_encoded_handle_ = last_partition_block_handle;
187 const Slice handle_delta_encoding_slice(handle_delta_encoding);
188 index_block_builder_.Add(last_entry.key, handle_encoding,
189 &handle_delta_encoding_slice);
190 if (!seperator_is_key_plus_seq_) {
191 index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
192 handle_encoding,
193 &handle_delta_encoding_slice);
194 }
195 entries_.pop_front();
196 }
197 // If there is no sub_index left, then return the 2nd level index.
198 if (UNLIKELY(entries_.empty())) {
199 if (seperator_is_key_plus_seq_) {
200 index_blocks->index_block_contents = index_block_builder_.Finish();
201 } else {
202 index_blocks->index_block_contents =
203 index_block_builder_without_seq_.Finish();
204 }
205 top_level_index_size_ = index_blocks->index_block_contents.size();
206 index_size_ += top_level_index_size_;
207 return Status::OK();
208 } else {
209 // Finish the next partition index in line and Incomplete() to indicate we
210 // expect more calls to Finish
211 Entry& entry = entries_.front();
212 // Apply the policy to all sub-indexes
213 entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_;
214 auto s = entry.value->Finish(index_blocks);
215 index_size_ += index_blocks->index_block_contents.size();
216 finishing_indexes = true;
217 return s.ok() ? Status::Incomplete() : s;
218 }
219 }
220
NumPartitions() const221 size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; }
222 } // namespace ROCKSDB_NAMESPACE
223