1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include "table/block_based/index_builder.h"
11 
12 #include <assert.h>
13 #include <cinttypes>
14 
15 #include <list>
16 #include <string>
17 
18 #include "rocksdb/comparator.h"
19 #include "rocksdb/flush_block_policy.h"
20 #include "table/block_based/partitioned_filter_block.h"
21 #include "table/format.h"
22 
23 // Without anonymous namespace here, we fail the warning -Wmissing-prototypes
24 namespace ROCKSDB_NAMESPACE {
25 // using namespace rocksdb;
26 // Create a index builder based on its type.
CreateIndexBuilder(BlockBasedTableOptions::IndexType index_type,const InternalKeyComparator * comparator,const InternalKeySliceTransform * int_key_slice_transform,const bool use_value_delta_encoding,const BlockBasedTableOptions & table_opt)27 IndexBuilder* IndexBuilder::CreateIndexBuilder(
28     BlockBasedTableOptions::IndexType index_type,
29     const InternalKeyComparator* comparator,
30     const InternalKeySliceTransform* int_key_slice_transform,
31     const bool use_value_delta_encoding,
32     const BlockBasedTableOptions& table_opt) {
33   IndexBuilder* result = nullptr;
34   switch (index_type) {
35     case BlockBasedTableOptions::kBinarySearch: {
36       result = new ShortenedIndexBuilder(
37           comparator, table_opt.index_block_restart_interval,
38           table_opt.format_version, use_value_delta_encoding,
39           table_opt.index_shortening, /* include_first_key */ false);
40     } break;
41     case BlockBasedTableOptions::kHashSearch: {
42       // Currently kHashSearch is incompatible with index_block_restart_interval
43       // > 1
44       assert(table_opt.index_block_restart_interval == 1);
45       result = new HashIndexBuilder(
46           comparator, int_key_slice_transform,
47           table_opt.index_block_restart_interval, table_opt.format_version,
48           use_value_delta_encoding, table_opt.index_shortening);
49     } break;
50     case BlockBasedTableOptions::kTwoLevelIndexSearch: {
51       result = PartitionedIndexBuilder::CreateIndexBuilder(
52           comparator, use_value_delta_encoding, table_opt);
53     } break;
54     case BlockBasedTableOptions::kBinarySearchWithFirstKey: {
55       result = new ShortenedIndexBuilder(
56           comparator, table_opt.index_block_restart_interval,
57           table_opt.format_version, use_value_delta_encoding,
58           table_opt.index_shortening, /* include_first_key */ true);
59     } break;
60     default: {
61       assert(!"Do not recognize the index type ");
62     } break;
63   }
64   return result;
65 }
66 
CreateIndexBuilder(const InternalKeyComparator * comparator,const bool use_value_delta_encoding,const BlockBasedTableOptions & table_opt)67 PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
68     const InternalKeyComparator* comparator,
69     const bool use_value_delta_encoding,
70     const BlockBasedTableOptions& table_opt) {
71   return new PartitionedIndexBuilder(comparator, table_opt,
72                                      use_value_delta_encoding);
73 }
74 
PartitionedIndexBuilder(const InternalKeyComparator * comparator,const BlockBasedTableOptions & table_opt,const bool use_value_delta_encoding)75 PartitionedIndexBuilder::PartitionedIndexBuilder(
76     const InternalKeyComparator* comparator,
77     const BlockBasedTableOptions& table_opt,
78     const bool use_value_delta_encoding)
79     : IndexBuilder(comparator),
80       index_block_builder_(table_opt.index_block_restart_interval,
81                            true /*use_delta_encoding*/,
82                            use_value_delta_encoding),
83       index_block_builder_without_seq_(table_opt.index_block_restart_interval,
84                                        true /*use_delta_encoding*/,
85                                        use_value_delta_encoding),
86       sub_index_builder_(nullptr),
87       table_opt_(table_opt),
88       // We start by false. After each partition we revise the value based on
89       // what the sub_index_builder has decided. If the feature is disabled
90       // entirely, this will be set to true after switching the first
91       // sub_index_builder. Otherwise, it could be set to true even one of the
92       // sub_index_builders could not safely exclude seq from the keys, then it
93       // wil be enforced on all sub_index_builders on ::Finish.
94       seperator_is_key_plus_seq_(false),
95       use_value_delta_encoding_(use_value_delta_encoding) {}
96 
~PartitionedIndexBuilder()97 PartitionedIndexBuilder::~PartitionedIndexBuilder() {
98   delete sub_index_builder_;
99 }
100 
MakeNewSubIndexBuilder()101 void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
102   assert(sub_index_builder_ == nullptr);
103   sub_index_builder_ = new ShortenedIndexBuilder(
104       comparator_, table_opt_.index_block_restart_interval,
105       table_opt_.format_version, use_value_delta_encoding_,
106       table_opt_.index_shortening, /* include_first_key */ false);
107   flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
108       table_opt_.metadata_block_size, table_opt_.block_size_deviation,
109       // Note: this is sub-optimal since sub_index_builder_ could later reset
110       // seperator_is_key_plus_seq_ but the probability of that is low.
111       sub_index_builder_->seperator_is_key_plus_seq_
112           ? sub_index_builder_->index_block_builder_
113           : sub_index_builder_->index_block_builder_without_seq_));
114   partition_cut_requested_ = false;
115 }
116 
RequestPartitionCut()117 void PartitionedIndexBuilder::RequestPartitionCut() {
118   partition_cut_requested_ = true;
119 }
120 
AddIndexEntry(std::string * last_key_in_current_block,const Slice * first_key_in_next_block,const BlockHandle & block_handle)121 void PartitionedIndexBuilder::AddIndexEntry(
122     std::string* last_key_in_current_block,
123     const Slice* first_key_in_next_block, const BlockHandle& block_handle) {
124   // Note: to avoid two consecuitive flush in the same method call, we do not
125   // check flush policy when adding the last key
126   if (UNLIKELY(first_key_in_next_block == nullptr)) {  // no more keys
127     if (sub_index_builder_ == nullptr) {
128       MakeNewSubIndexBuilder();
129     }
130     sub_index_builder_->AddIndexEntry(last_key_in_current_block,
131                                       first_key_in_next_block, block_handle);
132     if (sub_index_builder_->seperator_is_key_plus_seq_) {
133       // then we need to apply it to all sub-index builders
134       seperator_is_key_plus_seq_ = true;
135     }
136     sub_index_last_key_ = std::string(*last_key_in_current_block);
137     entries_.push_back(
138         {sub_index_last_key_,
139          std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
140     sub_index_builder_ = nullptr;
141     cut_filter_block = true;
142   } else {
143     // apply flush policy only to non-empty sub_index_builder_
144     if (sub_index_builder_ != nullptr) {
145       std::string handle_encoding;
146       block_handle.EncodeTo(&handle_encoding);
147       bool do_flush =
148           partition_cut_requested_ ||
149           flush_policy_->Update(*last_key_in_current_block, handle_encoding);
150       if (do_flush) {
151         entries_.push_back(
152             {sub_index_last_key_,
153              std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
154         cut_filter_block = true;
155         sub_index_builder_ = nullptr;
156       }
157     }
158     if (sub_index_builder_ == nullptr) {
159       MakeNewSubIndexBuilder();
160     }
161     sub_index_builder_->AddIndexEntry(last_key_in_current_block,
162                                       first_key_in_next_block, block_handle);
163     sub_index_last_key_ = std::string(*last_key_in_current_block);
164     if (sub_index_builder_->seperator_is_key_plus_seq_) {
165       // then we need to apply it to all sub-index builders
166       seperator_is_key_plus_seq_ = true;
167     }
168   }
169 }
170 
Finish(IndexBlocks * index_blocks,const BlockHandle & last_partition_block_handle)171 Status PartitionedIndexBuilder::Finish(
172     IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
173   if (partition_cnt_ == 0) {
174     partition_cnt_ = entries_.size();
175   }
176   // It must be set to null after last key is added
177   assert(sub_index_builder_ == nullptr);
178   if (finishing_indexes == true) {
179     Entry& last_entry = entries_.front();
180     std::string handle_encoding;
181     last_partition_block_handle.EncodeTo(&handle_encoding);
182     std::string handle_delta_encoding;
183     PutVarsignedint64(
184         &handle_delta_encoding,
185         last_partition_block_handle.size() - last_encoded_handle_.size());
186     last_encoded_handle_ = last_partition_block_handle;
187     const Slice handle_delta_encoding_slice(handle_delta_encoding);
188     index_block_builder_.Add(last_entry.key, handle_encoding,
189                              &handle_delta_encoding_slice);
190     if (!seperator_is_key_plus_seq_) {
191       index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
192                                            handle_encoding,
193                                            &handle_delta_encoding_slice);
194     }
195     entries_.pop_front();
196   }
197   // If there is no sub_index left, then return the 2nd level index.
198   if (UNLIKELY(entries_.empty())) {
199     if (seperator_is_key_plus_seq_) {
200       index_blocks->index_block_contents = index_block_builder_.Finish();
201     } else {
202       index_blocks->index_block_contents =
203           index_block_builder_without_seq_.Finish();
204     }
205     top_level_index_size_ = index_blocks->index_block_contents.size();
206     index_size_ += top_level_index_size_;
207     return Status::OK();
208   } else {
209     // Finish the next partition index in line and Incomplete() to indicate we
210     // expect more calls to Finish
211     Entry& entry = entries_.front();
212     // Apply the policy to all sub-indexes
213     entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_;
214     auto s = entry.value->Finish(index_blocks);
215     index_size_ += index_blocks->index_block_contents.size();
216     finishing_indexes = true;
217     return s.ok() ? Status::Incomplete() : s;
218   }
219 }
220 
NumPartitions() const221 size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; }
222 }  // namespace ROCKSDB_NAMESPACE
223